BOAST 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
module BOAST
  # Maps an OpenCL dimension index (0, 1, 2) onto the member name of the
  # corresponding CUDA dim3 built-in (x, y, z).
  @@ocl_cuda_dim_assoc = { 0 => "x", 1 => "y", 2 => "z" }

  # Builds the expression "<builtin>.<x|y|z>" for the requested dimension.
  # Shared by every CUDA branch below so the lookup and its error live in a
  # single place.
  #
  # builtin:: name of the CUDA built-in ("gridDim", "blockDim", ...).
  # dim::     dimension index, must be a key of @@ocl_cuda_dim_assoc.
  # Raises "Unsupported dimension!" when dim is not 0, 1 or 2.
  def BOAST::cuda_dim3_component(builtin, dim)
    d = @@ocl_cuda_dim_assoc[dim]
    raise "Unsupported dimension!" if not d
    return Expression::new(".", builtin, d)
  end
  private_class_method :cuda_dim3_component

  # Returns the call that synchronises the work-items of a group.
  #
  # locality:: any combination of :local and :global. Only consulted for
  #            OpenCL, where at least one of them is required.
  # Raises "Unsupported locality" (OpenCL without a known locality) or
  # "Unsupported language!" (neither OpenCL nor CUDA).
  # NOTE(review): @@lang, CL, CUDA, FuncCall and Expression are not defined in
  # this file; they are expected to come from the rest of the BOAST module.
  def BOAST::barrier(*locality)
    if @@lang == CL then
      fences = []
      fences.push("CLK_LOCAL_MEM_FENCE") if locality.include?(:local)
      fences.push("CLK_GLOBAL_MEM_FENCE") if locality.include?(:global)
      raise "Unsupported locality" if fences.empty?
      return FuncCall::new("barrier", fences.join(" | "))
    elsif @@lang == CUDA then
      return FuncCall::new("__syncthreads")
    else
      raise "Unsupported language!"
    end
  end


  # Returns a call to get_work_dim. Only available when generating OpenCL;
  # every other language raises "Unsupported language!".
  def BOAST::get_work_dim
    raise "Unsupported language!" if @@lang != CL
    return FuncCall::new("get_work_dim")
  end

  # Returns get_global_size(dim) for OpenCL, or gridDim.d*blockDim.d for CUDA.
  def BOAST::get_global_size(dim)
    if @@lang == CL then
      return FuncCall::new("get_global_size",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("gridDim", dim)*cuda_dim3_component("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Returns get_global_id(dim) for OpenCL, or
  # threadIdx.d+blockIdx.d*blockDim.d for CUDA.
  def BOAST::get_global_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_global_id",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("threadIdx", dim)+cuda_dim3_component("blockIdx", dim)*cuda_dim3_component("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Returns get_local_size(dim) for OpenCL, or blockDim.d for CUDA.
  def BOAST::get_local_size(dim)
    if @@lang == CL then
      return FuncCall::new("get_local_size",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Returns get_local_id(dim) for OpenCL, or threadIdx.d for CUDA.
  def BOAST::get_local_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_local_id",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("threadIdx", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Returns get_num_groups(dim) for OpenCL, or gridDim.d for CUDA.
  def BOAST::get_num_groups(dim)
    if @@lang == CL then
      return FuncCall::new("get_num_groups",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("gridDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Returns get_group_id(dim) for OpenCL, or blockIdx.d for CUDA.
  def BOAST::get_group_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_group_id",dim)
    elsif @@lang == CUDA then
      return cuda_dim3_component("blockIdx", dim)
    else
      raise "Unsupported language!"
    end
  end

end
@@ -0,0 +1,545 @@
1
+ require 'stringio'
2
+ require 'rubygems'
3
+ require 'rake'
4
+ require 'tempfile'
5
+ require 'rbconfig'
6
+ require 'systemu'
7
+
8
+ module BOAST
9
# Module-wide verbosity flag; setup_compiler falls back to it when no
# :verbose option is passed.
@@verbose = false

# Reads the module-wide verbosity flag.
def BOAST::get_verbose
  @@verbose
end

# Replaces the module-wide verbosity flag with +verbose+.
def BOAST::set_verbose(verbose)
  @@verbose = verbose
end
18
+
19
+ class CKernel
20
# The Rake DSL provides the `rule`, `file` and `sh` helpers used by
# setup_compiler and build.
include Rake::DSL
# code::      IO-like object holding the generated source.
# procedure:: procedure description whose name/parameters drive the wrappers.
# lang::      target language of this kernel.
# binary::    StringIO filled by build with the compiled object file.
# kernels::   other kernels whose binaries are linked in by build.
attr_accessor :code, :procedure, :lang, :binary, :kernels
26
+
27
+ def initialize(options={})
28
+ if options[:code] then
29
+ @code = options[:code]
30
+ elsif BOAST::get_chain_code
31
+ @code = BOAST::get_output
32
+ @code.seek(0,SEEK_END)
33
+ else
34
+ @code = StringIO::new
35
+ end
36
+ BOAST::set_output( @code )
37
+ if options[:kernels] then
38
+ @kernels = options[:kernels]
39
+ else
40
+ @kernels = []
41
+ end
42
+ if options[:lang] then
43
+ @lang = options[:lang]
44
+ else
45
+ @lang = BOAST::get_lang
46
+ end
47
+ end
48
+
49
# Writes the whole generated code to standard output, reading the buffer
# from its beginning.
def print
  @code.rewind
  contents = @code.read
  puts contents
end
53
+
54
# Implicit string conversion: the full generated code, read from the start
# of the buffer.
def to_str
  @code.rewind
  code.read
end
58
+
59
# Returns the full generated code as a String, read from the start of the
# buffer.
def to_s
  @code.rewind
  code.read
end
63
+
64
# Clears all Rake tasks and registers the rules that compile .c, .f90, .cpp
# and .cu sources into .o files.
#
# Recognised options (all optional):
# :verbose::   echo compiler commands; falls back to BOAST::get_verbose.
# :FC, :CC, :CXX, :NVCC:: compiler executables
#              (defaults: gfortran, cc, g++, nvcc).
# :FCFLAGS::   Fortran flags (default "-O2 -Wall"); -fPIC is always added.
# :NVCCFLAGS:: nvcc flags (default "-O2"); -fPIC is always forwarded to the
#              host compiler.
# :CFLAGS::    extra flags appended to the C flags.
# :LDFLAGS::   extra linker flags.
#
# Returns the linker flags string to use when linking the final module.
def setup_compiler(options = {})
  Rake::Task::clear
  verbose = options[:verbose]
  verbose = BOAST::get_verbose if not verbose
  Rake::verbose(verbose)
  Rake::FileUtilsExt.verbose_flag=verbose

  f_compiler = options[:FC] || "gfortran"
  c_compiler = options[:CC] || "cc"
  cxx_compiler = options[:CXX] || "g++"
  cuda_compiler = options[:NVCC] || "nvcc"

  f_flags = options[:FCFLAGS] || "-O2 -Wall"
  f_flags += " -fPIC"
  f_flags += " -fno-second-underscore" if f_compiler == 'g95'
  ld_flags = options[:LDFLAGS] || ""
  cuda_flags = options[:NVCCFLAGS] || "-O2"
  cuda_flags += " --compiler-options '-fPIC'"

  # The generated wrapper is a Ruby C extension: it needs the interpreter's
  # headers and library.
  includes = "-I#{RbConfig::CONFIG["archdir"]}"
  includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
  ld_flags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
  ld_flags += " -lcudart" if @lang == BOAST::CUDA

  # narray is optional: when it cannot be located the wrapper is simply built
  # without HAVE_NARRAY_H.
  narray_path = nil
  begin
    spec = Gem::Specification::find_by_name('narray')
    narray_path = spec.full_gem_path
  rescue Gem::LoadError
    # narray is not installed; keep narray_path nil on purpose.
  rescue NoMethodError
    # RubyGems without Gem::Specification::find_by_name: use the older API.
    spec = Gem::available?('narray')
    if spec then
      require 'narray'
      narray_path = Gem.loaded_specs['narray'].full_gem_path
    end
  end
  includes += " -I#{narray_path}" if narray_path

  cflags = "-O2 -Wall -fPIC #{includes}"
  # C++ flags are captured before the narray define and user CFLAGS are added.
  cxxflags = String::new(cflags)
  cflags += " -DHAVE_NARRAY_H" if narray_path
  # A separating space is required, otherwise the user's first flag is glued
  # onto the previous one (e.g. "-DHAVE_NARRAY_H-O3").
  cflags += " #{options[:CFLAGS]}" if options[:CFLAGS]
  fcflags = f_flags
  cudaflags = cuda_flags

  # Runs one compiler command: echoed through Rake's sh when verbose,
  # otherwise silently through systemu, printing stderr only on failure.
  runner = lambda { |t, call_string|
    if verbose then
      sh call_string
    else
      status, stdout, stderr = systemu call_string
      if not status.success? then
        puts stderr
        fail "#{t.source}: compilation failed"
      end
      status.success?
    end
  }

  rule '.o' => '.c' do |t|
    c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
    runner.call(t, c_call_string)
  end

  rule '.o' => '.f90' do |t|
    f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
    runner.call(t, f_call_string)
  end

  rule '.o' => '.cpp' do |t|
    cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
    runner.call(t, cxx_call_string)
  end

  rule '.o' => '.cu' do |t|
    cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
    runner.call(t, cuda_call_string)
  end
  return ld_flags
end
147
+
148
# Compiles the generated code with OpenCL and defines a singleton +run+
# method on this kernel that uploads arguments, launches the kernel and
# reads results back.
#
# Recognised options:
# :platform_vendor:: pattern matched against each platform's vendor; the last
#                    matching platform is used (first platform when absent).
# :device_type::     OpenCL device type (defaults to ALL).
# :device_name::     pattern matched against each device's name; the last
#                    matching device is used (first device when absent).
# :CLFLAGS::         options passed to the OpenCL compiler.
# :verbose::         print the build log to STDERR.
#
# The generated +run+ takes one argument per procedure parameter plus an
# optional trailing hash with :global_work_size and :local_work_size, and
# returns a hash with :start, :end and :duration (seconds).
#
# Raises when no platform or device matches the requested filters.
# Returns self.
def build_opencl(options)
  require 'opencl_ruby_ffi'
  platforms = OpenCL::get_platforms
  if options[:platform_vendor] then
    platform = platforms.select { |candidate| candidate.vendor.match(options[:platform_vendor]) }.last
  else
    platform = platforms.first
  end
  # Fail with an explicit message instead of a NoMethodError on nil below.
  raise "No matching OpenCL platform found" if not platform

  type = options[:device_type] ? options[:device_type] : OpenCL::Device::Type::ALL
  devices = platform.devices(type)
  if options[:device_name] then
    device = devices.select { |candidate| candidate.name.match(options[:device_name]) }.last
  else
    device = devices.first
  end
  raise "No matching OpenCL device found" if not device

  @context = OpenCL::create_context([device])
  program = @context.create_program_with_source([@code.string])
  program.build(:options => options[:CLFLAGS])
  if options[:verbose] then
    # Label each log with the device it belongs to.
    program.build_log.each { |dev, log|
      STDERR.puts "#{dev.name}: #{log}"
    }
  end
  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
  @kernel = program.create_kernel(@procedure.name)
  # The parameter counts are interpolated now; everything else is evaluated
  # each time run is called.
  # NOTE(review): the original tested "direction == :in and direction == :out",
  # which can never be true; :inout is assumed to be the intended value --
  # confirm against the Variable class.
  run_method = <<EOF
def self.run(*args)
  raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
  params = []
  opts = {}
  opts = args.pop if args.length == #{@procedure.parameters.length+1}
  @procedure.parameters.each_index { |i|
    param = @procedure.parameters[i]
    if param.dimension then
      if param.direction == :inout then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size )
        @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
      elsif param.direction == :in then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size, :flags => OpenCL::Mem::Flags::READ_ONLY )
        @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
      elsif param.direction == :out then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size, :flags => OpenCL::Mem::Flags::WRITE_ONLY )
      else
        params[i] = @context.create_buffer( args[i].size * args[i].element_size )
      end
    else
      if param.type.is_a?(Real) then
        params[i] = OpenCL::Half::new(args[i]) if param.type.size == 2
        params[i] = OpenCL::Float::new(args[i]) if param.type.size == 4
        params[i] = OpenCL::Double::new(args[i]) if param.type.size == 8
      elsif param.type.is_a?(Int) then
        if param.type.signed
          params[i] = OpenCL::Char::new(args[i]) if param.type.size == 1
          params[i] = OpenCL::Short::new(args[i]) if param.type.size == 2
          params[i] = OpenCL::Int::new(args[i]) if param.type.size == 4
          params[i] = OpenCL::Long::new(args[i]) if param.type.size == 8
        else
          params[i] = OpenCL::UChar::new(args[i]) if param.type.size == 1
          params[i] = OpenCL::UShort::new(args[i]) if param.type.size == 2
          params[i] = OpenCL::UInt::new(args[i]) if param.type.size == 4
          params[i] = OpenCL::ULong::new(args[i]) if param.type.size == 8
        end
      else
        params[i] = args[i]
      end
    end
  }
  params.each_index{ |i|
    @kernel.set_arg(i, params[i])
  }
  event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
  @procedure.parameters.each_index { |i|
    param = @procedure.parameters[i]
    if param.dimension then
      if param.direction == :inout or param.direction == :out then
        @queue.enqueue_read_buffer( params[i], args[i], :blocking => true )
      end
    end
  }
  result = {}
  result[:start] = event.profiling_command_start
  result[:end] = event.profiling_command_end
  result[:duration] = (result[:end] - result[:start])/1000000000.0
  return result
end
EOF
  eval run_method
  return self
end
244
+
245
# Compiles the kernel and makes it runnable from Ruby.
#
# OpenCL kernels are delegated to build_opencl. For every other language the
# generated source and a C wrapper module are written to temporary files,
# compiled through the Rake rules installed by setup_compiler, linked into a
# shared object together with the binaries of @kernels, and loaded into the
# interpreter; the wrapper module (which provides +run+) then extends self.
# The compiled object file is kept in @binary and the intermediate files are
# removed.
#
# options:: forwarded to setup_compiler; :CC and :LD additionally select the
#           linker (LD defaults to CC, CC defaults to "cc").
# Returns self.
def build(options = {})
  return build_opencl(options) if @lang == BOAST::CL
  ldflags = self.setup_compiler(options)
  extension = ".c" if @lang == BOAST::C
  extension = ".cu" if @lang == BOAST::CUDA
  extension = ".f90" if @lang == BOAST::FORTRAN
  #temporary
  c_compiler = options[:CC] || "cc"
  linker = options[:LD] || c_compiler
  #end temporary

  source_file = Tempfile::new([@procedure.name,extension])
  path = source_file.path
  stem = path.chomp(File::extname(path))
  target = stem + ".o"
  fill_code(source_file)
  source_file.close

  # The wrapper module sits next to the source and is named after it; dashes
  # are replaced because the name is also used as a C identifier.
  source_dir, source_base = File::split(stem)
  module_name = "Mod_" + source_base.gsub("-","_")
  module_stem = source_dir + "/" + module_name
  module_file_name = module_stem + ".c"

  previous_lang = BOAST::get_lang
  previous_output = BOAST::get_output
  module_file = File::open(module_file_name,"w+")
  begin
    # The wrapper is always C, whatever the kernel's language.
    BOAST::set_lang(BOAST::C)
    BOAST::set_output(module_file)
    fill_module(module_file, module_name)
  ensure
    # Restore the global BOAST state and release the file even when the
    # wrapper generation fails.
    module_file.close
    BOAST::set_lang(previous_lang)
    BOAST::set_output(previous_output)
  end

  module_target = module_stem + ".o"
  module_final = module_stem + ".so"

  # Keep the Tempfile objects themselves: once they are unreachable their
  # finalizer may delete the files before the linker has read them.
  kernel_tempfiles = @kernels.collect { |kernel|
    kernel_file = Tempfile::new([kernel.procedure.name,".o"])
    kernel_file.binmode
    kernel.binary.rewind
    kernel_file.write( kernel.binary.read )
    kernel_file.close
    kernel_file
  }
  kernel_files = kernel_tempfiles.collect { |kernel_file| kernel_file.path }

  file module_final => [module_target, target] do
    sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
  end
  Rake::Task[module_final].invoke
  require(module_final)
  # The extension defines its module at top level; a constant lookup replaces
  # the former eval of generated text.
  self.extend(Object.const_get(module_name))

  @binary = StringIO::new
  File::open(target,"rb") { |object_file|
    @binary.write( object_file.read )
  }
  File.unlink(target)
  File.unlink(module_target)
  File.unlink(module_file_name)
  File.unlink(module_final)
  kernel_tempfiles.each { |kernel_file|
    kernel_file.unlink
  }
  return self
end
306
+
307
# Writes the compilable source of the kernel to +source_file+.
#
# C and CUDA sources get an <inttypes.h> include (CUDA also <cuda.h>) in
# front of the generated code. For CUDA a host-side wrapper, whose prototype
# comes from @procedure.header(BOAST::CUDA,false), is appended: it launches
# the kernel with the grid/block sizes received in block_number/block_size,
# times it with CUDA events and returns the elapsed time in nanoseconds.
#
# @code is rewound before reading and again before returning.
def fill_code(source_file)
  @code.rewind
  source_file.puts "#include <inttypes.h>" if @lang == BOAST::C or @lang == BOAST::CUDA
  source_file.puts "#include <cuda.h>" if @lang == BOAST::CUDA
  source_file.write @code.read
  if @lang == BOAST::CUDA then
    # cudaEventElapsedTime reports milliseconds, hence the 1e6 factor.
    # The events are destroyed before returning so that repeated runs do not
    # accumulate event objects.
    source_file.write <<EOF
extern "C" {
#{@procedure.header(BOAST::CUDA,false)}{
  dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
  dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
  cudaEvent_t __start, __stop;
  float __time;
  cudaEventCreate(&__start);
  cudaEventCreate(&__stop);
  cudaEventRecord(__start, 0);
  #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
  cudaEventRecord(__stop, 0);
  cudaEventSynchronize(__stop);
  cudaEventElapsedTime(&__time, __start, __stop);
  cudaEventDestroy(__start);
  cudaEventDestroy(__stop);
  return (unsigned long long int)((double)__time*(double)1e6);
}
}
EOF
  end
  @code.rewind
end
334
+
335
# Generates the C source of the Ruby extension wrapping the kernel.
#
# The extension defines a module named +module_name+ with one method, +run+,
# which converts its Ruby arguments to C values, calls the compiled procedure
# and returns a hash holding :duration (seconds) and, when the procedure
# declares a return value, :return.
#
# module_file:: IO the C code is written to. Declarations and assignments
#               produced through the BOAST objects (decl / print) are
#               interleaved with the text written here.
#               NOTE(review): this relies on the caller having made
#               module_file the current BOAST output -- build does so.
# module_name:: name of the generated module and of its Init_ function.
#
# For CUDA, +run+ accepts one extra trailing hash with :block_size and
# :block_number (arrays of up to 3 entries, each defaulting to 1), array
# arguments must be NArrays, and device buffers are allocated, filled
# (direction :in), read back (direction :out) and freed around the call.
# For other languages array arguments may be Strings or NArrays and are
# passed by address without copy.
def fill_module(module_file, module_name)
  module_file.write <<EOF
#include "ruby.h"
#include <inttypes.h>
#include <time.h>
#ifdef HAVE_NARRAY_H
#include "narray.h"
#endif
EOF
  if( @lang == BOAST::CUDA ) then
    module_file.print "#include <cuda_runtime.h>\n"
  end
  module_file.print @procedure.header(@lang)
  module_file.write <<EOF
VALUE #{module_name} = Qnil;
void Init_#{module_name}();
VALUE method_run(int argc, VALUE *argv, VALUE self);
void Init_#{module_name}() {
#{module_name} = rb_define_module("#{module_name}");
rb_define_method(#{module_name}, "run", method_run, -1);
}
VALUE method_run(int argc, VALUE *argv, VALUE self) {
EOF
  # Argument count check; CUDA tolerates the optional trailing options hash.
  if( @lang == BOAST::CUDA ) then
    module_file.write <<EOF
if( argc < #{@procedure.parameters.length} || argc > #{@procedure.parameters.length + 1} )
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
VALUE rb_opts;
VALUE rb_ptr;
size_t block_size[3] = {1,1,1};
size_t block_number[3] = {1,1,1};
EOF
  else
    module_file.write <<EOF
if( argc != #{@procedure.parameters.length} )
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
VALUE rb_ptr;
EOF
  end
  # BOAST-side stand-ins for the C variables argv and rb_ptr, used to emit
  # the assignments below.
  argc = @procedure.parameters.length
  argv = Variable::new("argv",Real,{:dimension => [ Dimension::new(0,argc-1) ] })
  rb_ptr = Variable::new("rb_ptr",Int)
  # Declare one local per parameter, without constness or direction.
  @procedure.parameters.each { |param|
    param_copy = param.copy
    param_copy.constant = nil
    param_copy.direction = nil
    param_copy.decl
  }
  # Convert every Ruby argument into its C local.
  @procedure.parameters.each_index do |i|
    param = @procedure.parameters[i]
    if not param.dimension then
      # Scalars. Only 4 and 8 byte integers get a conversion here.
      case param.type
      when Int
        (param === FuncCall::new("NUM2INT", argv[i])).print if param.type.size == 4
        (param === FuncCall::new("NUM2LONG", argv[i])).print if param.type.size == 8
      when Real
        (param === FuncCall::new("NUM2DBL", argv[i])).print
      end
    else
      (rb_ptr === argv[i]).print
      if @lang == BOAST::CUDA then
        module_file.print <<EOF
if ( IsNArray(rb_ptr) ) {
struct NARRAY *n_ary;
size_t array_size;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
array_size = n_ary->total * na_sizeof[n_ary->type];
cudaMalloc( (void **) &#{param.name}, array_size);
EOF
        if param.direction == :in then
          module_file.print <<EOF
cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
EOF
        end
        module_file.print <<EOF
} else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});

EOF
      else
        module_file.print <<EOF
if (TYPE(rb_ptr) == T_STRING) {
#{param.name} = (void *) RSTRING_PTR(rb_ptr);
} else if ( IsNArray(rb_ptr) ) {
struct NARRAY *n_ary;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
#{param.name} = (void *) n_ary->ptr;
} else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
EOF
      end
    end
  end
  # Optional CUDA launch configuration.
  if @lang == BOAST::CUDA then
    module_file.write <<EOF
if( argc == #{@procedure.parameters.length + 1} ) {
rb_opts = argv[argc -1];
if ( rb_opts != Qnil ) {
VALUE rb_array_data = Qnil;
int i;
if (TYPE(rb_opts) != T_HASH)
rb_raise(rb_eArgError, "Cuda options should be passed as a hash");
rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_size")));
if( rb_ptr != Qnil ) {
if (TYPE(rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option block_size should be an array");
for(i=0; i<3; i++) {
rb_array_data = rb_ary_entry(rb_ptr, i);
if( rb_array_data != Qnil )
block_size[i] = (size_t) NUM2LONG( rb_array_data );
}
}
rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_number")));
if( rb_ptr != Qnil ) {
if (TYPE(rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option block_number should be an array");
for(i=0; i<3; i++) {
rb_array_data = rb_ary_entry(rb_ptr, i);
if( rb_array_data != Qnil )
block_number[i] = (size_t) NUM2LONG( rb_array_data );
}
}
}
}
EOF
  end
  module_file.print " #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
  module_file.print " VALUE stats = rb_hash_new();\n"
  module_file.print " struct timespec start, stop;\n"
  module_file.print " unsigned long long int duration;\n"
  module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
  # The CUDA wrapper returns its own event-based timing instead of the
  # procedure's value.
  if @lang == BOAST::CUDA then
    module_file.print " duration = "
  elsif @procedure.properties[:return] then
    module_file.print " ret = "
  end
  module_file.print " #{@procedure.name}"
  module_file.print "_" if @lang == BOAST::FORTRAN
  module_file.print "_wrapper" if @lang == BOAST::CUDA
  module_file.print "("
  if(@lang == BOAST::FORTRAN) then
    # Fortran receives everything by reference: scalars are passed by address.
    params = []
    @procedure.parameters.each { |param|
      if param.dimension then
        params.push( param.name )
      else
        params.push( "&"+param.name )
      end
    }
    module_file.print params.join(", ")
  else
    module_file.print @procedure.parameters.join(", ")
  end
  if @lang == BOAST::CUDA then
    module_file.print ", " if @procedure.parameters.length > 0
    module_file.print "block_number, block_size"
  end
  module_file.print " );\n"
  module_file.print " clock_gettime(CLOCK_REALTIME, &stop);\n"

  # Copy results back and release the device buffers.
  if @lang == BOAST::CUDA then
    @procedure.parameters.each_index do |i|
      param = @procedure.parameters[i]
      if param.dimension then
        (rb_ptr === argv[i]).print
        module_file.print <<EOF
if ( IsNArray(rb_ptr) ) {
EOF
        if param.direction == :out then
          # cudaMemcpy takes (dst, src, count, kind): for a device-to-host
          # copy the NArray storage is the destination. The arguments used to
          # be passed the other way round, so results never reached the host.
          module_file.print <<EOF
struct NARRAY *n_ary;
size_t array_size;
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
array_size = n_ary->total * na_sizeof[n_ary->type];
cudaMemcpy((void *) n_ary->ptr, #{param.name}, array_size, cudaMemcpyDeviceToHost);
EOF
        end
        module_file.print <<EOF
cudaFree( (void *) #{param.name});
} else
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});

EOF
      end
    end
  end
  if @lang != BOAST::CUDA then
    module_file.print " duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
    module_file.print " duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
  end
  # duration is in nanoseconds at this point; report seconds.
  module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)duration*(double)1e-9));\n"
  if @procedure.properties[:return] then
    type_ret = @procedure.properties[:return].type
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((long long)ret));\n" if type_ret.kind_of?(Int) and type_ret.signed
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
  end
  module_file.print " return stats;\n"
  module_file.print "}"
end
535
+
536
# Builds the kernel lazily: the first call to +run+ on a kernel that has not
# been built triggers build, then forwards the call to the +run+ method the
# build installed. Any other unknown method is handled as usual.
def method_missing(meth, *args, &block)
  if meth.to_s == "run" then
    self.build
    # If build did not install run, report the missing method instead of
    # re-entering this handler forever.
    return super unless singleton_class.method_defined?(:run)
    self.run(*args,&block)
  else
    super
  end
end

# Keeps respond_to? consistent with method_missing: +run+ is always
# answerable, since it is created on demand.
def respond_to_missing?(meth, include_private = false)
  meth.to_s == "run" || super
end
544
+ end
545
+ end