BOAST 0.9995 → 0.9996

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/BOAST/CKernel.rb CHANGED
@@ -8,7 +8,6 @@ require 'yaml'
8
8
  require 'pathname'
9
9
 
10
10
  module BOAST
11
- @@verbose = false
12
11
  @@compiler_default_options = {
13
12
  :FC => 'gfortran',
14
13
  :FCFLAGS => '-O2 -Wall',
@@ -36,7 +35,16 @@ module BOAST
36
35
  "icpc" => "-openmp"
37
36
  }
38
37
 
39
- def self.read_boast_config
38
+ module PrivateStateAccessor
39
+ private_boolean_state_accessor :verbose
40
+ end
41
+
42
+ boolean_state_accessor :verbose
43
+ @@verbose = false
44
+
45
+ module_function
46
+
47
+ def read_boast_config
40
48
  home_config_dir = ENV["XDG_CONFIG_HOME"]
41
49
  home_config_dir = "#{Dir.home}/.config" if not home_config_dir
42
50
  Dir.mkdir( home_config_dir ) if not File::exist?( home_config_dir )
@@ -70,36 +78,22 @@ module BOAST
70
78
  @@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
71
79
  end
72
80
 
73
- BOAST::read_boast_config
81
+ read_boast_config
74
82
 
75
- def self.get_openmp_flags
83
+ def get_openmp_flags
76
84
  return @@openmp_default_flags.clone
77
85
  end
78
86
 
79
- def self.get_compiler_options
87
+ def get_compiler_options
80
88
  return @@compiler_default_options.clone
81
89
  end
82
90
 
83
- def self.verbose
84
- return @@verbose
85
- end
86
-
87
-
88
- def self.get_verbose
89
- return @@verbose
90
- end
91
-
92
- def self.verbose=(verbose)
93
- @@verbose = verbose
94
- end
95
-
96
- def self.set_verbose(verbose)
97
- @@verbose = verbose
98
- end
99
-
100
91
  class CKernel
101
92
  include Rake::DSL
102
- include BOAST::Inspectable
93
+ include Inspectable
94
+ include PrivateStateAccessor
95
+ include TypeTransition
96
+
103
97
  attr_accessor :code
104
98
  attr_accessor :procedure
105
99
  attr_accessor :lang
@@ -110,13 +104,13 @@ module BOAST
110
104
  def initialize(options={})
111
105
  if options[:code] then
112
106
  @code = options[:code]
113
- elsif BOAST::get_chain_code
114
- @code = BOAST::get_output
107
+ elsif get_chain_code
108
+ @code = get_output
115
109
  @code.seek(0,SEEK_END)
116
110
  else
117
111
  @code = StringIO::new
118
112
  end
119
- BOAST::set_output( @code )
113
+ set_output(@code)
120
114
  if options[:kernels] then
121
115
  @kernels = options[:kernels]
122
116
  else
@@ -125,7 +119,7 @@ module BOAST
125
119
  if options[:lang] then
126
120
  @lang = options[:lang]
127
121
  else
128
- @lang = BOAST::get_lang
122
+ @lang = get_lang
129
123
  end
130
124
  end
131
125
 
@@ -139,29 +133,27 @@ module BOAST
139
133
  return code.read
140
134
  end
141
135
 
142
- def setup_compiler(options = {})
143
- Rake::Task::clear
144
- verbose = options[:verbose]
145
- verbose = BOAST::get_verbose if not verbose
146
- Rake::verbose(verbose)
147
- Rake::FileUtilsExt.verbose_flag=verbose
148
- f_compiler = options[:FC]
149
- c_compiler = options[:CC]
150
- cxx_compiler = options[:CXX]
151
- cuda_compiler = options[:NVCC]
152
- f_flags = options[:FCFLAGS]
153
- f_flags += " -fPIC"
154
- f_flags += " -fno-second-underscore" if f_compiler == 'g95'
155
- ld_flags = options[:LDFLAGS]
156
- cuda_flags = options[:NVCCFLAGS]
157
- cuda_flags += " --compiler-options '-fPIC'"
158
136
 
137
+ def get_openmp_flags(compiler)
138
+ openmp_flags = BOAST::get_openmp_flags[compiler]
139
+ if not openmp_flags then
140
+ keys = BOAST::get_openmp_flags.keys
141
+ keys.each { |k|
142
+ openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
143
+ }
144
+ end
145
+ return openmp_flags
146
+ end
159
147
 
148
+ def get_includes(narray_path)
160
149
  includes = "-I#{RbConfig::CONFIG["archdir"]}"
161
150
  includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
162
151
  includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
163
- ld_flags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
164
- ld_flags += " -lcudart" if @lang == BOAST::CUDA
152
+ includes += " -I#{narray_path}" if narray_path
153
+ return includes
154
+ end
155
+
156
+ def get_narray_path
165
157
  narray_path = nil
166
158
  begin
167
159
  spec = Gem::Specification::find_by_name('narray')
@@ -174,49 +166,96 @@ module BOAST
174
166
  narray_path = Gem.loaded_specs['narray'].full_gem_path
175
167
  end
176
168
  end
177
- includes += " -I#{narray_path}" if narray_path
169
+ end
170
+
171
+ def setup_c_compiler(options, includes, narray_path, runner)
172
+ c_compiler = options[:CC]
178
173
  cflags = options[:CFLAGS]
179
- cxxflags = options[:CXXFLAGS]
180
174
  cflags += " -fPIC #{includes}"
181
- cxxflags += " -fPIC #{includes}"
182
175
  cflags += " -DHAVE_NARRAY_H" if narray_path
183
- fcflags = f_flags
184
- cudaflags = cuda_flags
176
+ if options[:openmp] and @lang == C then
177
+ openmp_cflags = get_openmp_flags(c_compiler)
178
+ raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
179
+ cflags += " #{openmp_cflags}"
180
+ end
185
181
 
182
+ rule '.o' => '.c' do |t|
183
+ c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
184
+ runner.call(t, c_call_string)
185
+ end
186
+ end
187
+
188
+ def setup_cxx_compiler(options, includes, runner)
189
+ cxx_compiler = options[:CXX]
190
+ cxxflags = options[:CXXFLAGS]
191
+ cxxflags += " -fPIC #{includes}"
192
+ if options[:openmp] and @lang == C then
193
+ openmp_cxxflags = get_openmp_flags(cxx_compiler)
194
+ raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
195
+ cxxflags += " #{openmp_cxxflags}"
196
+ end
197
+
198
+ rule '.o' => '.cpp' do |t|
199
+ cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
200
+ runner.call(t, cxx_call_string)
201
+ end
202
+ end
203
+
204
+ def setup_fortran_compiler(options, runner)
205
+ f_compiler = options[:FC]
206
+ fcflags = options[:FCFLAGS]
207
+ fcflags += " -fPIC"
208
+ fcflags += " -fno-second-underscore" if f_compiler == 'g95'
209
+ if options[:openmp] and @lang == FORTRAN then
210
+ openmp_fcflags = get_openmp_flags(f_compiler)
211
+ raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
212
+ fcflags += " #{openmp_fcflags}"
213
+ end
214
+
215
+ rule '.o' => '.f90' do |t|
216
+ f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
217
+ runner.call(t, f_call_string)
218
+ end
219
+ end
220
+
221
+ def setup_cuda_compiler(options, runner)
222
+ cuda_compiler = options[:NVCC]
223
+ cudaflags = options[:NVCCFLAGS]
224
+ cudaflags += " --compiler-options '-fPIC'"
225
+
226
+ rule '.o' => '.cu' do |t|
227
+ cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
228
+ runner.call(t, cuda_call_string)
229
+ end
230
+ end
231
+
232
+ def setup_linker(options)
233
+ ldflags = options[:LDFLAGS]
234
+ ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
235
+ ldflags += " -lcudart" if @lang == CUDA
236
+ c_compiler = options[:CC]
237
+ c_compiler = "cc" if not c_compiler
238
+ linker = options[:LD]
239
+ linker = c_compiler if not linker
186
240
  if options[:openmp] then
187
- case @lang
188
- when BOAST::C
189
- openmp_c_flags = BOAST::get_openmp_flags[c_compiler]
190
- if not openmp_c_flags then
191
- keys = BOAST::get_openmp_flags.keys
192
- keys.each { |k|
193
- openmp_c_flags = BOAST::get_openmp_flags[k] if c_compiler.match(k)
194
- }
195
- end
196
- raise "unkwown openmp flags for: #{c_compiler}" if not openmp_c_flags
197
- cflags += " #{openmp_c_flags}"
198
- openmp_cxx_flags = BOAST::get_openmp_flags[cxx_compiler]
199
- if not openmp_cxx_flags then
200
- keys = BOAST::get_openmp_flags.keys
201
- keys.each { |k|
202
- openmp_cxx_flags = BOAST::get_openmp_flags[k] if cxx_compiler.match(k)
203
- }
204
- end
205
- raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxx_flags
206
- cxxflags += " #{openmp_cxx_flags}"
207
- when BOAST::FORTRAN
208
- openmp_f_flags = BOAST::get_openmp_flags[f_compiler]
209
- if not openmp_f_flags then
210
- keys = BOAST::get_openmp_flags.keys
211
- keys.each { |k|
212
- openmp_f_flags = BOAST::get_openmp_flags[k] if f_compiler.match(k)
213
- }
214
- end
215
- raise "unkwown openmp flags for: #{f_compiler}" if not openmp_f_flags
216
- fcflags += " #{openmp_f_flags}"
217
- end
241
+ openmp_ldflags = get_openmp_flags(linker)
242
+ raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
243
+ ldflags += " #{openmp_ldflags}"
218
244
  end
219
245
 
246
+ return [linker, ldflags]
247
+ end
248
+
249
+ def setup_compilers(options = {})
250
+ Rake::Task::clear
251
+ verbose = options[:verbose]
252
+ verbose = get_verbose if not verbose
253
+ Rake::verbose(verbose)
254
+ Rake::FileUtilsExt.verbose_flag=verbose
255
+
256
+ narray_path = get_narray_path
257
+ includes = get_includes(narray_path)
258
+
220
259
  runner = lambda { |t, call_string|
221
260
  if verbose then
222
261
  sh call_string
@@ -230,26 +269,13 @@ module BOAST
230
269
  end
231
270
  }
232
271
 
233
- rule '.o' => '.c' do |t|
234
- c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
235
- runner.call(t, c_call_string)
236
- end
272
+ setup_c_compiler(options, includes, narray_path, runner)
273
+ setup_cxx_compiler(options, includes, runner)
274
+ setup_fortran_compiler(options, runner)
275
+ setup_cuda_compiler(options, runner)
237
276
 
238
- rule '.o' => '.f90' do |t|
239
- f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
240
- runner.call(t, f_call_string)
241
- end
242
-
243
- rule '.o' => '.cpp' do |t|
244
- cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
245
- runner.call(t, cxx_call_string)
246
- end
277
+ return setup_linker(options)
247
278
 
248
- rule '.o' => '.cu' do |t|
249
- cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
250
- runner.call(t, cuda_call_string)
251
- end
252
- return ld_flags
253
279
  end
254
280
 
255
281
  def select_cl_platform(options)
@@ -287,8 +313,32 @@ module BOAST
287
313
  return devices.first
288
314
  end
289
315
 
290
- def build_opencl(options)
316
+ def init_opencl_types
317
+ @@opencl_real_types = {
318
+ 2 => OpenCL::Half,
319
+ 4 => OpenCL::Float,
320
+ 8 => OpenCL::Double
321
+ }
322
+
323
+ @@opencl_int_types = {
324
+ true => {
325
+ 1 => OpenCL::Char,
326
+ 2 => OpenCL::Short,
327
+ 4 => OpenCL::Int,
328
+ 8 => OpenCL::Long
329
+ },
330
+ false => {
331
+ 1 => OpenCL::UChar,
332
+ 2 => OpenCL::UShort,
333
+ 4 => OpenCL::UInt,
334
+ 8 => OpenCL::ULong
335
+ }
336
+ }
337
+ end
338
+
339
+ def init_opencl(options)
291
340
  require 'opencl_ruby_ffi'
341
+ init_opencl_types
292
342
  device = select_cl_device(options)
293
343
  @context = OpenCL::create_context([device])
294
344
  program = @context.create_program_with_source([@code.string])
@@ -299,81 +349,84 @@ module BOAST
299
349
  puts e.to_s
300
350
  puts program.build_status
301
351
  puts program.build_log
302
- if options[:verbose] or BOAST::get_verbose then
352
+ if options[:verbose] or get_verbose then
303
353
  puts @code.string
304
354
  end
305
355
  raise "OpenCL Failed to build #{@procedure.name}"
306
356
  end
307
- if options[:verbose] or BOAST::get_verbose then
357
+ if options[:verbose] or get_verbose then
308
358
  program.build_log.each {|dev,log|
309
359
  puts "#{device.name}: #{log}"
310
360
  }
311
361
  end
312
362
  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
313
363
  @kernel = program.create_kernel(@procedure.name)
314
- run_method = <<EOF
315
- def self.run(*args)
316
- raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
317
- params = []
318
- opts = {}
319
- opts = args.pop if args.length == #{@procedure.parameters.length+1}
320
- @procedure.parameters.each_index { |i|
321
- if @procedure.parameters[i].dimension then
322
- if @procedure.parameters[i].direction == :in then
364
+ return self
365
+ end
366
+
367
+ def create_opencl_array(arg, parameter)
368
+ if parameter.direction == :in then
323
369
  flags = OpenCL::Mem::Flags::READ_ONLY
324
- elsif @procedure.parameters[i].direction == :out then
370
+ elsif parameter.direction == :out then
325
371
  flags = OpenCL::Mem::Flags::WRITE_ONLY
326
372
  else
327
373
  flags = OpenCL::Mem::Flags::READ_WRITE
328
374
  end
329
- if @procedure.parameters[i].texture then
330
- params[i] = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), args[i].size * args[i].element_size, 1, :flags => flags )
331
- # if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :in then
332
- @queue.enqueue_write_image( params[i], args[i], :blocking => true )
333
- # end
375
+ if parameter.texture then
376
+ param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
377
+ @queue.enqueue_write_image( param, arg, :blocking => true )
334
378
  else
335
- params[i] = @context.create_buffer( args[i].size * args[i].element_size, :flags => flags )
336
- # if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :in then
337
- @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
338
- # end
379
+ param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
380
+ @queue.enqueue_write_buffer( param, arg, :blocking => true )
339
381
  end
340
- else
341
- if @procedure.parameters[i].type.is_a?(Real) then
342
- params[i] = OpenCL::Half::new(args[i]) if @procedure.parameters[i].type.size == 2
343
- params[i] = OpenCL::Float::new(args[i]) if @procedure.parameters[i].type.size == 4
344
- params[i] = OpenCL::Double::new(args[i]) if @procedure.parameters[i].type.size == 8
345
- elsif @procedure.parameters[i].type.is_a?(Int) then
346
- if @procedure.parameters[i].type.signed
347
- params[i] = OpenCL::Char::new(args[i]) if @procedure.parameters[i].type.size == 1
348
- params[i] = OpenCL::Short::new(args[i]) if @procedure.parameters[i].type.size == 2
349
- params[i] = OpenCL::Int::new(args[i]) if @procedure.parameters[i].type.size == 4
350
- params[i] = OpenCL::Long::new(args[i]) if @procedure.parameters[i].type.size == 8
351
- else
352
- params[i] = OpenCL::UChar::new(args[i]) if @procedure.parameters[i].type.size == 1
353
- params[i] = OpenCL::UShort::new(args[i]) if @procedure.parameters[i].type.size == 2
354
- params[i] = OpenCL::UInt::new(args[i]) if @procedure.parameters[i].type.size == 4
355
- params[i] = OpenCL::ULong::new(args[i]) if @procedure.parameters[i].type.size == 8
356
- end
382
+ return param
383
+ end
384
+
385
+ def create_opencl_scalar(arg, parameter)
386
+ if parameter.type.is_a?(Real) then
387
+ return @@opencl_real_types[parameter.type.size]::new(arg)
388
+ elsif parameter.type.is_a?(Int) then
389
+ return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
357
390
  else
358
- params[i] = args[i]
391
+ return arg
359
392
  end
360
393
  end
394
+
395
+ def create_opencl_param(arg, parameter)
396
+ if parameter.dimension then
397
+ return create_opencl_array(arg, parameter)
398
+ else
399
+ return create_opencl_scalar(arg, parameter)
400
+ end
401
+ end
402
+
403
+ def read_opencl_param(param, arg, parameter)
404
+ if parameter.texture then
405
+ @queue.enqueue_read_image( param, arg, :blocking => true )
406
+ else
407
+ @queue.enqueue_read_buffer( param, arg, :blocking => true )
408
+ end
409
+ end
410
+
411
+ def build_opencl(options)
412
+ init_opencl(options)
413
+
414
+ run_method = <<EOF
415
+ def self.run(*args)
416
+ raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
417
+ params = []
418
+ opts = {}
419
+ opts = args.pop if args.length == #{@procedure.parameters.length+1}
420
+ @procedure.parameters.each_index { |i|
421
+ params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
361
422
  }
362
423
  params.each_index{ |i|
363
424
  @kernel.set_arg(i, params[i])
364
425
  }
365
426
  event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
366
427
  @procedure.parameters.each_index { |i|
367
- if @procedure.parameters[i].dimension then
368
- if @procedure.parameters[i].texture then
369
- if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
370
- @queue.enqueue_read_image( params[i], args[i], :blocking => true )
371
- end
372
- else
373
- if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
374
- @queue.enqueue_read_buffer( params[i], args[i], :blocking => true )
375
- end
376
- end
428
+ if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
429
+ read_opencl_param( params[i], args[i], @procedure.parameters[i] )
377
430
  end
378
431
  }
379
432
  result = {}
@@ -387,75 +440,90 @@ EOF
387
440
  return self
388
441
  end
389
442
 
390
- def build(options = {})
391
- compiler_options = BOAST::get_compiler_options
392
- compiler_options.update(options)
393
- return build_opencl(compiler_options) if @lang == BOAST::CL
394
- ldflags = self.setup_compiler(compiler_options)
395
- extension = ".c" if @lang == BOAST::C
396
- extension = ".cu" if @lang == BOAST::CUDA
397
- extension = ".f90" if @lang == BOAST::FORTRAN
398
- #temporary
399
- c_compiler = compiler_options[:CC]
400
- c_compiler = "cc" if not c_compiler
401
- linker = compiler_options[:LD]
402
- linker = c_compiler if not linker
403
- #end temporary
404
- if options[:openmp] then
405
- openmp_ld_flags = BOAST::get_openmp_flags[linker]
406
- if not openmp_ld_flags then
407
- keys = BOAST::get_openmp_flags.keys
408
- keys.each { |k|
409
- openmp_ld_flags = BOAST::get_openmp_flags[k] if linker.match(k)
410
- }
411
- end
412
- raise "unkwown openmp flags for: #{linker}" if not openmp_ld_flags
413
- ldflags += " #{openmp_ld_flags}"
414
- end
415
- source_file = Tempfile::new([@procedure.name,extension])
416
- path = source_file.path
417
- target = path.chomp(File::extname(path))+".o"
418
- fill_code(source_file)
419
- source_file.close
443
+ @@extensions = {
444
+ C => ".c",
445
+ CUDA => ".cu",
446
+ FORTRAN => ".f90"
447
+ }
420
448
 
421
- previous_lang = BOAST::get_lang
422
- previous_output = BOAST::get_output
423
- BOAST::set_lang(BOAST::C)
449
+ def get_sub_kernels
450
+ kernel_files = []
451
+ @kernels.each { |kernel|
452
+ kernel_file = Tempfile::new([kernel.procedure.name,".o"])
453
+ kernel.binary.rewind
454
+ kernel_file.write( kernel.binary.read )
455
+ kernel_file.close
456
+ kernel_files.push(kernel_file)
457
+ }
458
+ end
459
+
460
+ def create_module_source(path)
461
+ previous_lang = get_lang
462
+ previous_output = get_output
463
+ set_lang( C )
424
464
  module_file_name = File::split(path.chomp(File::extname(path)))[0] + "/Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_") + ".c"
425
465
  module_name = File::split(module_file_name.chomp(File::extname(module_file_name)))[1]
426
466
  module_file = File::open(module_file_name,"w+")
427
- BOAST::set_output(module_file)
467
+ set_output( module_file )
428
468
  fill_module(module_file, module_name)
429
469
  module_file.rewind
430
470
  #puts module_file.read
431
471
  module_file.close
432
- BOAST::set_lang(previous_lang)
433
- BOAST::set_output(previous_output)
472
+ set_lang( previous_lang )
473
+ set_output( previous_output )
474
+ return [module_file_name, module_name]
475
+ end
476
+
477
+ def save_binary(target)
478
+ f = File::open(target,"rb")
479
+ @binary = StringIO::new
480
+ @binary.write( f.read )
481
+ f.close
482
+ end
483
+
484
+ def create_source
485
+ extension = @@extensions[@lang]
486
+ source_file = Tempfile::new([@procedure.name,extension])
487
+ path = source_file.path
488
+ target = path.chomp(File::extname(path))+".o"
489
+ fill_code(source_file)
490
+ source_file.close
491
+ return [source_file, path, target]
492
+ end
493
+
494
+ def build(options = {})
495
+ compiler_options = BOAST::get_compiler_options
496
+ compiler_options.update(options)
497
+ return build_opencl(compiler_options) if @lang == CL
498
+
499
+ linker, ldflags = setup_compilers(compiler_options)
500
+
501
+ extension = @@extensions[@lang]
502
+
503
+ source_file, path, target = create_source
504
+
505
+ module_file_name, module_name = create_module_source(path)
506
+
434
507
  module_target = module_file_name.chomp(File::extname(module_file_name))+".o"
435
508
  module_final = module_file_name.chomp(File::extname(module_file_name))+".so"
436
- kernel_files = []
437
- @kernels.each { |kernel|
438
- kernel_file = Tempfile::new([kernel.procedure.name,".o"])
439
- kernel.binary.rewind
440
- kernel_file.write( kernel.binary.read )
441
- kernel_file.close
442
- kernel_files.push(kernel_file)
443
- }
509
+
510
+
511
+ kernel_files = get_sub_kernels
512
+
444
513
  file module_final => [module_target, target] do
445
514
  #puts "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
446
515
  sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
447
516
  end
448
517
  Rake::Task[module_final].invoke
518
+
449
519
  require(module_final)
450
520
  eval "self.extend(#{module_name})"
451
- f = File::open(target,"rb")
452
- @binary = StringIO::new
453
- @binary.write( f.read )
454
- f.close
455
- File.unlink(target)
456
- File.unlink(module_target)
457
- File.unlink(module_file_name)
458
- File.unlink(module_final)
521
+
522
+ save_binary(target)
523
+
524
+ [target, module_target, module_file_name, module_final].each { |fn|
525
+ File::unlink(fn)
526
+ }
459
527
  kernel_files.each { |f|
460
528
  f.unlink
461
529
  }
@@ -464,13 +532,13 @@ EOF
464
532
 
465
533
  def fill_code(source_file)
466
534
  @code.rewind
467
- source_file.puts "#include <inttypes.h>" if @lang == BOAST::C or @lang == BOAST::CUDA
468
- source_file.puts "#include <cuda.h>" if @lang == BOAST::CUDA
535
+ source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
536
+ source_file.puts "#include <cuda.h>" if @lang == CUDA
469
537
  source_file.write @code.read
470
- if @lang == BOAST::CUDA then
538
+ if @lang == CUDA then
471
539
  source_file.write <<EOF
472
540
  extern "C" {
473
- #{@procedure.boast_header_s(BOAST::CUDA)}{
541
+ #{@procedure.boast_header_s(CUDA)}{
474
542
  dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
475
543
  dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
476
544
  cudaEvent_t __start, __stop;
@@ -490,8 +558,8 @@ EOF
490
558
  @code.rewind
491
559
  end
492
560
 
493
- def fill_module(module_file, module_name)
494
- module_file.write <<EOF
561
+ def module_header(module_file)
562
+ module_file.print <<EOF
495
563
  #include "ruby.h"
496
564
  #include <inttypes.h>
497
565
  #include <time.h>
@@ -499,11 +567,13 @@ EOF
499
567
  #include "narray.h"
500
568
  #endif
501
569
  EOF
502
- if( @lang == BOAST::CUDA ) then
570
+ if( @lang == CUDA ) then
503
571
  module_file.print "#include <cuda_runtime.h>\n"
504
572
  end
505
- @procedure.boast_header(@lang)
506
- module_file.write <<EOF
573
+ end
574
+
575
+ def module_preamble(module_file, module_name)
576
+ module_file.print <<EOF
507
577
  VALUE #{module_name} = Qnil;
508
578
  void Init_#{module_name}();
509
579
  VALUE method_run(int argc, VALUE *argv, VALUE self);
@@ -511,46 +581,37 @@ void Init_#{module_name}() {
511
581
  #{module_name} = rb_define_module("#{module_name}");
512
582
  rb_define_method(#{module_name}, "run", method_run, -1);
513
583
  }
514
- VALUE method_run(int argc, VALUE *argv, VALUE self) {
515
584
  EOF
516
- if( @lang == BOAST::CUDA ) then
517
- module_file.write <<EOF
585
+ end
586
+
587
+ def check_args(module_file)
588
+ if @lang == CUDA then
589
+ module_file.print <<EOF
518
590
  if( argc < #{@procedure.parameters.length} || argc > #{@procedure.parameters.length + 1} )
519
591
  rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
520
- VALUE rb_opts;
521
- VALUE rb_ptr;
522
- size_t block_size[3] = {1,1,1};
523
- size_t block_number[3] = {1,1,1};
524
592
  EOF
525
593
  else
526
- module_file.write <<EOF
594
+ module_file.print <<EOF
527
595
  if( argc != #{@procedure.parameters.length} )
528
596
  rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
529
- VALUE rb_ptr;
530
597
  EOF
531
598
  end
532
- argc = @procedure.parameters.length
533
- argv = Variable::new("argv",Real,{:dimension => [ Dimension::new(0,argc-1) ] })
534
- rb_ptr = Variable::new("rb_ptr",Int)
535
- @procedure.parameters.each { |param|
536
- param_copy = param.copy
537
- param_copy.constant = nil
538
- param_copy.direction = nil
539
- param_copy.decl
540
- }
599
+ end
600
+
601
+ def get_params_value(module_file, argv, rb_ptr)
541
602
  @procedure.parameters.each_index do |i|
542
603
  param = @procedure.parameters[i]
543
604
  if not param.dimension then
544
605
  case param.type
545
- when Int
546
- (param === FuncCall::new("NUM2INT", argv[i])).print if param.type.size == 4
547
- (param === FuncCall::new("NUM2LONG", argv[i])).print if param.type.size == 8
548
- when Real
549
- (param === FuncCall::new("NUM2DBL", argv[i])).print
606
+ when Int
607
+ (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
608
+ (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
609
+ when Real
610
+ (param === FuncCall::new("NUM2DBL", argv[i])).pr
550
611
  end
551
612
  else
552
- (rb_ptr === argv[i]).print
553
- if @lang == BOAST::CUDA then
613
+ (rb_ptr === argv[i]).pr
614
+ if @lang == CUDA then
554
615
  module_file.print <<EOF
555
616
  if ( IsNArray(rb_ptr) ) {
556
617
  struct NARRAY *n_ary;
@@ -558,16 +619,9 @@ EOF
558
619
  Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
559
620
  array_size = n_ary->total * na_sizeof[n_ary->type];
560
621
  cudaMalloc( (void **) &#{param.name}, array_size);
561
- EOF
562
- # if param.direction == :in then
563
- module_file.print <<EOF
564
622
  cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
565
- EOF
566
- # end
567
- module_file.print <<EOF
568
623
  } else
569
624
  rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
570
-
571
625
  EOF
572
626
  else
573
627
  module_file.print <<EOF
@@ -583,8 +637,26 @@ EOF
583
637
  end
584
638
  end
585
639
  end
586
- if @lang == BOAST::CUDA then
587
- module_file.write <<EOF
640
+ end
641
+
642
+ def decl_module_params(module_file)
643
+ @procedure.parameters.each { |param|
644
+ param_copy = param.copy
645
+ param_copy.constant = nil
646
+ param_copy.direction = nil
647
+ param_copy.decl
648
+ }
649
+ module_file.print " #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
650
+ module_file.print " VALUE stats = rb_hash_new();\n"
651
+ module_file.print " struct timespec start, stop;\n"
652
+ module_file.print " unsigned long long int duration;\n"
653
+ end
654
+
655
+ def get_cuda_launch_bounds(module_file)
656
+ module_file.print <<EOF
657
+ VALUE rb_opts;
658
+ size_t block_size[3] = {1,1,1};
659
+ size_t block_number[3] = {1,1,1};
588
660
  if( argc == #{@procedure.parameters.length + 1} ) {
589
661
  rb_opts = argv[argc -1];
590
662
  if ( rb_opts != Qnil ) {
@@ -615,23 +687,20 @@ EOF
615
687
  }
616
688
  }
617
689
  EOF
618
- end
619
- module_file.print " #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
620
- module_file.print " VALUE stats = rb_hash_new();\n"
621
- module_file.print " struct timespec start, stop;\n"
622
- module_file.print " unsigned long long int duration;\n"
623
- module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
624
- if @lang == BOAST::CUDA then
690
+ end
691
+
692
+ def create_procedure_call(module_file)
693
+ if @lang == CUDA then
625
694
  module_file.print " duration = "
626
695
  elsif @procedure.properties[:return] then
627
696
  module_file.print " ret = "
628
697
  end
629
698
  module_file.print " #{@procedure.name}"
630
- module_file.print "_" if @lang == BOAST::FORTRAN
631
- module_file.print "_wrapper" if @lang == BOAST::CUDA
699
+ module_file.print "_" if @lang == FORTRAN
700
+ module_file.print "_wrapper" if @lang == CUDA
632
701
  module_file.print "("
633
702
  params = []
634
- if(@lang == BOAST::FORTRAN) then
703
+ if(@lang == FORTRAN) then
635
704
  @procedure.parameters.each { |param|
636
705
  if param.dimension then
637
706
  params.push( param.name )
@@ -650,18 +719,19 @@ EOF
650
719
  end
651
720
  }
652
721
  end
653
- if @lang == BOAST::CUDA then
722
+ if @lang == CUDA then
654
723
  params.push( "block_number", "block_size" )
655
724
  end
656
725
  module_file.print params.join(", ")
657
726
  module_file.print " );\n"
658
- module_file.print " clock_gettime(CLOCK_REALTIME, &stop);\n"
727
+ end
659
728
 
660
- if @lang == BOAST::CUDA then
729
+ def get_results(module_file, argv, rb_ptr)
730
+ if @lang == CUDA then
661
731
  @procedure.parameters.each_index do |i|
662
732
  param = @procedure.parameters[i]
663
733
  if param.dimension then
664
- (rb_ptr === argv[i]).print
734
+ (rb_ptr === argv[i]).pr
665
735
  module_file.print <<EOF
666
736
  if ( IsNArray(rb_ptr) ) {
667
737
  EOF
@@ -683,7 +753,10 @@ EOF
683
753
  end
684
754
  end
685
755
  end
686
- if @lang != BOAST::CUDA then
756
+ end
757
+
758
+ def store_result(module_file)
759
+ if @lang != CUDA then
687
760
  module_file.print " duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
688
761
  module_file.print " duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
689
762
  end
@@ -694,14 +767,49 @@ EOF
694
767
  module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
695
768
  module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
696
769
  end
770
+ end
771
+
772
+ def fill_module(module_file, module_name)
773
+ module_header(module_file)
774
+ @procedure.boast_header(@lang)
775
+ module_preamble(module_file, module_name)
776
+
777
+ module_file.puts "VALUE method_run(int argc, VALUE *argv, VALUE self) {"
778
+
779
+ check_args(module_file)
780
+
781
+ argc = @procedure.parameters.length
782
+ argv = Variable::new("argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
783
+ rb_ptr = Variable::new("rb_ptr", CustomType, :type_name => "VALUE")
784
+ set_transition("VALUE", "VALUE", :default, CustomType::new(:type_name => "VALUE"))
785
+ rb_ptr.decl
786
+
787
+ decl_module_params(module_file)
788
+
789
+ get_params_value(module_file, argv, rb_ptr)
790
+
791
+ if @lang == CUDA then
792
+ module_file.print get_cuda_launch_bounds(module_file)
793
+ end
794
+
795
+ module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
796
+
797
+ create_procedure_call(module_file)
798
+
799
+ module_file.print " clock_gettime(CLOCK_REALTIME, &stop);\n"
800
+
801
+ get_results(module_file, argv, rb_ptr)
802
+
803
+ store_result(module_file)
804
+
697
805
  module_file.print " return stats;\n"
698
- module_file.print "}"
806
+ module_file.print "}"
699
807
  end
700
808
 
701
809
  def method_missing(meth, *args, &block)
702
810
  if meth.to_s == "run" then
703
- self.build
704
- self.run(*args,&block)
811
+ build
812
+ run(*args,&block)
705
813
  else
706
814
  super
707
815
  end
@@ -737,6 +845,104 @@ EOF
737
845
  return res
738
846
  end
739
847
 
848
+ def get_array_type(param)
849
+ if param.type.class == Real then
850
+ case param.type.size
851
+ when 4
852
+ type = NArray::SFLOAT
853
+ when 8
854
+ type = NArray::FLOAT
855
+ else
856
+ STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
857
+ type = NArray::BYTE
858
+ end
859
+ elsif param.type.class == Int then
860
+ case param.type.size
861
+ when 1
862
+ type = NArray::BYTE
863
+ when 2
864
+ type = NArray::SINT
865
+ when 4
866
+ type = NArray::SINT
867
+ else
868
+ STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
869
+ type = NArray::BYTE
870
+ end
871
+ else
872
+ STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
873
+ type = NArray::BYTE
874
+ end
875
+ return type
876
+ end
877
+
878
+ def get_scalar_type(param)
879
+ if param.type.class == Real then
880
+ case param.type.size
881
+ when 4
882
+ type = "f"
883
+ when 8
884
+ type = "d"
885
+ else
886
+ raise "Unsupported Real scalar size: #{param.type.size}!"
887
+ end
888
+ elsif param.type.class == Int then
889
+ case param.type.size
890
+ when 1
891
+ type = "C"
892
+ when 2
893
+ type = "S"
894
+ when 4
895
+ type = "L"
896
+ when 8
897
+ type = "Q"
898
+ else
899
+ raise "Unsupported Int scalar size: #{param.type.size}!"
900
+ end
901
+ if param.type.signed? then
902
+ type.downcase!
903
+ end
904
+ end
905
+ return type
906
+ end
907
+
908
+ def read_param(param, directory, suffix, intent)
909
+ if intent == :out and ( param.direction == :in or param.constant ) then
910
+ return nil
911
+ end
912
+ f = File::new( directory + "/" + param.name+suffix, "rb" )
913
+ if param.dimension then
914
+ type = get_array_type(param)
915
+ if f.size == 0 then
916
+ res = NArray::new(type, 1)
917
+ else
918
+ res = NArray.to_na(f.read, type)
919
+ end
920
+ else
921
+ type = get_scalar_type(param)
922
+ res = f.read.unpack(type).first
923
+ end
924
+ f.close
925
+ return res
926
+ end
927
+
928
+ def get_gpu_dim(directory)
929
+ f = File::new( directory + "/problem_size", "r")
930
+ s = f.read
931
+ local_dim, global_dim = s.scan(/<(.*?)>/)
932
+ local_dim = local_dim.pop.split(",").collect!{ |e| e.to_i }
933
+ global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
934
+ (local_dim.length..2).each{ |i| local_dim[i] = 1 }
935
+ (global_dim.length..2).each{ |i| global_dim[i] = 1 }
936
+ if @lang == CL then
937
+ local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
938
+ res = { :global_work_size => global_dim, :local_work_size => local_dim }
939
+ else
940
+ res = { :block_number => global_dim, :block_size => local_dim }
941
+ end
942
+ f.close
943
+ return res
944
+ end
945
+
740
946
  def load_ref_files( path = "", suffix = "", intent )
741
947
  proc_path = path + "/#{@procedure.name}/"
742
948
  res_h = {}
@@ -749,89 +955,10 @@ EOF
749
955
  dirs.each { |d|
750
956
  res = []
751
957
  @procedure.parameters.collect { |param|
752
- if intent == :out and ( param.direction == :in or param.constant ) then
753
- res.push nil
754
- next
755
- end
756
- f = File::new( d+"/"+param.name+suffix, "rb" )
757
- if param.dimension then
758
- if param.type.class == BOAST::Real then
759
- case param.type.size
760
- when 4
761
- type = NArray::SFLOAT
762
- when 8
763
- type = NArray::FLOAT
764
- else
765
- STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
766
- type = NArray::BYTE
767
- end
768
- elsif param.type.class == BOAST::Int then
769
- case param.type.size
770
- when 1
771
- type = NArray::BYTE
772
- when 2
773
- type = NArray::SINT
774
- when 4
775
- type = NArray::SINT
776
- else
777
- STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
778
- type = NArray::BYTE
779
- end
780
- else
781
- STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if BOAST::debug
782
- type = NArray::BYTE
783
- end
784
- if f.size == 0 then
785
- res.push NArray::new(type, 1)
786
- else
787
- res.push NArray.to_na(f.read, type)
788
- end
789
- else
790
- if param.type.class == BOAST::Real then
791
- case param.type.size
792
- when 4
793
- type = "f"
794
- when 8
795
- type = "d"
796
- else
797
- raise "Unsupported Real scalar size: #{param.type.size}!"
798
- end
799
- elsif param.type.class == BOAST::Int then
800
- case param.type.size
801
- when 1
802
- type = "C"
803
- when 2
804
- type = "S"
805
- when 4
806
- type = "L"
807
- when 8
808
- type = "Q"
809
- else
810
- raise "Unsupported Int scalar size: #{param.type.size}!"
811
- end
812
- if param.type.signed? then
813
- type.downcase!
814
- end
815
- end
816
- res.push f.read.unpack(type).first
817
- end
818
- f.close
958
+ res.push read_param(param, d, suffix, intent)
819
959
  }
820
- if @lang == BOAST::CUDA or @lang == BOAST::CL then
821
- f = File::new( d +"/problem_size", "r")
822
- s = f.read
823
- local_dim, global_dim = s.scan(/<(.*?)>/)
824
- local_dim = local_dim.pop.split(",").collect!{ |e| e.to_i }
825
- global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
826
- (local_dim.length..2).each{ |i| local_dim[i] = 1 }
827
- (global_dim.length..2).each{ |i| global_dim[i] = 1 }
828
- if @lang == BOAST::CL then
829
- local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
830
- res.push( { :global_work_size => global_dim, :local_work_size => local_dim } )
831
- else
832
- res.push( { :block_number => global_dim, :block_size => local_dim } )
833
- end
834
- f.close
960
+ if @lang == CUDA or @lang == CL then
961
+ res.push get_gpu_dim(d)
835
962
  end
836
963
  res_h[d] = res
837
964
  }