BOAST 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/BOAST.gemspec +2 -31
  3. data/README.md +240 -0
  4. data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
  5. data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
  6. data/lib/BOAST/Runtime/CKernel.rb +94 -0
  7. data/lib/BOAST/Runtime/CRuntime.rb +32 -0
  8. data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
  9. data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
  10. data/lib/BOAST/Runtime/Compilers.rb +205 -0
  11. data/lib/BOAST/Runtime/Config.rb +94 -0
  12. data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
  13. data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
  14. data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
  15. data/lib/BOAST/Runtime/NonRegression.rb +157 -0
  16. data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
  17. data/lib/BOAST/Runtime/Probe.rb +136 -0
  18. data/lib/BOAST.rb +37 -26
  19. metadata +40 -28
  20. data/lib/BOAST/CKernel.rb +0 -1236
  21. data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
  22. data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
  23. data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
  24. data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
  25. data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
  26. data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
  27. data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
  28. data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
  29. data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
  30. data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
  31. data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
  32. data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
  33. data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
  34. data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
  35. data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
  36. data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
  37. data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
  38. data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
  39. data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
  40. data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
  41. data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
  42. data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
  43. data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
data/lib/BOAST/CKernel.rb DELETED
@@ -1,1236 +0,0 @@
1
- require 'stringio'
2
- require 'rubygems'
3
- require 'rake'
4
- require 'tempfile'
5
- require 'rbconfig'
6
- require 'systemu'
7
- require 'yaml'
8
- require 'pathname'
9
- require 'os'
10
-
11
- module BOAST
12
- @@compiler_default_options = {
13
- :FC => 'gfortran',
14
- :FCFLAGS => '-O2 -Wall',
15
- :CC => 'gcc',
16
- :CFLAGS => '-O2 -Wall',
17
- :CXX => 'g++',
18
- :CXXFLAGS => '-O2 -Wall',
19
- :NVCC => 'nvcc',
20
- :NVCCFLAGS => '-O2',
21
- :LDFLAGS => '',
22
- :CLFLAGS => '',
23
- :CLVENDOR => nil,
24
- :CLPLATFORM => nil,
25
- :CLDEVICE => nil,
26
- :CLDEVICETYPE => nil,
27
- :openmp => false
28
- }
29
-
30
- @@openmp_default_flags = {
31
- "gcc" => "-fopenmp",
32
- "icc" => "-openmp",
33
- "gfortran" => "-fopenmp",
34
- "ifort" => "-openmp",
35
- "g++" => "-fopenmp",
36
- "icpc" => "-openmp"
37
- }
38
-
39
- module PrivateStateAccessor
40
- private_boolean_state_accessor :verbose
41
- private_boolean_state_accessor :debug_source
42
- private_boolean_state_accessor :ffi
43
- end
44
-
45
- boolean_state_accessor :verbose
46
- boolean_state_accessor :debug_source
47
- boolean_state_accessor :ffi
48
- @@ffi = false
49
- @@verbose = false
50
- @@debug_source = false
51
- FORTRAN_LINE_LENGTH = 72
52
-
53
- module_function
54
-
55
- def read_boast_config
56
- home_config_dir = ENV["XDG_CONFIG_HOME"]
57
- home_config_dir = "#{Dir.home}/.config" if not home_config_dir
58
- Dir.mkdir( home_config_dir ) if not File::exist?( home_config_dir )
59
- return if not File::directory?(home_config_dir)
60
- boast_config_dir = "#{home_config_dir}/BOAST"
61
- Dir.mkdir( boast_config_dir ) if not File::exist?( boast_config_dir )
62
- compiler_options_file = "#{boast_config_dir}/compiler_options"
63
- if File::exist?( compiler_options_file ) then
64
- File::open( compiler_options_file, "r" ) { |f|
65
- @@compiler_default_options.update( YAML::load( f.read ) )
66
- }
67
- else
68
- File::open( compiler_options_file, "w" ) { |f|
69
- f.write YAML::dump( @@compiler_default_options )
70
- }
71
- end
72
- openmp_flags_file = "#{boast_config_dir}/openmp_flags"
73
- if File::exist?( openmp_flags_file ) then
74
- File::open( openmp_flags_file, "r" ) { |f|
75
- @@openmp_default_flags.update( YAML::load( f.read ) )
76
- }
77
- else
78
- File::open( openmp_flags_file, "w" ) { |f|
79
- f.write YAML::dump( @@openmp_default_flags )
80
- }
81
- end
82
- @@compiler_default_options.each_key { |k|
83
- @@compiler_default_options[k] = ENV[k.to_s] if ENV[k.to_s]
84
- }
85
- @@compiler_default_options[:LD] = ENV["LD"] if ENV["LD"]
86
- @@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
87
- @@ffi = ENV["FFI"] if ENV["FFI"]
88
- @@debug_source = ENV["DEBUG_SOURCE"] if ENV["DEBUG_SOURCE"]
89
- end
90
-
91
- read_boast_config
92
-
93
- def get_openmp_flags
94
- return @@openmp_default_flags.clone
95
- end
96
-
97
- def get_compiler_options
98
- return @@compiler_default_options.clone
99
- end
100
-
101
- class CKernel
102
- include Rake::DSL
103
- include Inspectable
104
- include PrivateStateAccessor
105
- include TypeTransition
106
-
107
- attr_accessor :code
108
- attr_accessor :procedure
109
- attr_accessor :lang
110
- attr_accessor :binary
111
- attr_accessor :kernels
112
- attr_accessor :cost_function
113
-
114
- def initialize(options={})
115
- if options[:code] then
116
- @code = options[:code]
117
- elsif get_chain_code
118
- @code = get_output
119
- @code.seek(0,SEEK_END)
120
- else
121
- @code = StringIO::new
122
- end
123
- set_output(@code)
124
- if options[:kernels] then
125
- @kernels = options[:kernels]
126
- else
127
- @kernels = []
128
- end
129
- if options[:lang] then
130
- @lang = options[:lang]
131
- else
132
- @lang = get_lang
133
- end
134
- end
135
-
136
- def print
137
- @code.rewind
138
- puts @code.read
139
- end
140
-
141
- def to_s
142
- @code.rewind
143
- return code.read
144
- end
145
-
146
-
147
- def get_openmp_flags(compiler)
148
- openmp_flags = BOAST::get_openmp_flags[compiler]
149
- if not openmp_flags then
150
- keys = BOAST::get_openmp_flags.keys
151
- keys.each { |k|
152
- openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
153
- }
154
- end
155
- return openmp_flags
156
- end
157
-
158
- def get_includes(narray_path)
159
- includes = "-I#{RbConfig::CONFIG["archdir"]}"
160
- includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
161
- includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
162
- includes += " -I#{narray_path}" if narray_path
163
- return includes
164
- end
165
-
166
- def get_narray_path
167
- narray_path = nil
168
- begin
169
- spec = Gem::Specification::find_by_name('narray')
170
- narray_path = spec.full_gem_path
171
- rescue Gem::LoadError => e
172
- rescue NoMethodError => e
173
- spec = Gem::available?('narray')
174
- if spec then
175
- require 'narray'
176
- narray_path = Gem.loaded_specs['narray'].full_gem_path
177
- end
178
- end
179
- end
180
-
181
- def setup_c_compiler(options, includes, narray_path, runner)
182
- c_compiler = options[:CC]
183
- cflags = options[:CFLAGS]
184
- cflags += " -fPIC #{includes}"
185
- cflags += " -DHAVE_NARRAY_H" if narray_path
186
- if options[:openmp] and @lang == C then
187
- openmp_cflags = get_openmp_flags(c_compiler)
188
- raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
189
- cflags += " #{openmp_cflags}"
190
- end
191
-
192
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.c' do |t|
193
- c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
194
- runner.call(t, c_call_string)
195
- end
196
- end
197
-
198
- def setup_cxx_compiler(options, includes, runner)
199
- cxx_compiler = options[:CXX]
200
- cxxflags = options[:CXXFLAGS]
201
- cxxflags += " -fPIC #{includes}"
202
- if options[:openmp] and @lang == C then
203
- openmp_cxxflags = get_openmp_flags(cxx_compiler)
204
- raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
205
- cxxflags += " #{openmp_cxxflags}"
206
- end
207
-
208
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
209
- cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
210
- runner.call(t, cxx_call_string)
211
- end
212
- end
213
-
214
- def setup_fortran_compiler(options, runner)
215
- f_compiler = options[:FC]
216
- fcflags = options[:FCFLAGS]
217
- fcflags += " -fPIC"
218
- fcflags += " -fno-second-underscore" if f_compiler == 'g95'
219
- if options[:openmp] and @lang == FORTRAN then
220
- openmp_fcflags = get_openmp_flags(f_compiler)
221
- raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
222
- fcflags += " #{openmp_fcflags}"
223
- end
224
-
225
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
226
- f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
227
- runner.call(t, f_call_string)
228
- end
229
- end
230
-
231
- def setup_cuda_compiler(options, runner)
232
- cuda_compiler = options[:NVCC]
233
- cudaflags = options[:NVCCFLAGS]
234
- cudaflags += " --compiler-options '-fPIC'"
235
-
236
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cu' do |t|
237
- cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
238
- runner.call(t, cuda_call_string)
239
- end
240
- end
241
-
242
- def setup_linker(options)
243
- ldflags = options[:LDFLAGS]
244
- ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]}"
245
- ldflags += " -lrt" if not OS.mac?
246
- ldflags += " -lcudart" if @lang == CUDA
247
- c_compiler = options[:CC]
248
- c_compiler = "cc" if not c_compiler
249
- linker = options[:LD]
250
- linker = c_compiler if not linker
251
- if options[:openmp] then
252
- openmp_ldflags = get_openmp_flags(linker)
253
- raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
254
- ldflags += " #{openmp_ldflags}"
255
- end
256
-
257
- if OS.mac? then
258
- ldflags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"
259
- ldshared = "-dynamic -bundle"
260
- else
261
- ldflags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
262
- ldshared = "-shared"
263
- end
264
-
265
- return [linker, ldshared, ldflags]
266
- end
267
-
268
- def setup_compilers(options = {})
269
- Rake::Task::clear
270
- verbose = options[:verbose]
271
- verbose = get_verbose if not verbose
272
- Rake::verbose(verbose)
273
- Rake::FileUtilsExt.verbose_flag=verbose
274
-
275
- narray_path = get_narray_path
276
- includes = get_includes(narray_path)
277
-
278
- runner = lambda { |t, call_string|
279
- if verbose then
280
- sh call_string
281
- else
282
- status, stdout, stderr = systemu call_string
283
- if not status.success? then
284
- puts stderr
285
- fail "#{t.source}: compilation failed"
286
- end
287
- status.success?
288
- end
289
- }
290
-
291
- setup_c_compiler(options, includes, narray_path, runner)
292
- setup_cxx_compiler(options, includes, runner)
293
- setup_fortran_compiler(options, runner)
294
- setup_cuda_compiler(options, runner)
295
-
296
- return setup_linker(options)
297
-
298
- end
299
-
300
- def select_cl_platform(options)
301
- platforms = OpenCL::get_platforms
302
- if options[:platform_vendor] then
303
- platforms.select!{ |p|
304
- p.vendor.match(options[:platform_vendor])
305
- }
306
- elsif options[:CLVENDOR] then
307
- platforms.select!{ |p|
308
- p.vendor.match(options[:CLVENDOR])
309
- }
310
- end
311
- if options[:CLPLATFORM] then
312
- platforms.select!{ |p|
313
- p.name.match(options[:CLPLATFORM])
314
- }
315
- end
316
- return platforms.first
317
- end
318
-
319
- def select_cl_device(options)
320
- platform = select_cl_platform(options)
321
- type = options[:device_type] ? OpenCL::Device::Type.const_get(options[:device_type]) : options[:CLDEVICETYPE] ? OpenCL::Device::Type.const_get(options[:CLDEVICETYPE]) : OpenCL::Device::Type::ALL
322
- devices = platform.devices(type)
323
- if options[:device_name] then
324
- devices.select!{ |d|
325
- d.name.match(options[:device_name])
326
- }
327
- elsif options[:CLDEVICE] then
328
- devices.select!{ |d|
329
- d.name.match(options[:CLDEVICE])
330
- }
331
- end
332
- return devices.first
333
- end
334
-
335
- def init_opencl_types
336
- @@opencl_real_types = {
337
- 2 => OpenCL::Half,
338
- 4 => OpenCL::Float,
339
- 8 => OpenCL::Double
340
- }
341
-
342
- @@opencl_int_types = {
343
- true => {
344
- 1 => OpenCL::Char,
345
- 2 => OpenCL::Short,
346
- 4 => OpenCL::Int,
347
- 8 => OpenCL::Long
348
- },
349
- false => {
350
- 1 => OpenCL::UChar,
351
- 2 => OpenCL::UShort,
352
- 4 => OpenCL::UInt,
353
- 8 => OpenCL::ULong
354
- }
355
- }
356
- end
357
-
358
- def init_opencl(options)
359
- require 'opencl_ruby_ffi'
360
- init_opencl_types
361
- device = select_cl_device(options)
362
- @context = OpenCL::create_context([device])
363
- program = @context.create_program_with_source([@code.string])
364
- opts = options[:CLFLAGS]
365
- begin
366
- program.build(:options => options[:CLFLAGS])
367
- rescue OpenCL::Error => e
368
- puts e.to_s
369
- puts program.build_status
370
- puts program.build_log
371
- if options[:verbose] or get_verbose then
372
- puts @code.string
373
- end
374
- raise "OpenCL Failed to build #{@procedure.name}"
375
- end
376
- if options[:verbose] or get_verbose then
377
- program.build_log.each {|dev,log|
378
- puts "#{device.name}: #{log}"
379
- }
380
- end
381
- @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
382
- @kernel = program.create_kernel(@procedure.name)
383
- return self
384
- end
385
-
386
- def create_opencl_array(arg, parameter)
387
- if parameter.direction == :in then
388
- flags = OpenCL::Mem::Flags::READ_ONLY
389
- elsif parameter.direction == :out then
390
- flags = OpenCL::Mem::Flags::WRITE_ONLY
391
- else
392
- flags = OpenCL::Mem::Flags::READ_WRITE
393
- end
394
- if parameter.texture then
395
- param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
396
- @queue.enqueue_write_image( param, arg, :blocking => true )
397
- else
398
- param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
399
- @queue.enqueue_write_buffer( param, arg, :blocking => true )
400
- end
401
- return param
402
- end
403
-
404
- def create_opencl_scalar(arg, parameter)
405
- if parameter.type.is_a?(Real) then
406
- return @@opencl_real_types[parameter.type.size]::new(arg)
407
- elsif parameter.type.is_a?(Int) then
408
- return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
409
- else
410
- return arg
411
- end
412
- end
413
-
414
- def create_opencl_param(arg, parameter)
415
- if parameter.dimension then
416
- return create_opencl_array(arg, parameter)
417
- else
418
- return create_opencl_scalar(arg, parameter)
419
- end
420
- end
421
-
422
- def read_opencl_param(param, arg, parameter)
423
- if parameter.texture then
424
- @queue.enqueue_read_image( param, arg, :blocking => true )
425
- else
426
- @queue.enqueue_read_buffer( param, arg, :blocking => true )
427
- end
428
- end
429
-
430
- def build_opencl(options)
431
- init_opencl(options)
432
-
433
- run_method = <<EOF
434
- def self.run(*args)
435
- raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
436
- params = []
437
- opts = {}
438
- opts = args.pop if args.length == #{@procedure.parameters.length+1}
439
- @procedure.parameters.each_index { |i|
440
- params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
441
- }
442
- params.each_index{ |i|
443
- @kernel.set_arg(i, params[i])
444
- }
445
- gws = opts[:global_work_size]
446
- if not gws then
447
- gws = []
448
- opts[:block_number].each_index { |i|
449
- gws.push(opts[:block_number][i]*opts[:block_size][i])
450
- }
451
- end
452
- lws = opts[:local_work_size]
453
- if not lws then
454
- lws = opts[:block_size]
455
- end
456
- event = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
457
- @procedure.parameters.each_index { |i|
458
- if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
459
- read_opencl_param( params[i], args[i], @procedure.parameters[i] )
460
- end
461
- }
462
- result = {}
463
- result[:start] = event.profiling_command_start
464
- result[:end] = event.profiling_command_end
465
- result[:duration] = (result[:end] - result[:start])/1000000000.0
466
- return result
467
- end
468
- EOF
469
- eval run_method
470
- return self
471
- end
472
-
473
- @@extensions = {
474
- C => ".c",
475
- CUDA => ".cu",
476
- FORTRAN => ".f90"
477
- }
478
-
479
- def get_sub_kernels
480
- kernel_files = []
481
- @kernels.each { |kernel|
482
- kernel_file = Tempfile::new([kernel.procedure.name,".#{RbConfig::CONFIG["OBJEXT"]}"])
483
- kernel.binary.rewind
484
- kernel_file.write( kernel.binary.read )
485
- kernel_file.close
486
- kernel_files.push(kernel_file)
487
- }
488
- end
489
-
490
- def create_module_source(path)
491
- previous_lang = get_lang
492
- previous_output = get_output
493
- set_lang( C )
494
- module_file_name = File::split(path.chomp(File::extname(path)))[0] + "/Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_") + ".c"
495
- module_name = File::split(module_file_name.chomp(File::extname(module_file_name)))[1]
496
- module_file = File::open(module_file_name,"w+")
497
- set_output( module_file )
498
- fill_module(module_file, module_name)
499
- if debug_source? then
500
- module_file.rewind
501
- puts module_file.read
502
- end
503
- module_file.close
504
- set_lang( previous_lang )
505
- set_output( previous_output )
506
- return [module_file_name, module_name]
507
- end
508
-
509
- def save_binary(target)
510
- f = File::open(target,"rb")
511
- @binary = StringIO::new
512
- @binary.write( f.read )
513
- f.close
514
- end
515
-
516
- def create_source
517
- extension = @@extensions[@lang]
518
- source_file = Tempfile::new([@procedure.name,extension])
519
- path = source_file.path
520
- target = path.chomp(File::extname(path))+".#{RbConfig::CONFIG["OBJEXT"]}"
521
- fill_code(source_file)
522
- if debug_source? then
523
- source_file.rewind
524
- puts source_file.read
525
- end
526
- source_file.close
527
- return [source_file, path, target]
528
- end
529
-
530
- def create_ffi_module(module_name, module_final)
531
- s =<<EOF
532
- require 'ffi'
533
- require 'narray_ffi'
534
- module #{module_name}
535
- extend FFI::Library
536
- ffi_lib "#{module_final}"
537
- attach_function :#{@procedure.name}#{@lang == FORTRAN ? "_" : ""}, [ #{@procedure.parameters.collect{ |p| ":"+p.decl_ffi.to_s }.join(", ")} ], :#{@procedure.properties[:return] ? @procedure.properties[:return].type.decl_ffi : "void" }
538
- def run(*args)
539
- if args.length < @procedure.parameters.length or args.length > @procedure.parameters.length + 1 then
540
- raise "Wrong number of arguments for \#{@procedure.name} (\#{args.length} for \#{@procedure.parameters.length})"
541
- else
542
- ev_set = nil
543
- if args.length == @procedure.parameters.length + 1 then
544
- options = args.last
545
- if options[:PAPI] then
546
- require 'PAPI'
547
- ev_set = PAPI::EventSet::new
548
- ev_set.add_named(options[:PAPI])
549
- end
550
- end
551
- t_args = []
552
- r_args = {}
553
- if @lang == FORTRAN then
554
- @procedure.parameters.each_with_index { |p, i|
555
- if p.decl_ffi(true) != :pointer then
556
- arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
557
- arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
558
- t_args.push(arg_p)
559
- r_args[p] = arg_p if p.scalar_output?
560
- else
561
- t_args.push( args[i] )
562
- end
563
- }
564
- else
565
- @procedure.parameters.each_with_index { |p, i|
566
- if p.scalar_output? then
567
- arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
568
- arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
569
- t_args.push(arg_p)
570
- r_args[p] = arg_p
571
- else
572
- t_args.push( args[i] )
573
- end
574
- }
575
- end
576
- results = {}
577
- counters = nil
578
- ev_set.start if ev_set
579
- begin
580
- start = Time::new
581
- ret = #{@procedure.name}#{@lang == FORTRAN ? "_" : ""}(*t_args)
582
- stop = Time::new
583
- ensure
584
- if ev_set then
585
- counters = ev_set.stop
586
- ev_set.cleanup
587
- ev_set.destroy
588
- end
589
- end
590
- results = { :start => start, :stop => stop, :duration => stop - start, :return => ret }
591
- results[:PAPI] = Hash[[options[:PAPI]].flatten.zip(counters)] if ev_set
592
- if r_args.length > 0 then
593
- ref_return = {}
594
- r_args.each { |p, p_arg|
595
- ref_return[p.name.to_sym] = p_arg.send("read_\#{p.decl_ffi(true)}")
596
- }
597
- results[:reference_return] = ref_return
598
- end
599
- return results
600
- end
601
- end
602
- end
603
- EOF
604
- eval s
605
- end
606
-
607
- def build(options = {})
608
- compiler_options = BOAST::get_compiler_options
609
- compiler_options.update(options)
610
- return build_opencl(compiler_options) if @lang == CL
611
-
612
- linker, ldshared, ldflags = setup_compilers(compiler_options)
613
-
614
- extension = @@extensions[@lang]
615
-
616
- source_file, path, target = create_source
617
-
618
- if not ffi? then
619
- module_file_name, module_name = create_module_source(path)
620
- module_target = module_file_name.chomp(File::extname(module_file_name))+"."+RbConfig::CONFIG["OBJEXT"]
621
- module_final = module_file_name.chomp(File::extname(module_file_name))+"."+RbConfig::CONFIG["DLEXT"]
622
- else
623
- module_final = path.chomp(File::extname(path))+"."+RbConfig::CONFIG["DLEXT"]
624
- module_name = "Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_")
625
- end
626
-
627
- kernel_files = get_sub_kernels
628
-
629
- if not ffi? then
630
- file module_final => [module_target, target] do
631
- #puts "#{linker} #{ldshared} -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} #{ldflags}"
632
- sh "#{linker} #{ldshared} -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
633
- end
634
- Rake::Task[module_final].invoke
635
-
636
- require(module_final)
637
- else
638
- file module_final => [target] do
639
- #puts "#{linker} #{ldshared} -o #{module_final} #{target} #{kernel_files.join(" ")} #{ldflags}"
640
- sh "#{linker} #{ldshared} -o #{module_final} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
641
- end
642
- Rake::Task[module_final].invoke
643
- create_ffi_module(module_name, module_final)
644
- end
645
- eval "self.extend(#{module_name})"
646
- save_binary(target)
647
-
648
- if not ffi? then
649
- [target, module_target, module_file_name, module_final].each { |fn|
650
- File::unlink(fn)
651
- }
652
- else
653
- [target, module_final].each { |fn|
654
- File::unlink(fn)
655
- }
656
- end
657
- kernel_files.each { |f|
658
- f.unlink
659
- }
660
- return self
661
- end
662
-
663
- def fill_code(source_file)
664
- @code.rewind
665
- source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
666
- source_file.puts "#include <cuda.h>" if @lang == CUDA
667
- # check for too long FORTRAN lines
668
- if @lang == FORTRAN then
669
- @code.each_line { |line|
670
- # check for omp pragmas
671
- if line.match(/^\s*!\$/) then
672
- if line.match(/^\s*!\$(omp|OMP)/) then
673
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-7}}/)
674
- source_file.puts chunks.join("&\n!$omp&")
675
- else
676
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-4}}/)
677
- source_file.puts chunks.join("&\n!$&")
678
- end
679
- elsif line.match(/^\w*!/) then
680
- source_file.write line
681
- else
682
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-2}}/)
683
- source_file.puts chunks.join("&\n&")
684
- end
685
- }
686
- else
687
- source_file.write @code.read
688
- end
689
- if @lang == CUDA then
690
- source_file.write <<EOF
691
- extern "C" {
692
- #{@procedure.boast_header_s(CUDA)}{
693
- dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
694
- dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
695
- cudaEvent_t __start, __stop;
696
- float __time;
697
- cudaEventCreate(&__start);
698
- cudaEventCreate(&__stop);
699
- cudaEventRecord(__start, 0);
700
- #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
701
- cudaEventRecord(__stop, 0);
702
- cudaEventSynchronize(__stop);
703
- cudaEventElapsedTime(&__time, __start, __stop);
704
- return (unsigned long long int)((double)__time*(double)1e6);
705
- }
706
- }
707
- EOF
708
- end
709
- @code.rewind
710
- end
711
-
712
- def module_header(module_file)
713
- module_file.print <<EOF
714
- #include "ruby.h"
715
- #include <inttypes.h>
716
- #ifdef HAVE_NARRAY_H
717
- #include "narray.h"
718
- #endif
719
- EOF
720
- if OS.mac? then
721
- module_file.print <<EOF
722
- #if __cplusplus
723
- extern "C" {
724
- #endif
725
- #include <mach/mach_time.h>
726
- #if __cplusplus
727
- }
728
- #endif
729
- EOF
730
- else
731
- module_file.print "#include <time.h>\n"
732
- end
733
- if @lang == CUDA then
734
- module_file.print "#include <cuda_runtime.h>\n"
735
- end
736
- end
737
-
738
- def module_preamble(module_file, module_name)
739
- module_file.print <<EOF
740
- VALUE #{module_name} = Qnil;
741
- void Init_#{module_name}();
742
- VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self);
743
- void Init_#{module_name}() {
744
- #{module_name} = rb_define_module("#{module_name}");
745
- rb_define_method(#{module_name}, "run", method_run, -1);
746
- }
747
- EOF
748
- end
749
-
750
- def check_args(module_file)
751
- module_file.print <<EOF
752
- VALUE _boast_rb_opts;
753
- if( _boast_argc < #{@procedure.parameters.length} || _boast_argc > #{@procedure.parameters.length + 1} )
754
- rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", _boast_argc);
755
- _boast_rb_opts = Qnil;
756
- if( _boast_argc == #{@procedure.parameters.length + 1} ) {
757
- _boast_rb_opts = _boast_argv[_boast_argc -1];
758
- if ( _boast_rb_opts != Qnil ) {
759
- if (TYPE(_boast_rb_opts) != T_HASH)
760
- rb_raise(rb_eArgError, "Options should be passed as a hash");
761
- }
762
- }
763
- EOF
764
- end
765
-
766
- def get_params_value(module_file, argv, rb_ptr)
767
- set_decl_module(true)
768
- @procedure.parameters.each_index do |i|
769
- param = @procedure.parameters[i]
770
- if not param.dimension then
771
- case param.type
772
- when Int
773
- (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
774
- (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
775
- when Real
776
- (param === FuncCall::new("NUM2DBL", argv[i])).pr
777
- end
778
- else
779
- (rb_ptr === argv[i]).pr
780
- if @lang == CUDA then
781
- module_file.print <<EOF
782
- if ( IsNArray(_boast_rb_ptr) ) {
783
- struct NARRAY *_boast_n_ary;
784
- size_t _boast_array_size;
785
- Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
786
- _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
787
- cudaMalloc( (void **) &#{param.name}, _boast_array_size);
788
- cudaMemcpy(#{param.name}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
789
- } else {
790
- rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
791
- }
792
- EOF
793
- else
794
- module_file.print <<EOF
795
- if (TYPE(_boast_rb_ptr) == T_STRING) {
796
- #{param.name} = (void *) RSTRING_PTR(_boast_rb_ptr);
797
- } else if ( IsNArray(_boast_rb_ptr) ) {
798
- struct NARRAY *_boast_n_ary;
799
- Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
800
- #{param.name} = (void *) _boast_n_ary->ptr;
801
- } else {
802
- rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
803
- }
804
- EOF
805
- end
806
- end
807
- end
808
- set_decl_module(false)
809
- end
810
-
811
- def decl_module_params(module_file)
812
- set_decl_module(true)
813
- @procedure.parameters.each { |param|
814
- param_copy = param.copy
815
- param_copy.constant = nil
816
- param_copy.direction = nil
817
- param_copy.decl
818
- }
819
- set_decl_module(false)
820
- module_file.print " #{@procedure.properties[:return].type.decl} _boast_ret;\n" if @procedure.properties[:return]
821
- module_file.print " VALUE _boast_stats = rb_hash_new();\n"
822
- module_file.print " VALUE _boast_event_set = Qnil;\n"
823
- if OS.mac? then
824
- module_file.print " uint64_t _mac_boast_start, _mac_boast_stop;\n"
825
- module_file.print " mach_timebase_info_data_t _mac_boast_timebase_info;\n"
826
- else
827
- module_file.print " struct timespec _boast_start, _boast_stop;\n"
828
- end
829
- module_file.print " unsigned long long int _boast_duration;\n"
830
- end
831
-
832
- def get_cuda_launch_bounds(module_file)
833
- module_file.print <<EOF
834
- size_t _boast_block_size[3] = {1,1,1};
835
- size_t _boast_block_number[3] = {1,1,1};
836
- if( _boast_rb_opts != Qnil ) {
837
- VALUE _boast_rb_array_data = Qnil;
838
- int _boast_i;
839
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
840
- if( _boast_rb_ptr != Qnil ) {
841
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
842
- rb_raise(rb_eArgError, "Cuda option block_size should be an array");
843
- for(_boast_i=0; _boast_i<3; _boast_i++) {
844
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
845
- if( _boast_rb_array_data != Qnil )
846
- _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
847
- }
848
- } else {
849
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
850
- if( _boast_rb_ptr != Qnil ) {
851
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
852
- rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
853
- for(_boast_i=0; _boast_i<3; _boast_i++) {
854
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
855
- if( _boast_rb_array_data != Qnil )
856
- _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
857
- }
858
- }
859
- }
860
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
861
- if( _boast_rb_ptr != Qnil ) {
862
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
863
- rb_raise(rb_eArgError, "Cuda option block_number should be an array");
864
- for(_boast_i=0; _boast_i<3; _boast_i++) {
865
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
866
- if( _boast_rb_array_data != Qnil )
867
- _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
868
- }
869
- } else {
870
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
871
- if( _boast_rb_ptr != Qnil ) {
872
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
873
- rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
874
- for(_boast_i=0; _boast_i<3; _boast_i++) {
875
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
876
- if( _boast_rb_array_data != Qnil )
877
- _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
878
- }
879
- }
880
- }
881
- }
882
- EOF
883
- end
884
-
885
# Emits the C fragment that starts PAPI counters when the caller asked for them.
#
# The generated code checks that _boast_rb_opts is set and looks up its :PAPI
# entry. When present, it requires the "PAPI" Ruby library, instantiates
# PAPI::EventSet, adds the requested events with add_named, starts the set
# and keeps it in _boast_event_set.
#
# module_file - IO-like object receiving the generated C source.
def get_PAPI_options(module_file)
  papi_setup = <<PAPI_SETUP
if( _boast_rb_opts != Qnil ) {
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI")));
if( _boast_rb_ptr != Qnil ) {
VALUE _boast_PAPI = Qnil;
VALUE _boast_EventSet = Qnil;
rb_require("PAPI");
_boast_PAPI = rb_const_get(rb_cObject, rb_intern("PAPI"));
_boast_EventSet = rb_const_get(_boast_PAPI, rb_intern("EventSet"));
_boast_event_set = rb_funcall(_boast_EventSet, rb_intern("new"), 0);
rb_funcall(_boast_event_set, rb_intern("add_named"), 1, _boast_rb_ptr);
rb_funcall(_boast_event_set, rb_intern("start"), 0);
}
}
PAPI_SETUP
  module_file.print papi_setup
end
902
-
903
# Emits the C fragment that stops the PAPI event set and records its counters.
#
# When _boast_event_set is set, the generated code calls stop on it, flattens
# the requested event names (the :PAPI option) into an array, zips them with
# the values returned by stop, turns the pairs into a Hash and stores it under
# the :PAPI key of _boast_stats.
#
# module_file - IO-like object receiving the generated C source.
def get_PAPI_results(module_file)
  papi_collect = <<PAPI_COLLECT
if( _boast_event_set != Qnil) {
VALUE _boast_papi_results = Qnil;
VALUE _boast_papi_stats = Qnil;
_boast_papi_results = rb_funcall(_boast_event_set, rb_intern("stop"), 0);
_boast_papi_stats = rb_ary_new3(1,rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI"))));
_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("flatten"), 0);
_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("zip"), 1, _boast_papi_results);
_boast_papi_stats = rb_funcall(rb_const_get(rb_cObject, rb_intern("Hash")), rb_intern("send"), 2, ID2SYM(rb_intern("[]")), _boast_papi_stats );
rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"PAPI\")),_boast_papi_stats);
}
PAPI_COLLECT
  module_file.print papi_collect
end
917
-
918
# Emits the C statement that invokes the compiled procedure.
#
# The call result is assigned to _boast_duration for CUDA, or to _boast_ret
# when the procedure declares a :return property. The callee name gets a "_"
# suffix for FORTRAN and a "_wrapper" suffix for CUDA.
#
# Argument passing: array parameters (those with a dimension) are passed by
# name. In FORTRAN every other parameter is passed by address; in the other
# languages only :out / :inout scalars are. CUDA calls additionally receive
# _boast_block_number and _boast_block_size.
#
# module_file - IO-like object receiving the generated C source.
def create_procedure_call(module_file)
  if @lang == CUDA
    module_file.print " _boast_duration = "
  elsif @procedure.properties[:return]
    module_file.print " _boast_ret = "
  end
  module_file.print " #{@procedure.name}"
  module_file.print "_" if @lang == FORTRAN
  module_file.print "_wrapper" if @lang == CUDA
  module_file.print "("

  fortran_call = (@lang == FORTRAN)
  call_args = @procedure.parameters.collect { |param|
    if param.dimension
      param.name
    elsif fortran_call || param.direction == :out || param.direction == :inout
      "&" + param.name
    else
      param.name
    end
  }
  call_args.push( "_boast_block_number", "_boast_block_size" ) if @lang == CUDA

  module_file.print call_args.join(", ")
  module_file.print " );\n"
end
954
-
955
# Emits the C code that runs after the kernel call to hand results back to Ruby.
#
# The whole emission happens between set_decl_module(true) and
# set_decl_module(false).
#
# CUDA: for every array parameter, (rb_ptr === argv[i]).pr is emitted
# (presumably the assignment of the i-th argument to _boast_rb_ptr - confirm
# against the BOAST DSL). The generated code then checks the value is an
# NArray, copies device memory back to the host for :out / :inout parameters,
# frees the device buffer, and raises ArgumentError for any other value.
#
# Other languages: every parameter answering scalar_output? is stored in a
# hash registered under "reference_return" in _boast_stats; Int parameters are
# stored as (signed or unsigned) integers, Real parameters as floats. The hash
# is only declared once the first such parameter is met.
#
# module_file - IO-like object receiving the generated C source.
# argv        - BOAST variable standing for the _boast_argv array.
# rb_ptr      - BOAST variable standing for the _boast_rb_ptr temporary.
def get_results(module_file, argv, rb_ptr)
  set_decl_module(true)
  if @lang == CUDA
    @procedure.parameters.each_with_index do |param, i|
      next unless param.dimension
      (rb_ptr === argv[i]).pr
      module_file.print <<NARRAY_CHECK
if ( IsNArray(_boast_rb_ptr) ) {
NARRAY_CHECK
      if param.direction == :out || param.direction == :inout
        module_file.print <<COPY_TO_HOST
struct NARRAY *_boast_n_ary;
size_t _boast_array_size;
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
cudaMemcpy((void *) _boast_n_ary->ptr, #{param.name}, _boast_array_size, cudaMemcpyDeviceToHost);
COPY_TO_HOST
      end
      module_file.print <<RELEASE_DEVICE
cudaFree( (void *) #{param.name});
} else {
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
}
RELEASE_DEVICE
    end
  else
    refs_declared = false
    @procedure.parameters.each do |param|
      next unless param.scalar_output?
      unless refs_declared
        module_file.print " VALUE _boast_refs = rb_hash_new();\n"
        module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"reference_return\")),_boast_refs);\n"
        refs_declared = true
      end
      case param.type
      when Int
        if param.type.signed?
          module_file.print " rb_hash_aset(_boast_refs, ID2SYM(rb_intern(\"#{param}\")),rb_int_new((long long)#{param}));\n"
        else
          module_file.print " rb_hash_aset(_boast_refs, ID2SYM(rb_intern(\"#{param}\")),rb_int_new((unsigned long long)#{param}));\n"
        end
      when Real
        module_file.print " rb_hash_aset(_boast_refs, ID2SYM(rb_intern(\"#{param}\")),rb_float_new((double)#{param}));\n"
      end
    end
  end
  set_decl_module(false)
end
1003
-
1004
# Emits the C code that stores the timing and the return value in _boast_stats.
#
# The duration is always stored, converted from nanoseconds to seconds. When
# the procedure declares a :return property, _boast_ret is stored under
# "return": as a signed or unsigned integer for Int types, as a float for Real
# types. Nothing is stored for any other return type.
#
# module_file - IO-like object receiving the generated C source.
def store_result(module_file)
  module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)_boast_duration*(double)1e-9));\n"
  returned = @procedure.properties[:return]
  return unless returned

  type_ret = returned.type
  if type_ret.kind_of?(Int)
    if type_ret.signed
      module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"return\")),rb_int_new((long long)_boast_ret));\n"
    else
      module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)_boast_ret));\n"
    end
  end
  if type_ret.kind_of?(Real)
    module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)_boast_ret));\n"
  end
  nil
end
1013
-
1014
# Writes the complete C extension source wrapping the procedure.
#
# After the header and preamble, a method_run C function is emitted that, in
# order: checks its arguments, declares and fetches the parameters, reads the
# CUDA launch bounds (CUDA only), starts optional PAPI counters, timestamps,
# calls the procedure, timestamps again, collects PAPI results, computes the
# duration in nanoseconds (non-CUDA only; mach_absolute_time on macOS,
# clock_gettime elsewhere), fetches the results and returns _boast_stats.
#
# module_file - IO-like object receiving the generated C source.
# module_name - name handed to module_preamble.
def fill_module(module_file, module_name)
  module_header(module_file)
  @procedure.boast_header(@lang)
  module_preamble(module_file, module_name)

  module_file.puts "VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self) {"
  increment_indent_level
  check_args(module_file)

  param_count = @procedure.parameters.length
  argv = Variable::new("_boast_argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0, param_count - 1) ] )
  rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
  set_transition("VALUE", "VALUE", :default, CustomType::new(:type_name => "VALUE"))
  rb_ptr.decl

  decl_module_params(module_file)
  get_params_value(module_file, argv, rb_ptr)
  module_file.print get_cuda_launch_bounds(module_file) if @lang == CUDA
  get_PAPI_options(module_file)

  # Timestamp right before the call.
  start_stamp = if OS.mac?
    " _mac_boast_start = mach_absolute_time();\n"
  else
    " clock_gettime(CLOCK_REALTIME, &_boast_start);\n"
  end
  module_file.print start_stamp

  create_procedure_call(module_file)

  # Timestamp right after the call.
  stop_stamp = if OS.mac?
    " _mac_boast_stop = mach_absolute_time();\n"
  else
    " clock_gettime(CLOCK_REALTIME, &_boast_stop);\n"
  end
  module_file.print stop_stamp

  get_PAPI_results(module_file)

  # For CUDA the wrapper call itself assigns _boast_duration.
  if @lang != CUDA
    if OS.mac?
      module_file.print " mach_timebase_info(&_mac_boast_timebase_info);\n"
      module_file.print " _boast_duration = (_mac_boast_stop - _mac_boast_start) * _mac_boast_timebase_info.numer / _mac_boast_timebase_info.denom;\n"
    else
      module_file.print " _boast_duration = (_boast_stop.tv_sec - _boast_start.tv_sec) * (unsigned long long int)1000000000 + _boast_stop.tv_nsec - _boast_start.tv_nsec;\n"
    end
  end

  get_results(module_file, argv, rb_ptr)
  store_result(module_file)

  module_file.print " return _boast_stats;\n"
  decrement_indent_level
  module_file.print "}"
end
1072
-
1073
# Builds the kernel on demand the first time `run` is called.
#
# Any message other than "run" is forwarded to the default handling. For
# "run", the kernel is built and the call is retried with the same arguments
# and block (build is expected to make `run` available - confirm there).
def method_missing(meth, *args, &block)
  return super unless meth.to_s == "run"
  build
  run(*args, &block)
end
1081
-
1082
# Loads the reference input sets of the procedure.
#
# path   - directory handed to load_ref_files (default "").
# suffix - file name suffix of input files (default ".in").
#
# Returns whatever load_ref_files returns for the :in intent.
def load_ref_inputs(path = "", suffix = ".in" )
  load_ref_files( path, suffix, :in )
end
1085
-
1086
# Loads the reference output sets of the procedure.
#
# path   - directory handed to load_ref_files (default "").
# suffix - file name suffix of output files (default ".out").
#
# Returns whatever load_ref_files returns for the :out intent.
def load_ref_outputs(path = "", suffix = ".out" )
  load_ref_files( path, suffix, :out )
end
1089
-
1090
# Compares computed outputs with reference outputs, parameter by parameter.
#
# Parameters that are :in or constant are ignored. For array parameters the
# element-wise absolute difference is computed and its maximum reported; for
# scalars the absolute difference itself is reported.
#
# ref_outputs - reference values, indexed like the procedure parameters.
# outputs     - computed values, indexed like the procedure parameters.
# epsilon     - optional tolerance; when given, any difference strictly above
#               it raises a RuntimeError.
#
# Returns a Hash mapping parameter names to their (maximum) absolute error.
def compare_ref(ref_outputs, outputs, epsilon = nil)
  @procedure.parameters.each_with_index.each_with_object({}) do |(param, indx), errors|
    next if param.direction == :in || param.constant
    if param.dimension
      diff = (outputs[indx] - ref_outputs[indx]).abs
      if epsilon
        diff.each { |elem|
          raise "Error: #{param.name} different from ref by: #{elem}!" if elem > epsilon
        }
      end
      errors[param.name] = diff.max
    else
      delta = (outputs[indx] - ref_outputs[indx]).abs
      raise "Error: #{param.name} different from ref: #{outputs[indx]} != #{ref_outputs[indx]} !" if epsilon && delta > epsilon
      errors[param.name] = delta
    end
  end
end
1111
-
1112
# Selects the NArray typecode used to decode a reference file for an array
# parameter.
#
# Real parameters of size 4 / 8 map to SFLOAT / FLOAT; Int parameters of size
# 1 / 2 / 4 map to BYTE / SINT / INT. Everything else falls back to BYTE, with
# a warning on STDERR when debug? is true.
#
# Fix: 4-byte integers used to map to NArray::SINT, the same 16-bit typecode
# as the 2-byte case, so 32-bit data was decoded as twice as many 16-bit
# elements. They now map to NArray::INT.
#
# param - parameter whose type (class and size) drives the choice.
#
# Returns an NArray typecode constant.
def get_array_type(param)
  if param.type.class == Real
    case param.type.size
    when 4
      NArray::SFLOAT
    when 8
      NArray::FLOAT
    else
      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
      NArray::BYTE
    end
  elsif param.type.class == Int
    case param.type.size
    when 1
      NArray::BYTE
    when 2
      NArray::SINT
    when 4
      NArray::INT
    else
      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
      NArray::BYTE
    end
  else
    STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
    NArray::BYTE
  end
end
1141
-
1142
# Selects the String#unpack directive used to decode a scalar reference file.
#
# Real: size 4 -> "f", size 8 -> "d".
# Int:  size 1 / 2 / 4 / 8 -> "C" / "S" / "L" / "Q", lower-cased when the type
#       is signed.
#
# param - parameter whose type (class, size, signedness) drives the choice.
#
# Returns the directive, or nil when the type is neither Real nor Int.
# Raises RuntimeError for an unsupported Real or Int size.
def get_scalar_type(param)
  kind = param.type.class
  if kind == Real
    case param.type.size
    when 4 then "f"
    when 8 then "d"
    else raise "Unsupported Real scalar size: #{param.type.size}!"
    end
  elsif kind == Int
    directive = case param.type.size
                when 1 then "C"
                when 2 then "S"
                when 4 then "L"
                when 8 then "Q"
                else raise "Unsupported Int scalar size: #{param.type.size}!"
                end
    param.type.signed? ? directive.downcase : directive
  end
end
1171
-
1172
# Reads the reference value of one parameter from
# "<directory>/<param.name><suffix>".
#
# Array parameters are decoded into an NArray of the type given by
# get_array_type (an empty file yields a one-element NArray). Scalars are
# decoded with String#unpack using the directive from get_scalar_type.
#
# Fix: the file is now opened with the block form of File::open, so the handle
# is closed even when type selection or decoding raises; previously it stayed
# open until garbage collection.
#
# param     - parameter to read.
# directory - directory containing the reference files.
# suffix    - file name suffix appended to the parameter name.
# intent    - :in or :out; with :out, parameters that are :in or constant are
#             skipped.
#
# Returns the decoded value, or nil for a skipped parameter.
def read_param(param, directory, suffix, intent)
  return nil if intent == :out && ( param.direction == :in || param.constant )

  File::open( directory + "/" + param.name + suffix, "rb" ) { |f|
    if param.dimension
      type = get_array_type(param)
      if f.size == 0
        NArray::new(type, 1)
      else
        NArray.to_na(f.read, type)
      end
    else
      type = get_scalar_type(param)
      f.read.unpack(type).first
    end
  }
end
1191
-
1192
# Reads the launch geometry of a reference case from "<directory>/problem_size".
#
# The file holds two "<a,b,...>" groups: the first gives the local dimensions,
# the second the global ones. Both are padded with 1 up to three dimensions.
# For CL the global dimensions are multiplied by the local ones and the result
# uses the :global_work_size / :local_work_size keys; otherwise the values are
# returned as :block_number / :block_size.
#
# Fix: the file is read with File::read, so no handle is left open when the
# content is malformed and parsing raises.
#
# directory - directory of the reference case.
#
# Returns a Hash with the two dimension arrays.
def get_gpu_dim(directory)
  s = File::read( directory + "/problem_size" )
  local_dim, global_dim = s.scan(/<(.*?)>/)
  local_dim = local_dim.pop.split(",").collect { |e| e.to_i }
  global_dim = global_dim.pop.split(",").collect { |e| e.to_i }
  local_dim.push(1) while local_dim.length < 3
  global_dim.push(1) while global_dim.length < 3
  if @lang == CL
    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
    { :global_work_size => global_dim, :local_work_size => local_dim }
  else
    { :block_number => global_dim, :block_size => local_dim }
  end
end
1209
-
1210
# Loads every reference case found under "<path>/<procedure name>/".
#
# Each sub-directory is one case. For a case, every procedure parameter is
# read with read_param (in declaration order); for CUDA and CL the launch
# geometry from get_gpu_dim is appended as a last element.
#
# path   - root directory of the reference data (default "").
# suffix - file name suffix handed to read_param (default "").
# intent - :in or :out, handed to read_param.
#
# Returns a Hash mapping each case directory (as a String) to its array of
# values; an empty Hash when the directory cannot be listed.
def load_ref_files( path = "", suffix = "", intent )
  case_root = path + "/#{@procedure.name}/"
  begin
    case_dirs = Pathname.new(case_root).children.select(&:directory?)
  rescue
    # Missing or unreadable reference directory: nothing to load.
    return {}
  end
  case_dirs.map(&:to_s).each_with_object({}) do |dir, refs|
    values = @procedure.parameters.map { |param| read_param(param, dir, suffix, intent) }
    values.push get_gpu_dim(dir) if @lang == CUDA || @lang == CL
    refs[dir] = values
  end
end
1231
-
1232
# Evaluates the kernel's cost function.
#
# args - forwarded unchanged to @cost_function.
#
# Returns whatever the cost function returns.
def cost(*args)
  @cost_function.(*args)
end
1235
- end
1236
- end