BOAST 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/BOAST.gemspec +2 -31
  3. data/README.md +240 -0
  4. data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
  5. data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
  6. data/lib/BOAST/Runtime/CKernel.rb +94 -0
  7. data/lib/BOAST/Runtime/CRuntime.rb +32 -0
  8. data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
  9. data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
  10. data/lib/BOAST/Runtime/Compilers.rb +205 -0
  11. data/lib/BOAST/Runtime/Config.rb +94 -0
  12. data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
  13. data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
  14. data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
  15. data/lib/BOAST/Runtime/NonRegression.rb +157 -0
  16. data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
  17. data/lib/BOAST/Runtime/Probe.rb +136 -0
  18. data/lib/BOAST.rb +37 -26
  19. metadata +40 -28
  20. data/lib/BOAST/CKernel.rb +0 -1236
  21. /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
  22. /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
  23. /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
  24. /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
  25. /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
  26. /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
  27. /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
  28. /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
  29. /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
  30. /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
  31. /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
  32. /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
  33. /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
  34. /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
  35. /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
  36. /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
  37. /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
  38. /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
  39. /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
  40. /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
  41. /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
  42. /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
  43. /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
data/lib/BOAST/CKernel.rb DELETED
@@ -1,1236 +0,0 @@
1
- require 'stringio'
2
- require 'rubygems'
3
- require 'rake'
4
- require 'tempfile'
5
- require 'rbconfig'
6
- require 'systemu'
7
- require 'yaml'
8
- require 'pathname'
9
- require 'os'
10
-
11
- module BOAST
12
- @@compiler_default_options = {
13
- :FC => 'gfortran',
14
- :FCFLAGS => '-O2 -Wall',
15
- :CC => 'gcc',
16
- :CFLAGS => '-O2 -Wall',
17
- :CXX => 'g++',
18
- :CXXFLAGS => '-O2 -Wall',
19
- :NVCC => 'nvcc',
20
- :NVCCFLAGS => '-O2',
21
- :LDFLAGS => '',
22
- :CLFLAGS => '',
23
- :CLVENDOR => nil,
24
- :CLPLATFORM => nil,
25
- :CLDEVICE => nil,
26
- :CLDEVICETYPE => nil,
27
- :openmp => false
28
- }
29
-
30
- @@openmp_default_flags = {
31
- "gcc" => "-fopenmp",
32
- "icc" => "-openmp",
33
- "gfortran" => "-fopenmp",
34
- "ifort" => "-openmp",
35
- "g++" => "-fopenmp",
36
- "icpc" => "-openmp"
37
- }
38
-
39
- module PrivateStateAccessor
40
- private_boolean_state_accessor :verbose
41
- private_boolean_state_accessor :debug_source
42
- private_boolean_state_accessor :ffi
43
- end
44
-
45
- boolean_state_accessor :verbose
46
- boolean_state_accessor :debug_source
47
- boolean_state_accessor :ffi
48
- @@ffi = false
49
- @@verbose = false
50
- @@debug_source = false
51
- FORTRAN_LINE_LENGTH = 72
52
-
53
- module_function
54
-
55
- def read_boast_config
56
- home_config_dir = ENV["XDG_CONFIG_HOME"]
57
- home_config_dir = "#{Dir.home}/.config" if not home_config_dir
58
- Dir.mkdir( home_config_dir ) if not File::exist?( home_config_dir )
59
- return if not File::directory?(home_config_dir)
60
- boast_config_dir = "#{home_config_dir}/BOAST"
61
- Dir.mkdir( boast_config_dir ) if not File::exist?( boast_config_dir )
62
- compiler_options_file = "#{boast_config_dir}/compiler_options"
63
- if File::exist?( compiler_options_file ) then
64
- File::open( compiler_options_file, "r" ) { |f|
65
- @@compiler_default_options.update( YAML::load( f.read ) )
66
- }
67
- else
68
- File::open( compiler_options_file, "w" ) { |f|
69
- f.write YAML::dump( @@compiler_default_options )
70
- }
71
- end
72
- openmp_flags_file = "#{boast_config_dir}/openmp_flags"
73
- if File::exist?( openmp_flags_file ) then
74
- File::open( openmp_flags_file, "r" ) { |f|
75
- @@openmp_default_flags.update( YAML::load( f.read ) )
76
- }
77
- else
78
- File::open( openmp_flags_file, "w" ) { |f|
79
- f.write YAML::dump( @@openmp_default_flags )
80
- }
81
- end
82
- @@compiler_default_options.each_key { |k|
83
- @@compiler_default_options[k] = ENV[k.to_s] if ENV[k.to_s]
84
- }
85
- @@compiler_default_options[:LD] = ENV["LD"] if ENV["LD"]
86
- @@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
87
- @@ffi = ENV["FFI"] if ENV["FFI"]
88
- @@debug_source = ENV["DEBUG_SOURCE"] if ENV["DEBUG_SOURCE"]
89
- end
90
-
91
- read_boast_config
92
-
93
- def get_openmp_flags
94
- return @@openmp_default_flags.clone
95
- end
96
-
97
- def get_compiler_options
98
- return @@compiler_default_options.clone
99
- end
100
-
101
- class CKernel
102
- include Rake::DSL
103
- include Inspectable
104
- include PrivateStateAccessor
105
- include TypeTransition
106
-
107
- attr_accessor :code
108
- attr_accessor :procedure
109
- attr_accessor :lang
110
- attr_accessor :binary
111
- attr_accessor :kernels
112
- attr_accessor :cost_function
113
-
114
- def initialize(options={})
115
- if options[:code] then
116
- @code = options[:code]
117
- elsif get_chain_code
118
- @code = get_output
119
- @code.seek(0,SEEK_END)
120
- else
121
- @code = StringIO::new
122
- end
123
- set_output(@code)
124
- if options[:kernels] then
125
- @kernels = options[:kernels]
126
- else
127
- @kernels = []
128
- end
129
- if options[:lang] then
130
- @lang = options[:lang]
131
- else
132
- @lang = get_lang
133
- end
134
- end
135
-
136
- def print
137
- @code.rewind
138
- puts @code.read
139
- end
140
-
141
- def to_s
142
- @code.rewind
143
- return code.read
144
- end
145
-
146
-
147
- def get_openmp_flags(compiler)
148
- openmp_flags = BOAST::get_openmp_flags[compiler]
149
- if not openmp_flags then
150
- keys = BOAST::get_openmp_flags.keys
151
- keys.each { |k|
152
- openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
153
- }
154
- end
155
- return openmp_flags
156
- end
157
-
158
- def get_includes(narray_path)
159
- includes = "-I#{RbConfig::CONFIG["archdir"]}"
160
- includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
161
- includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
162
- includes += " -I#{narray_path}" if narray_path
163
- return includes
164
- end
165
-
166
- def get_narray_path
167
- narray_path = nil
168
- begin
169
- spec = Gem::Specification::find_by_name('narray')
170
- narray_path = spec.full_gem_path
171
- rescue Gem::LoadError => e
172
- rescue NoMethodError => e
173
- spec = Gem::available?('narray')
174
- if spec then
175
- require 'narray'
176
- narray_path = Gem.loaded_specs['narray'].full_gem_path
177
- end
178
- end
179
- end
180
-
181
- def setup_c_compiler(options, includes, narray_path, runner)
182
- c_compiler = options[:CC]
183
- cflags = options[:CFLAGS]
184
- cflags += " -fPIC #{includes}"
185
- cflags += " -DHAVE_NARRAY_H" if narray_path
186
- if options[:openmp] and @lang == C then
187
- openmp_cflags = get_openmp_flags(c_compiler)
188
- raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
189
- cflags += " #{openmp_cflags}"
190
- end
191
-
192
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.c' do |t|
193
- c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
194
- runner.call(t, c_call_string)
195
- end
196
- end
197
-
198
- def setup_cxx_compiler(options, includes, runner)
199
- cxx_compiler = options[:CXX]
200
- cxxflags = options[:CXXFLAGS]
201
- cxxflags += " -fPIC #{includes}"
202
- if options[:openmp] and @lang == C then
203
- openmp_cxxflags = get_openmp_flags(cxx_compiler)
204
- raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
205
- cxxflags += " #{openmp_cxxflags}"
206
- end
207
-
208
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
209
- cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
210
- runner.call(t, cxx_call_string)
211
- end
212
- end
213
-
214
- def setup_fortran_compiler(options, runner)
215
- f_compiler = options[:FC]
216
- fcflags = options[:FCFLAGS]
217
- fcflags += " -fPIC"
218
- fcflags += " -fno-second-underscore" if f_compiler == 'g95'
219
- if options[:openmp] and @lang == FORTRAN then
220
- openmp_fcflags = get_openmp_flags(f_compiler)
221
- raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
222
- fcflags += " #{openmp_fcflags}"
223
- end
224
-
225
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
226
- f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
227
- runner.call(t, f_call_string)
228
- end
229
- end
230
-
231
- def setup_cuda_compiler(options, runner)
232
- cuda_compiler = options[:NVCC]
233
- cudaflags = options[:NVCCFLAGS]
234
- cudaflags += " --compiler-options '-fPIC'"
235
-
236
- rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cu' do |t|
237
- cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
238
- runner.call(t, cuda_call_string)
239
- end
240
- end
241
-
242
- def setup_linker(options)
243
- ldflags = options[:LDFLAGS]
244
- ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]}"
245
- ldflags += " -lrt" if not OS.mac?
246
- ldflags += " -lcudart" if @lang == CUDA
247
- c_compiler = options[:CC]
248
- c_compiler = "cc" if not c_compiler
249
- linker = options[:LD]
250
- linker = c_compiler if not linker
251
- if options[:openmp] then
252
- openmp_ldflags = get_openmp_flags(linker)
253
- raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
254
- ldflags += " #{openmp_ldflags}"
255
- end
256
-
257
- if OS.mac? then
258
- ldflags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"
259
- ldshared = "-dynamic -bundle"
260
- else
261
- ldflags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
262
- ldshared = "-shared"
263
- end
264
-
265
- return [linker, ldshared, ldflags]
266
- end
267
-
268
- def setup_compilers(options = {})
269
- Rake::Task::clear
270
- verbose = options[:verbose]
271
- verbose = get_verbose if not verbose
272
- Rake::verbose(verbose)
273
- Rake::FileUtilsExt.verbose_flag=verbose
274
-
275
- narray_path = get_narray_path
276
- includes = get_includes(narray_path)
277
-
278
- runner = lambda { |t, call_string|
279
- if verbose then
280
- sh call_string
281
- else
282
- status, stdout, stderr = systemu call_string
283
- if not status.success? then
284
- puts stderr
285
- fail "#{t.source}: compilation failed"
286
- end
287
- status.success?
288
- end
289
- }
290
-
291
- setup_c_compiler(options, includes, narray_path, runner)
292
- setup_cxx_compiler(options, includes, runner)
293
- setup_fortran_compiler(options, runner)
294
- setup_cuda_compiler(options, runner)
295
-
296
- return setup_linker(options)
297
-
298
- end
299
-
300
- def select_cl_platform(options)
301
- platforms = OpenCL::get_platforms
302
- if options[:platform_vendor] then
303
- platforms.select!{ |p|
304
- p.vendor.match(options[:platform_vendor])
305
- }
306
- elsif options[:CLVENDOR] then
307
- platforms.select!{ |p|
308
- p.vendor.match(options[:CLVENDOR])
309
- }
310
- end
311
- if options[:CLPLATFORM] then
312
- platforms.select!{ |p|
313
- p.name.match(options[:CLPLATFORM])
314
- }
315
- end
316
- return platforms.first
317
- end
318
-
319
- def select_cl_device(options)
320
- platform = select_cl_platform(options)
321
- type = options[:device_type] ? OpenCL::Device::Type.const_get(options[:device_type]) : options[:CLDEVICETYPE] ? OpenCL::Device::Type.const_get(options[:CLDEVICETYPE]) : OpenCL::Device::Type::ALL
322
- devices = platform.devices(type)
323
- if options[:device_name] then
324
- devices.select!{ |d|
325
- d.name.match(options[:device_name])
326
- }
327
- elsif options[:CLDEVICE] then
328
- devices.select!{ |d|
329
- d.name.match(options[:CLDEVICE])
330
- }
331
- end
332
- return devices.first
333
- end
334
-
335
- def init_opencl_types
336
- @@opencl_real_types = {
337
- 2 => OpenCL::Half,
338
- 4 => OpenCL::Float,
339
- 8 => OpenCL::Double
340
- }
341
-
342
- @@opencl_int_types = {
343
- true => {
344
- 1 => OpenCL::Char,
345
- 2 => OpenCL::Short,
346
- 4 => OpenCL::Int,
347
- 8 => OpenCL::Long
348
- },
349
- false => {
350
- 1 => OpenCL::UChar,
351
- 2 => OpenCL::UShort,
352
- 4 => OpenCL::UInt,
353
- 8 => OpenCL::ULong
354
- }
355
- }
356
- end
357
-
358
- def init_opencl(options)
359
- require 'opencl_ruby_ffi'
360
- init_opencl_types
361
- device = select_cl_device(options)
362
- @context = OpenCL::create_context([device])
363
- program = @context.create_program_with_source([@code.string])
364
- opts = options[:CLFLAGS]
365
- begin
366
- program.build(:options => options[:CLFLAGS])
367
- rescue OpenCL::Error => e
368
- puts e.to_s
369
- puts program.build_status
370
- puts program.build_log
371
- if options[:verbose] or get_verbose then
372
- puts @code.string
373
- end
374
- raise "OpenCL Failed to build #{@procedure.name}"
375
- end
376
- if options[:verbose] or get_verbose then
377
- program.build_log.each {|dev,log|
378
- puts "#{device.name}: #{log}"
379
- }
380
- end
381
- @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
382
- @kernel = program.create_kernel(@procedure.name)
383
- return self
384
- end
385
-
386
- def create_opencl_array(arg, parameter)
387
- if parameter.direction == :in then
388
- flags = OpenCL::Mem::Flags::READ_ONLY
389
- elsif parameter.direction == :out then
390
- flags = OpenCL::Mem::Flags::WRITE_ONLY
391
- else
392
- flags = OpenCL::Mem::Flags::READ_WRITE
393
- end
394
- if parameter.texture then
395
- param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
396
- @queue.enqueue_write_image( param, arg, :blocking => true )
397
- else
398
- param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
399
- @queue.enqueue_write_buffer( param, arg, :blocking => true )
400
- end
401
- return param
402
- end
403
-
404
- def create_opencl_scalar(arg, parameter)
405
- if parameter.type.is_a?(Real) then
406
- return @@opencl_real_types[parameter.type.size]::new(arg)
407
- elsif parameter.type.is_a?(Int) then
408
- return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
409
- else
410
- return arg
411
- end
412
- end
413
-
414
- def create_opencl_param(arg, parameter)
415
- if parameter.dimension then
416
- return create_opencl_array(arg, parameter)
417
- else
418
- return create_opencl_scalar(arg, parameter)
419
- end
420
- end
421
-
422
- def read_opencl_param(param, arg, parameter)
423
- if parameter.texture then
424
- @queue.enqueue_read_image( param, arg, :blocking => true )
425
- else
426
- @queue.enqueue_read_buffer( param, arg, :blocking => true )
427
- end
428
- end
429
-
430
- def build_opencl(options)
431
- init_opencl(options)
432
-
433
- run_method = <<EOF
434
- def self.run(*args)
435
- raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
436
- params = []
437
- opts = {}
438
- opts = args.pop if args.length == #{@procedure.parameters.length+1}
439
- @procedure.parameters.each_index { |i|
440
- params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
441
- }
442
- params.each_index{ |i|
443
- @kernel.set_arg(i, params[i])
444
- }
445
- gws = opts[:global_work_size]
446
- if not gws then
447
- gws = []
448
- opts[:block_number].each_index { |i|
449
- gws.push(opts[:block_number][i]*opts[:block_size][i])
450
- }
451
- end
452
- lws = opts[:local_work_size]
453
- if not lws then
454
- lws = opts[:block_size]
455
- end
456
- event = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
457
- @procedure.parameters.each_index { |i|
458
- if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
459
- read_opencl_param( params[i], args[i], @procedure.parameters[i] )
460
- end
461
- }
462
- result = {}
463
- result[:start] = event.profiling_command_start
464
- result[:end] = event.profiling_command_end
465
- result[:duration] = (result[:end] - result[:start])/1000000000.0
466
- return result
467
- end
468
- EOF
469
- eval run_method
470
- return self
471
- end
472
-
473
- @@extensions = {
474
- C => ".c",
475
- CUDA => ".cu",
476
- FORTRAN => ".f90"
477
- }
478
-
479
- def get_sub_kernels
480
- kernel_files = []
481
- @kernels.each { |kernel|
482
- kernel_file = Tempfile::new([kernel.procedure.name,".#{RbConfig::CONFIG["OBJEXT"]}"])
483
- kernel.binary.rewind
484
- kernel_file.write( kernel.binary.read )
485
- kernel_file.close
486
- kernel_files.push(kernel_file)
487
- }
488
- end
489
-
490
- def create_module_source(path)
491
- previous_lang = get_lang
492
- previous_output = get_output
493
- set_lang( C )
494
- module_file_name = File::split(path.chomp(File::extname(path)))[0] + "/Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_") + ".c"
495
- module_name = File::split(module_file_name.chomp(File::extname(module_file_name)))[1]
496
- module_file = File::open(module_file_name,"w+")
497
- set_output( module_file )
498
- fill_module(module_file, module_name)
499
- if debug_source? then
500
- module_file.rewind
501
- puts module_file.read
502
- end
503
- module_file.close
504
- set_lang( previous_lang )
505
- set_output( previous_output )
506
- return [module_file_name, module_name]
507
- end
508
-
509
- def save_binary(target)
510
- f = File::open(target,"rb")
511
- @binary = StringIO::new
512
- @binary.write( f.read )
513
- f.close
514
- end
515
-
516
- def create_source
517
- extension = @@extensions[@lang]
518
- source_file = Tempfile::new([@procedure.name,extension])
519
- path = source_file.path
520
- target = path.chomp(File::extname(path))+".#{RbConfig::CONFIG["OBJEXT"]}"
521
- fill_code(source_file)
522
- if debug_source? then
523
- source_file.rewind
524
- puts source_file.read
525
- end
526
- source_file.close
527
- return [source_file, path, target]
528
- end
529
-
530
- def create_ffi_module(module_name, module_final)
531
- s =<<EOF
532
- require 'ffi'
533
- require 'narray_ffi'
534
- module #{module_name}
535
- extend FFI::Library
536
- ffi_lib "#{module_final}"
537
- attach_function :#{@procedure.name}#{@lang == FORTRAN ? "_" : ""}, [ #{@procedure.parameters.collect{ |p| ":"+p.decl_ffi.to_s }.join(", ")} ], :#{@procedure.properties[:return] ? @procedure.properties[:return].type.decl_ffi : "void" }
538
- def run(*args)
539
- if args.length < @procedure.parameters.length or args.length > @procedure.parameters.length + 1 then
540
- raise "Wrong number of arguments for \#{@procedure.name} (\#{args.length} for \#{@procedure.parameters.length})"
541
- else
542
- ev_set = nil
543
- if args.length == @procedure.parameters.length + 1 then
544
- options = args.last
545
- if options[:PAPI] then
546
- require 'PAPI'
547
- ev_set = PAPI::EventSet::new
548
- ev_set.add_named(options[:PAPI])
549
- end
550
- end
551
- t_args = []
552
- r_args = {}
553
- if @lang == FORTRAN then
554
- @procedure.parameters.each_with_index { |p, i|
555
- if p.decl_ffi(true) != :pointer then
556
- arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
557
- arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
558
- t_args.push(arg_p)
559
- r_args[p] = arg_p if p.scalar_output?
560
- else
561
- t_args.push( args[i] )
562
- end
563
- }
564
- else
565
- @procedure.parameters.each_with_index { |p, i|
566
- if p.scalar_output? then
567
- arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
568
- arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
569
- t_args.push(arg_p)
570
- r_args[p] = arg_p
571
- else
572
- t_args.push( args[i] )
573
- end
574
- }
575
- end
576
- results = {}
577
- counters = nil
578
- ev_set.start if ev_set
579
- begin
580
- start = Time::new
581
- ret = #{@procedure.name}#{@lang == FORTRAN ? "_" : ""}(*t_args)
582
- stop = Time::new
583
- ensure
584
- if ev_set then
585
- counters = ev_set.stop
586
- ev_set.cleanup
587
- ev_set.destroy
588
- end
589
- end
590
- results = { :start => start, :stop => stop, :duration => stop - start, :return => ret }
591
- results[:PAPI] = Hash[[options[:PAPI]].flatten.zip(counters)] if ev_set
592
- if r_args.length > 0 then
593
- ref_return = {}
594
- r_args.each { |p, p_arg|
595
- ref_return[p.name.to_sym] = p_arg.send("read_\#{p.decl_ffi(true)}")
596
- }
597
- results[:reference_return] = ref_return
598
- end
599
- return results
600
- end
601
- end
602
- end
603
- EOF
604
- eval s
605
- end
606
-
607
- def build(options = {})
608
- compiler_options = BOAST::get_compiler_options
609
- compiler_options.update(options)
610
- return build_opencl(compiler_options) if @lang == CL
611
-
612
- linker, ldshared, ldflags = setup_compilers(compiler_options)
613
-
614
- extension = @@extensions[@lang]
615
-
616
- source_file, path, target = create_source
617
-
618
- if not ffi? then
619
- module_file_name, module_name = create_module_source(path)
620
- module_target = module_file_name.chomp(File::extname(module_file_name))+"."+RbConfig::CONFIG["OBJEXT"]
621
- module_final = module_file_name.chomp(File::extname(module_file_name))+"."+RbConfig::CONFIG["DLEXT"]
622
- else
623
- module_final = path.chomp(File::extname(path))+"."+RbConfig::CONFIG["DLEXT"]
624
- module_name = "Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_")
625
- end
626
-
627
- kernel_files = get_sub_kernels
628
-
629
- if not ffi? then
630
- file module_final => [module_target, target] do
631
- #puts "#{linker} #{ldshared} -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} #{ldflags}"
632
- sh "#{linker} #{ldshared} -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
633
- end
634
- Rake::Task[module_final].invoke
635
-
636
- require(module_final)
637
- else
638
- file module_final => [target] do
639
- #puts "#{linker} #{ldshared} -o #{module_final} #{target} #{kernel_files.join(" ")} #{ldflags}"
640
- sh "#{linker} #{ldshared} -o #{module_final} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
641
- end
642
- Rake::Task[module_final].invoke
643
- create_ffi_module(module_name, module_final)
644
- end
645
- eval "self.extend(#{module_name})"
646
- save_binary(target)
647
-
648
- if not ffi? then
649
- [target, module_target, module_file_name, module_final].each { |fn|
650
- File::unlink(fn)
651
- }
652
- else
653
- [target, module_final].each { |fn|
654
- File::unlink(fn)
655
- }
656
- end
657
- kernel_files.each { |f|
658
- f.unlink
659
- }
660
- return self
661
- end
662
-
663
- def fill_code(source_file)
664
- @code.rewind
665
- source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
666
- source_file.puts "#include <cuda.h>" if @lang == CUDA
667
- # check for too long FORTRAN lines
668
- if @lang == FORTRAN then
669
- @code.each_line { |line|
670
- # check for omp pragmas
671
- if line.match(/^\s*!\$/) then
672
- if line.match(/^\s*!\$(omp|OMP)/) then
673
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-7}}/)
674
- source_file.puts chunks.join("&\n!$omp&")
675
- else
676
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-4}}/)
677
- source_file.puts chunks.join("&\n!$&")
678
- end
679
- elsif line.match(/^\w*!/) then
680
- source_file.write line
681
- else
682
- chunks = line.scan(/.{1,#{FORTRAN_LINE_LENGTH-2}}/)
683
- source_file.puts chunks.join("&\n&")
684
- end
685
- }
686
- else
687
- source_file.write @code.read
688
- end
689
- if @lang == CUDA then
690
- source_file.write <<EOF
691
- extern "C" {
692
- #{@procedure.boast_header_s(CUDA)}{
693
- dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
694
- dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
695
- cudaEvent_t __start, __stop;
696
- float __time;
697
- cudaEventCreate(&__start);
698
- cudaEventCreate(&__stop);
699
- cudaEventRecord(__start, 0);
700
- #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
701
- cudaEventRecord(__stop, 0);
702
- cudaEventSynchronize(__stop);
703
- cudaEventElapsedTime(&__time, __start, __stop);
704
- return (unsigned long long int)((double)__time*(double)1e6);
705
- }
706
- }
707
- EOF
708
- end
709
- @code.rewind
710
- end
711
-
712
- def module_header(module_file)
713
- module_file.print <<EOF
714
- #include "ruby.h"
715
- #include <inttypes.h>
716
- #ifdef HAVE_NARRAY_H
717
- #include "narray.h"
718
- #endif
719
- EOF
720
- if OS.mac? then
721
- module_file.print <<EOF
722
- #if __cplusplus
723
- extern "C" {
724
- #endif
725
- #include <mach/mach_time.h>
726
- #if __cplusplus
727
- }
728
- #endif
729
- EOF
730
- else
731
- module_file.print "#include <time.h>\n"
732
- end
733
- if @lang == CUDA then
734
- module_file.print "#include <cuda_runtime.h>\n"
735
- end
736
- end
737
-
738
- def module_preamble(module_file, module_name)
739
- module_file.print <<EOF
740
- VALUE #{module_name} = Qnil;
741
- void Init_#{module_name}();
742
- VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self);
743
- void Init_#{module_name}() {
744
- #{module_name} = rb_define_module("#{module_name}");
745
- rb_define_method(#{module_name}, "run", method_run, -1);
746
- }
747
- EOF
748
- end
749
-
750
- def check_args(module_file)
751
- module_file.print <<EOF
752
- VALUE _boast_rb_opts;
753
- if( _boast_argc < #{@procedure.parameters.length} || _boast_argc > #{@procedure.parameters.length + 1} )
754
- rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", _boast_argc);
755
- _boast_rb_opts = Qnil;
756
- if( _boast_argc == #{@procedure.parameters.length + 1} ) {
757
- _boast_rb_opts = _boast_argv[_boast_argc -1];
758
- if ( _boast_rb_opts != Qnil ) {
759
- if (TYPE(_boast_rb_opts) != T_HASH)
760
- rb_raise(rb_eArgError, "Options should be passed as a hash");
761
- }
762
- }
763
- EOF
764
- end
765
-
766
- def get_params_value(module_file, argv, rb_ptr)
767
- set_decl_module(true)
768
- @procedure.parameters.each_index do |i|
769
- param = @procedure.parameters[i]
770
- if not param.dimension then
771
- case param.type
772
- when Int
773
- (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
774
- (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
775
- when Real
776
- (param === FuncCall::new("NUM2DBL", argv[i])).pr
777
- end
778
- else
779
- (rb_ptr === argv[i]).pr
780
- if @lang == CUDA then
781
- module_file.print <<EOF
782
- if ( IsNArray(_boast_rb_ptr) ) {
783
- struct NARRAY *_boast_n_ary;
784
- size_t _boast_array_size;
785
- Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
786
- _boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
787
- cudaMalloc( (void **) &#{param.name}, _boast_array_size);
788
- cudaMemcpy(#{param.name}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
789
- } else {
790
- rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
791
- }
792
- EOF
793
- else
794
- module_file.print <<EOF
795
- if (TYPE(_boast_rb_ptr) == T_STRING) {
796
- #{param.name} = (void *) RSTRING_PTR(_boast_rb_ptr);
797
- } else if ( IsNArray(_boast_rb_ptr) ) {
798
- struct NARRAY *_boast_n_ary;
799
- Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
800
- #{param.name} = (void *) _boast_n_ary->ptr;
801
- } else {
802
- rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
803
- }
804
- EOF
805
- end
806
- end
807
- end
808
- set_decl_module(false)
809
- end
810
-
811
- def decl_module_params(module_file)
812
- set_decl_module(true)
813
- @procedure.parameters.each { |param|
814
- param_copy = param.copy
815
- param_copy.constant = nil
816
- param_copy.direction = nil
817
- param_copy.decl
818
- }
819
- set_decl_module(false)
820
- module_file.print " #{@procedure.properties[:return].type.decl} _boast_ret;\n" if @procedure.properties[:return]
821
- module_file.print " VALUE _boast_stats = rb_hash_new();\n"
822
- module_file.print " VALUE _boast_event_set = Qnil;\n"
823
- if OS.mac? then
824
- module_file.print " uint64_t _mac_boast_start, _mac_boast_stop;\n"
825
- module_file.print " mach_timebase_info_data_t _mac_boast_timebase_info;\n"
826
- else
827
- module_file.print " struct timespec _boast_start, _boast_stop;\n"
828
- end
829
- module_file.print " unsigned long long int _boast_duration;\n"
830
- end
831
-
832
- def get_cuda_launch_bounds(module_file)
833
- module_file.print <<EOF
834
- size_t _boast_block_size[3] = {1,1,1};
835
- size_t _boast_block_number[3] = {1,1,1};
836
- if( _boast_rb_opts != Qnil ) {
837
- VALUE _boast_rb_array_data = Qnil;
838
- int _boast_i;
839
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
840
- if( _boast_rb_ptr != Qnil ) {
841
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
842
- rb_raise(rb_eArgError, "Cuda option block_size should be an array");
843
- for(_boast_i=0; _boast_i<3; _boast_i++) {
844
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
845
- if( _boast_rb_array_data != Qnil )
846
- _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
847
- }
848
- } else {
849
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
850
- if( _boast_rb_ptr != Qnil ) {
851
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
852
- rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
853
- for(_boast_i=0; _boast_i<3; _boast_i++) {
854
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
855
- if( _boast_rb_array_data != Qnil )
856
- _boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
857
- }
858
- }
859
- }
860
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
861
- if( _boast_rb_ptr != Qnil ) {
862
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
863
- rb_raise(rb_eArgError, "Cuda option block_number should be an array");
864
- for(_boast_i=0; _boast_i<3; _boast_i++) {
865
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
866
- if( _boast_rb_array_data != Qnil )
867
- _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
868
- }
869
- } else {
870
- _boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
871
- if( _boast_rb_ptr != Qnil ) {
872
- if (TYPE(_boast_rb_ptr) != T_ARRAY)
873
- rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
874
- for(_boast_i=0; _boast_i<3; _boast_i++) {
875
- _boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
876
- if( _boast_rb_array_data != Qnil )
877
- _boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
878
- }
879
- }
880
- }
881
- }
882
- EOF
883
- end
884
-
885
# Writes to +module_file+ the C fragment that sets up PAPI counting.
#
# The emitted C only acts when the options hash (_boast_rb_opts) is not nil
# and contains a :PAPI entry: it requires the "PAPI" Ruby library, creates a
# PAPI::EventSet, adds the requested named events to it and starts it. The
# event set is kept in the C variable _boast_event_set.
#
# module_file:: object responding to +print+ that receives the C source.
def get_PAPI_options(module_file)
  c_lines = [
    'if( _boast_rb_opts != Qnil ) {',
    '_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI")));',
    'if( _boast_rb_ptr != Qnil ) {',
    'VALUE _boast_PAPI = Qnil;',
    'VALUE _boast_EventSet = Qnil;',
    'rb_require("PAPI");',
    '_boast_PAPI = rb_const_get(rb_cObject, rb_intern("PAPI"));',
    '_boast_EventSet = rb_const_get(_boast_PAPI, rb_intern("EventSet"));',
    '_boast_event_set = rb_funcall(_boast_EventSet, rb_intern("new"), 0);',
    'rb_funcall(_boast_event_set, rb_intern("add_named"), 1, _boast_rb_ptr);',
    'rb_funcall(_boast_event_set, rb_intern("start"), 0);',
    '}',
    '}'
  ]
  # Every line is newline-terminated and the fragment is written in one call.
  module_file.print c_lines.map { |line| line + "\n" }.join
end
902
-
903
# Writes to +module_file+ the C fragment that collects PAPI counters.
#
# The emitted C only acts when _boast_event_set is not nil: it stops the
# event set, builds a flattened array from the :PAPI option, zips it with
# the values returned by "stop", turns the pairs into a Hash and stores that
# Hash under the :PAPI key of _boast_stats.
#
# module_file:: object responding to +print+ that receives the C source.
def get_PAPI_results(module_file)
  c_lines = [
    'if( _boast_event_set != Qnil) {',
    'VALUE _boast_papi_results = Qnil;',
    'VALUE _boast_papi_stats = Qnil;',
    '_boast_papi_results = rb_funcall(_boast_event_set, rb_intern("stop"), 0);',
    '_boast_papi_stats = rb_ary_new3(1,rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI"))));',
    '_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("flatten"), 0);',
    '_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("zip"), 1, _boast_papi_results);',
    '_boast_papi_stats = rb_funcall(rb_const_get(rb_cObject, rb_intern("Hash")), rb_intern("send"), 2, ID2SYM(rb_intern("[]")), _boast_papi_stats );',
    'rb_hash_aset(_boast_stats,ID2SYM(rb_intern("PAPI")),_boast_papi_stats);',
    '}'
  ]
  # Every line is newline-terminated and the fragment is written in one call.
  module_file.print c_lines.map { |line| line + "\n" }.join
end
917
-
918
# Writes to +module_file+ the C statement that invokes the kernel procedure.
#
# * For CUDA the call result is assigned to _boast_duration, the callee name
#   gets a "_wrapper" suffix and _boast_block_number / _boast_block_size are
#   appended to the argument list.
# * Otherwise, when the procedure declares a :return property, the result is
#   assigned to _boast_ret.
# * For FORTRAN the callee name gets a trailing "_" and every parameter
#   without a dimension is passed by address.
# * For the remaining languages only dimension-less :out / :inout parameters
#   are passed by address.
#
# module_file:: object responding to +print+ that receives the C source.
def create_procedure_call(module_file)
  result_holder =
    if @lang == CUDA
      " _boast_duration = "
    elsif @procedure.properties[:return]
      " _boast_ret = "
    end
  module_file.print result_holder if result_holder

  name_suffix = ""
  name_suffix += "_" if @lang == FORTRAN
  name_suffix += "_wrapper" if @lang == CUDA
  module_file.print " #{@procedure.name}#{name_suffix}("

  fortran = (@lang == FORTRAN)
  call_args = @procedure.parameters.collect do |param|
    # Parameters with a dimension are passed as-is, whatever the language.
    by_address = !param.dimension &&
                 (fortran || param.direction == :out || param.direction == :inout)
    by_address ? "&" + param.name : param.name
  end
  call_args.push( "_boast_block_number", "_boast_block_size" ) if @lang == CUDA

  module_file.print call_args.join(", ")
  module_file.print " );\n"
end
954
-
955
# Writes to +module_file+ the C code that hands results back to Ruby after
# the kernel call. The whole generation is bracketed by set_decl_module(true)
# and set_decl_module(false).
#
# * CUDA: for every parameter with a dimension, the matching Ruby argument
#   is first emitted through the BOAST expression (rb_ptr === argv[i]).pr
#   (presumably an assignment to _boast_rb_ptr - confirm against the DSL).
#   The generated C then checks that it is an NArray, copies device memory
#   back to the NArray for :out / :inout parameters, frees the device buffer,
#   and raises an ArgumentError for any other argument type.
# * Other languages: parameters answering true to scalar_output? are stored
#   in a "reference_return" hash inside _boast_stats, as integers (Int) or
#   floats (Real). The hash is only created when the first such parameter is
#   met.
#
# module_file:: object responding to +print+ that receives the C source.
# argv::        BOAST variable standing for the C array of Ruby arguments.
# rb_ptr::      BOAST variable standing for the _boast_rb_ptr C variable.
def get_results(module_file, argv, rb_ptr)
  set_decl_module(true)
  if @lang == CUDA
    @procedure.parameters.each_with_index do |param, i|
      next unless param.dimension
      (rb_ptr === argv[i]).pr
      module_file.print <<EOF
if ( IsNArray(_boast_rb_ptr) ) {
EOF
      copied_back = (param.direction == :out || param.direction == :inout)
      if copied_back
        module_file.print <<EOF
struct NARRAY *_boast_n_ary;
size_t _boast_array_size;
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
cudaMemcpy((void *) _boast_n_ary->ptr, #{param.name}, _boast_array_size, cudaMemcpyDeviceToHost);
EOF
      end
      module_file.print <<EOF
cudaFree( (void *) #{param.name});
} else {
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
}
EOF
    end
  else
    refs_declared = false
    @procedure.parameters.each do |param|
      next unless param.scalar_output?
      unless refs_declared
        # The reference hash is declared lazily, once.
        module_file.print " VALUE _boast_refs = rb_hash_new();\n"
        module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"reference_return\")),_boast_refs);\n"
        refs_declared = true
      end
      case param.type
      when Int
        c_cast = param.type.signed? ? "long long" : "unsigned long long"
        module_file.print " rb_hash_aset(_boast_refs, ID2SYM(rb_intern(\"#{param}\")),rb_int_new((#{c_cast})#{param}));\n"
      when Real
        module_file.print " rb_hash_aset(_boast_refs, ID2SYM(rb_intern(\"#{param}\")),rb_float_new((double)#{param}));\n"
      end
    end
  end
  set_decl_module(false)
end
1003
-
1004
# Writes to +module_file+ the C code that stores the measurements in the
# _boast_stats hash.
#
# The duration is always stored under :duration; the emitted C multiplies
# _boast_duration by 1e-9. When the procedure declares a :return property,
# _boast_ret is stored under :return as an integer (signed or unsigned Int)
# or as a float (Real). Any other return type emits nothing.
#
# module_file:: object responding to +print+ that receives the C source.
def store_result(module_file)
  module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)_boast_duration*(double)1e-9));\n"
  returned = @procedure.properties[:return]
  return unless returned

  type_ret = returned.type
  conversion =
    if type_ret.kind_of?(Int)
      c_cast = type_ret.signed ? "long long" : "unsigned long long"
      "rb_int_new((#{c_cast})_boast_ret)"
    elsif type_ret.kind_of?(Real)
      "rb_float_new((double)_boast_ret)"
    end
  module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"return\")),#{conversion});\n" if conversion
end
1013
-
1014
# Generates the whole C extension source for the kernel into +module_file+.
#
# Steps, in emission order: module header, procedure header, module
# preamble, then the body of the C function method_run: argument checks,
# declaration and extraction of the parameters, CUDA launch bounds (CUDA
# only), PAPI start, first time stamp, the procedure call, second time
# stamp, PAPI results, duration computation (skipped for CUDA, where the
# call itself assigns _boast_duration), result retrieval and storage, and
# finally the return of _boast_stats.
#
# module_file:: object responding to +print+ / +puts+ receiving the C source.
# module_name:: name forwarded to module_preamble.
def fill_module(module_file, module_name)
  module_header(module_file)
  @procedure.boast_header(@lang)
  module_preamble(module_file, module_name)

  module_file.puts "VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self) {"
  increment_indent_level
  check_args(module_file)

  argc = @procedure.parameters.length
  argv = Variable::new("_boast_argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
  rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
  set_transition("VALUE", "VALUE", :default, CustomType::new(:type_name => "VALUE"))
  rb_ptr.decl

  decl_module_params(module_file)
  get_params_value(module_file, argv, rb_ptr)

  module_file.print get_cuda_launch_bounds(module_file) if @lang == CUDA

  get_PAPI_options(module_file)

  # Emits one time stamp; +which+ is "start" or "stop". The platform is
  # queried on every call.
  take_timestamp = lambda do |which|
    if OS.mac?
      module_file.print " _mac_boast_#{which} = mach_absolute_time();\n"
    else
      module_file.print " clock_gettime(CLOCK_REALTIME, &_boast_#{which});\n"
    end
  end

  take_timestamp.call("start")
  create_procedure_call(module_file)
  take_timestamp.call("stop")

  get_PAPI_results(module_file)

  unless @lang == CUDA
    if OS.mac?
      module_file.print " mach_timebase_info(&_mac_boast_timebase_info);\n"
      module_file.print " _boast_duration = (_mac_boast_stop - _mac_boast_start) * _mac_boast_timebase_info.numer / _mac_boast_timebase_info.denom;\n"
    else
      module_file.print " _boast_duration = (_boast_stop.tv_sec - _boast_start.tv_sec) * (unsigned long long int)1000000000 + _boast_stop.tv_nsec - _boast_start.tv_nsec;\n"
    end
  end

  get_results(module_file, argv, rb_ptr)
  store_result(module_file)

  module_file.print " return _boast_stats;\n"
  decrement_indent_level
  module_file.print "}"
end
1072
-
1073
# Lazily builds the kernel the first time +run+ is requested.
#
# When the missing method is "run", +build+ is called and the call is then
# replayed with the same arguments and block; the replay assumes +build+
# defines +run+ on the receiver (confirm against build). Any other missing
# method is forwarded to the default handling, which raises NoMethodError.
def method_missing(meth, *args, &block)
  return super unless meth.to_s == "run"
  build
  run(*args, &block)
end

# Keeps respond_to? consistent with method_missing: +run+ is answerable
# even before the kernel has been built.
def respond_to_missing?(meth, include_private = false)
  meth.to_s == "run" || super
end
1081
-
1082
# Loads the reference input data sets of the procedure.
#
# Thin wrapper around load_ref_files with the intent fixed to :in.
#
# path::   base directory handed to load_ref_files (default "").
# suffix:: suffix of the data files (default ".in").
def load_ref_inputs(path = "", suffix = ".in" )
  load_ref_files( path, suffix, :in )
end
1085
-
1086
# Loads the reference output data sets of the procedure.
#
# Thin wrapper around load_ref_files with the intent fixed to :out.
#
# path::   base directory handed to load_ref_files (default "").
# suffix:: suffix of the data files (default ".out").
def load_ref_outputs(path = "", suffix = ".out" )
  load_ref_files( path, suffix, :out )
end
1089
-
1090
# Compares kernel outputs against reference outputs, parameter by parameter.
#
# Parameters whose direction is :in, or that are constant, are skipped. For
# a parameter with a dimension the element-wise absolute difference is
# computed and its maximum is reported; for a scalar the absolute difference
# itself is reported.
#
# ref_outputs:: reference values, indexed like the procedure parameters.
# outputs::     computed values, indexed like the procedure parameters.
# epsilon::     optional tolerance; when given, any difference strictly
#               greater than it raises a RuntimeError.
#
# Returns a Hash mapping parameter names to the measured difference.
def compare_ref(ref_outputs, outputs, epsilon = nil)
  deviations = {}
  @procedure.parameters.each_with_index do |param, idx|
    next if param.direction == :in || param.constant

    delta = (outputs[idx] - ref_outputs[idx]).abs
    if param.dimension
      if epsilon
        delta.each do |elem|
          raise "Error: #{param.name} different from ref by: #{elem}!" if elem > epsilon
        end
      end
      deviations[param.name] = delta.max
    else
      raise "Error: #{param.name} different from ref: #{outputs[idx]} != #{ref_outputs[idx]} !" if epsilon && delta > epsilon
      deviations[param.name] = delta
    end
  end
  deviations
end
1111
-
1112
# Returns the NArray type code used to load reference data for +param+.
#
# Mapping on the class and byte size of param.type:
# * Real: 4 -> NArray::SFLOAT, 8 -> NArray::FLOAT
# * Int:  1 -> NArray::BYTE, 2 -> NArray::SINT, 4 -> NArray::INT
#
# Any other size or type class falls back to NArray::BYTE; a warning is
# written to STDERR when debug? is true.
#
# Fix: 4-byte integers previously mapped to NArray::SINT (the 16-bit type, a
# duplicate of the 2-byte branch), so such data was read with the wrong
# element width.
def get_array_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4 then NArray::SFLOAT
    when 8 then NArray::FLOAT
    else
      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
      NArray::BYTE
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1 then NArray::BYTE
    when 2 then NArray::SINT
    when 4 then NArray::INT
    else
      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
      NArray::BYTE
    end
  else
    STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
    NArray::BYTE
  end
end
1141
-
1142
# Returns the String#unpack directive used to decode a scalar +param+.
#
# * Real: 4 bytes -> "f", 8 bytes -> "d"
# * Int:  1/2/4/8 bytes -> "C"/"S"/"L"/"Q", lower-cased when the type is
#   signed
#
# Raises a RuntimeError for an unsupported Real or Int size. Returns nil
# when param.type is neither a Real nor an Int.
def get_scalar_type(param)
  if param.type.class == Real
    case param.type.size
    when 4 then "f"
    when 8 then "d"
    else raise "Unsupported Real scalar size: #{param.type.size}!"
    end
  elsif param.type.class == Int
    directive =
      case param.type.size
      when 1 then "C"
      when 2 then "S"
      when 4 then "L"
      when 8 then "Q"
      else raise "Unsupported Int scalar size: #{param.type.size}!"
      end
    param.type.signed? ? directive.downcase : directive
  end
end
1171
-
1172
# Reads the reference value of +param+ from <directory>/<param.name><suffix>.
#
# Returns nil without touching the file system when +intent+ is :out and the
# parameter is an input (direction :in) or a constant. Parameters with a
# dimension are loaded as an NArray of the type given by get_array_type (an
# empty file yields a one-element NArray); scalars are decoded with the
# unpack directive given by get_scalar_type.
#
# Fix: the file is opened in block form so the handle is closed even when
# decoding raises (for instance on an unsupported scalar size); it used to
# stay open in that case.
#
# param::     procedure parameter to load.
# directory:: directory holding the reference files.
# suffix::    file name suffix appended to the parameter name.
# intent::    :in or :out, the kind of reference data being loaded.
def read_param(param, directory, suffix, intent)
  return nil if intent == :out && ( param.direction == :in || param.constant )

  File::open( directory + "/" + param.name + suffix, "rb" ) do |f|
    if param.dimension then
      type = get_array_type(param)
      if f.size == 0 then
        NArray::new(type, 1)
      else
        NArray.to_na(f.read, type)
      end
    else
      type = get_scalar_type(param)
      f.read.unpack(type).first
    end
  end
end
1191
-
1192
# Reads the launch geometry stored in <directory>/problem_size.
#
# The file must contain two <...> groups of comma separated integers: the
# first is the local dimension, the second the global one. Both are padded
# with 1 up to three entries.
#
# Returns, for CL, { :global_work_size, :local_work_size } where each global
# entry has been multiplied by the matching local entry; for any other
# language, { :block_number, :block_size } holding the global and local
# dimensions unchanged.
#
# Fix: the file is read with File::read, so no handle is left open when
# parsing fails, and a file without two <...> groups now raises a
# descriptive RuntimeError instead of a NoMethodError on nil.
def get_gpu_dim(directory)
  file_name = directory + "/problem_size"
  local_dim, global_dim = File::read( file_name ).scan(/<(.*?)>/)
  unless local_dim && global_dim
    raise "Malformed problem_size file #{file_name}: expected two <...> groups!"
  end

  local_dim = local_dim.first.split(",").collect { |e| e.to_i }
  global_dim = global_dim.first.split(",").collect { |e| e.to_i }
  (local_dim.length..2).each { |i| local_dim[i] = 1 }
  (global_dim.length..2).each { |i| global_dim[i] = 1 }

  if @lang == CL then
    # Here the global size is expressed in work items rather than in groups.
    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
    { :global_work_size => global_dim, :local_work_size => local_dim }
  else
    { :block_number => global_dim, :block_size => local_dim }
  end
end
1209
-
1210
# Loads every reference data set found under <path>/<procedure name>/.
#
# Each sub-directory is one data set. For each of them an array is built
# with one entry per procedure parameter (as returned by read_param) and,
# for CUDA and CL kernels, a final entry holding the launch geometry
# returned by get_gpu_dim.
#
# path::   base directory containing the procedure directory (default "").
# suffix:: suffix of the data files (default "").
# intent:: :in or :out, forwarded to read_param.
#
# Returns a Hash mapping each data set directory (as a String) to its array
# of values; the Hash is empty when the sub-directories cannot be listed.
def load_ref_files( path = "", suffix = "", intent )
  proc_path = path + "/#{@procedure.name}/"
  loaded = {}
  begin
    case_dirs = Pathname.new(proc_path).children.select { |child| child.directory? }
  rescue
    # Unreadable or missing reference directory: nothing to load.
    return loaded
  end

  case_dirs.map { |dir| dir.to_s }.each do |dir|
    values = @procedure.parameters.map { |param| read_param(param, dir, suffix, intent) }
    values.push get_gpu_dim(dir) if @lang == CUDA || @lang == CL
    loaded[dir] = values
  end
  loaded
end
1231
-
1232
# Evaluates the kernel's cost function.
#
# All arguments are forwarded unchanged to the callable held in
# @cost_function, and its result is returned.
def cost(*args)
  estimator = @cost_function
  estimator.call(*args)
end
1235
- end
1236
- end