BOAST 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BOAST.gemspec +2 -31
- data/README.md +240 -0
- data/lib/BOAST/{OpenMP.rb → Language/OpenMP.rb} +1 -0
- data/lib/BOAST/{Variable.rb → Language/Variable.rb} +2 -1
- data/lib/BOAST/Runtime/CKernel.rb +94 -0
- data/lib/BOAST/Runtime/CRuntime.rb +32 -0
- data/lib/BOAST/Runtime/CUDARuntime.rb +158 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +398 -0
- data/lib/BOAST/Runtime/Compilers.rb +205 -0
- data/lib/BOAST/Runtime/Config.rb +94 -0
- data/lib/BOAST/Runtime/FFIRuntime.rb +104 -0
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +45 -0
- data/lib/BOAST/Runtime/MPPARuntime.rb +464 -0
- data/lib/BOAST/Runtime/NonRegression.rb +157 -0
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +181 -0
- data/lib/BOAST/Runtime/Probe.rb +136 -0
- data/lib/BOAST.rb +37 -26
- metadata +40 -28
- data/lib/BOAST/CKernel.rb +0 -1236
- /data/lib/BOAST/{Algorithm.rb → Language/Algorithm.rb} +0 -0
- /data/lib/BOAST/{Arithmetic.rb → Language/Arithmetic.rb} +0 -0
- /data/lib/BOAST/{BOAST_OpenCL.rb → Language/BOAST_OpenCL.rb} +0 -0
- /data/lib/BOAST/{Case.rb → Language/Case.rb} +0 -0
- /data/lib/BOAST/{ControlStructure.rb → Language/ControlStructure.rb} +0 -0
- /data/lib/BOAST/{DataTypes.rb → Language/DataTypes.rb} +0 -0
- /data/lib/BOAST/{Expression.rb → Language/Expression.rb} +0 -0
- /data/lib/BOAST/{For.rb → Language/For.rb} +0 -0
- /data/lib/BOAST/{FuncCall.rb → Language/FuncCall.rb} +0 -0
- /data/lib/BOAST/{Functors.rb → Language/Functors.rb} +0 -0
- /data/lib/BOAST/{If.rb → Language/If.rb} +0 -0
- /data/lib/BOAST/{Index.rb → Language/Index.rb} +0 -0
- /data/lib/BOAST/{Inspectable.rb → Language/Inspectable.rb} +0 -0
- /data/lib/BOAST/{Operators.rb → Language/Operators.rb} +0 -0
- /data/lib/BOAST/{Optimization.rb → Language/Optimization.rb} +0 -0
- /data/lib/BOAST/{Parens.rb → Language/Parens.rb} +0 -0
- /data/lib/BOAST/{Pragma.rb → Language/Pragma.rb} +0 -0
- /data/lib/BOAST/{Print.rb → Language/Print.rb} +0 -0
- /data/lib/BOAST/{Procedure.rb → Language/Procedure.rb} +0 -0
- /data/lib/BOAST/{Slice.rb → Language/Slice.rb} +0 -0
- /data/lib/BOAST/{State.rb → Language/State.rb} +0 -0
- /data/lib/BOAST/{Transitions.rb → Language/Transitions.rb} +0 -0
- /data/lib/BOAST/{While.rb → Language/While.rb} +0 -0
data/lib/BOAST/CKernel.rb
DELETED
@@ -1,1236 +0,0 @@
|
|
1
|
-
require 'stringio'
|
2
|
-
require 'rubygems'
|
3
|
-
require 'rake'
|
4
|
-
require 'tempfile'
|
5
|
-
require 'rbconfig'
|
6
|
-
require 'systemu'
|
7
|
-
require 'yaml'
|
8
|
-
require 'pathname'
|
9
|
-
require 'os'
|
10
|
-
|
11
|
-
module BOAST
|
12
|
-
# Built-in toolchain defaults. read_boast_config overlays the user's
# compiler_options file and then same-named environment variables on top.
@@compiler_default_options = {
  FC: 'gfortran',
  FCFLAGS: '-O2 -Wall',
  CC: 'gcc',
  CFLAGS: '-O2 -Wall',
  CXX: 'g++',
  CXXFLAGS: '-O2 -Wall',
  NVCC: 'nvcc',
  NVCCFLAGS: '-O2',
  LDFLAGS: '',
  CLFLAGS: '',
  # OpenCL platform/device selectors; nil means "no filter".
  CLVENDOR: nil,
  CLPLATFORM: nil,
  CLDEVICE: nil,
  CLDEVICETYPE: nil,
  openmp: false
}
|
29
|
-
|
30
|
-
# Compiler name => flag that enables OpenMP for that compiler.
# Key order is kept as-is: CKernel#get_openmp_flags falls back to matching
# these keys against the compiler name in iteration order.
@@openmp_default_flags = {
  'gcc' => '-fopenmp',
  'icc' => '-openmp',
  'gfortran' => '-fopenmp',
  'ifort' => '-openmp',
  'g++' => '-fopenmp',
  'icpc' => '-openmp'
}
|
38
|
-
|
39
|
-
# Registers the verbose, debug_source and ffi states through
# private_boolean_state_accessor (defined elsewhere in BOAST).
module PrivateStateAccessor
  [:verbose, :debug_source, :ffi].each do |state|
    private_boolean_state_accessor state
  end
end
|
44
|
-
|
45
|
-
# Module-level accessors for the runtime switches (boolean_state_accessor is
# defined elsewhere in BOAST).
[:verbose, :debug_source, :ffi].each do |state|
  boolean_state_accessor state
end

# Every switch starts off; read_boast_config may override them from ENV.
@@ffi = false
@@verbose = false
@@debug_source = false

# Line width used by CKernel#fill_code when wrapping FORTRAN source lines.
FORTRAN_LINE_LENGTH = 72

module_function
|
54
|
-
|
55
|
-
# Loads the persistent BOAST configuration and applies environment overrides.
#
# The configuration lives in $XDG_CONFIG_HOME/BOAST (or ~/.config/BOAST).
# For each of the two files (compiler_options, openmp_flags): if the file
# exists its YAML content is merged into the in-memory defaults, otherwise
# the current defaults are written out to create it. Afterwards, environment
# variables named like a compiler option (plus LD, VERBOSE, FFI and
# DEBUG_SOURCE) override the stored values.
#
# Returns early (doing nothing) when the configuration home exists but is
# not a directory.
def read_boast_config
  home_config_dir = ENV["XDG_CONFIG_HOME"] || "#{Dir.home}/.config"
  Dir.mkdir( home_config_dir ) unless File::exist?( home_config_dir )
  return unless File::directory?( home_config_dir )

  boast_config_dir = "#{home_config_dir}/BOAST"
  Dir.mkdir( boast_config_dir ) unless File::exist?( boast_config_dir )

  stored_settings = [
    ["compiler_options", @@compiler_default_options],
    ["openmp_flags", @@openmp_default_flags]
  ]
  stored_settings.each do |file_name, defaults|
    settings_file = "#{boast_config_dir}/#{file_name}"
    if File::exist?( settings_file )
      # NOTE(review): YAML::load on a user-owned file; consider a safe loader
      # if this file could ever come from an untrusted source.
      File::open( settings_file, "r" ) { |f| defaults.update( YAML::load( f.read ) ) }
    else
      File::open( settings_file, "w" ) { |f| f.write YAML::dump( defaults ) }
    end
  end

  # Environment variables win over both built-in and stored values.
  @@compiler_default_options.each_key do |option|
    from_env = ENV[option.to_s]
    @@compiler_default_options[option] = from_env if from_env
  end
  @@compiler_default_options[:LD] = ENV["LD"] if ENV["LD"]

  # ENV values are strings, so any set value (even "0" or "false") is truthy.
  @@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
  @@ffi = ENV["FFI"] if ENV["FFI"]
  @@debug_source = ENV["DEBUG_SOURCE"] if ENV["DEBUG_SOURCE"]
end

# Configuration is loaded once, when this file is evaluated.
read_boast_config
|
92
|
-
|
93
|
-
# Returns a shallow copy of the compiler-name => OpenMP-flag table, so that
# callers cannot alter the module-wide defaults by modifying the result.
def get_openmp_flags
  @@openmp_default_flags.clone
end
|
96
|
-
|
97
|
-
# Returns a shallow copy of the current default compiler options, so that
# callers cannot alter the module-wide defaults by modifying the result.
def get_compiler_options
  @@compiler_default_options.clone
end
|
100
|
-
|
101
|
-
class CKernel
|
102
|
-
# Rake::DSL supplies the rule/file/sh helpers used by the compiler setup.
include Rake::DSL
include Inspectable
include PrivateStateAccessor
include TypeTransition

# code:          IO-like object holding the generated kernel source
# procedure:     procedure that is compiled and run
# lang:          language the kernel is written in
# binary:        StringIO with the compiled object (set by save_binary)
# kernels:       sub-kernels whose binaries are linked in
# cost_function: not used in the visible part of this file
attr_accessor :code, :procedure, :lang, :binary, :kernels, :cost_function
|
113
|
-
|
114
|
-
# Creates a kernel.
#
# options[:code]    - IO-like object to use as the source buffer
# options[:kernels] - sub-kernels to link with this one (default: [])
# options[:lang]    - kernel language (default: the current BOAST language)
#
# Without :code, the current BOAST output is reused (positioned at its end)
# when chain-code mode is on; otherwise a fresh StringIO is created. The
# chosen buffer becomes the BOAST output.
def initialize(options={})
  @code = options[:code]
  unless @code
    if get_chain_code
      @code = get_output
      # NOTE(review): SEEK_END is resolved through the lexical scope or the
      # included modules, none of which are visible here; confirm it resolves.
      @code.seek(0,SEEK_END)
    else
      @code = StringIO::new
    end
  end
  set_output(@code)
  @kernels = options[:kernels] || []
  @lang = options[:lang] || get_lang
end
|
135
|
-
|
136
|
-
# Writes the whole kernel source to standard output.
# The buffer is rewound first and is left positioned at its end.
def print
  @code.rewind
  source_text = @code.read
  puts source_text
end
|
140
|
-
|
141
|
-
# Returns the whole kernel source as a String.
# The buffer is rewound first and is left positioned at its end.
def to_s
  @code.rewind
  code.read
end
|
145
|
-
|
146
|
-
|
147
|
-
# Looks up the OpenMP flag for +compiler+.
#
# An exact table entry wins. Otherwise each known compiler name is used as a
# pattern against +compiler+ (so "gcc-4.8" finds "gcc"); when several names
# match, the last one in table order is used. Returns nil when nothing fits.
def get_openmp_flags(compiler)
  known_flags = BOAST::get_openmp_flags
  exact = known_flags[compiler]
  return exact if exact

  fallback = exact
  known_flags.each do |known_compiler, flag|
    fallback = flag if compiler.match(known_compiler)
  end
  fallback
end
|
157
|
-
|
158
|
-
# Builds the "-I..." include options for compiling against this Ruby.
#
# narray_path - root directory of the narray gem, or nil when unavailable.
#
# Returns a single space-separated String.
def get_includes(narray_path)
  ruby_config = RbConfig::CONFIG
  header_dir = ruby_config["rubyhdrdir"]
  include_dirs = [
    ruby_config["archdir"],
    header_dir,
    "#{header_dir}/#{ruby_config["arch"]}"
  ]
  # rubyarchhdrdir is only appended when this Ruby's configuration defines it.
  include_dirs << ruby_config["rubyarchhdrdir"] if ruby_config["rubyarchhdrdir"]
  include_dirs << narray_path if narray_path
  include_dirs.map { |dir| "-I#{dir}" }.join(" ")
end
|
165
|
-
|
166
|
-
# Locates the installed narray gem.
#
# Returns the gem's root directory, or nil when narray is not installed.
# The NoMethodError branch is the fallback for RubyGems versions that lack
# Gem::Specification.find_by_name.
def get_narray_path
  narray_path = nil
  begin
    narray_path = Gem::Specification::find_by_name('narray').full_gem_path
  rescue Gem::LoadError
    # narray is not installed: leave the path nil
  rescue NoMethodError
    if Gem::available?('narray')
      require 'narray'
      narray_path = Gem.loaded_specs['narray'].full_gem_path
    end
  end
  narray_path
end
|
180
|
-
|
181
|
-
# Registers the Rake rule that compiles ".c" sources into object files.
#
# options     - compiler options (:CC, :CFLAGS, :openmp are read)
# includes    - "-I..." string as built by get_includes
# narray_path - truthy when narray headers are available
# runner      - callable invoked as runner.call(task, command_line)
#
# Raises when OpenMP is requested for a C kernel and no flag is known for
# the configured compiler.
def setup_c_compiler(options, includes, narray_path, runner)
  c_compiler = options[:CC]
  cflags = options[:CFLAGS] + " -fPIC #{includes}"
  cflags << " -DHAVE_NARRAY_H" if narray_path
  if options[:openmp] && @lang == C
    openmp_cflags = get_openmp_flags(c_compiler)
    raise "unkwown openmp flags for: #{c_compiler}" unless openmp_cflags
    cflags << " #{openmp_cflags}"
  end

  object_extension = ".#{RbConfig::CONFIG["OBJEXT"]}"
  rule object_extension => '.c' do |t|
    runner.call(t, "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}")
  end
end
|
197
|
-
|
198
|
-
# Registers the Rake rule that compiles ".cpp" sources into object files.
#
# options  - compiler options (:CXX, :CXXFLAGS, :openmp are read)
# includes - "-I..." string as built by get_includes
# runner   - callable invoked as runner.call(task, command_line)
#
# Raises when OpenMP is requested for a C kernel and no flag is known for
# the configured C++ compiler.
def setup_cxx_compiler(options, includes, runner)
  cxx_compiler = options[:CXX]
  cxxflags = options[:CXXFLAGS] + " -fPIC #{includes}"
  if options[:openmp] && @lang == C
    openmp_cxxflags = get_openmp_flags(cxx_compiler)
    raise "unkwown openmp flags for: #{cxx_compiler}" unless openmp_cxxflags
    cxxflags << " #{openmp_cxxflags}"
  end

  object_extension = ".#{RbConfig::CONFIG["OBJEXT"]}"
  rule object_extension => '.cpp' do |t|
    runner.call(t, "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}")
  end
end
|
213
|
-
|
214
|
-
# Registers the Rake rule that compiles ".f90" sources into object files.
#
# options - compiler options (:FC, :FCFLAGS, :openmp are read)
# runner  - callable invoked as runner.call(task, command_line)
#
# Raises when OpenMP is requested for a FORTRAN kernel and no flag is known
# for the configured compiler.
def setup_fortran_compiler(options, runner)
  f_compiler = options[:FC]
  fcflags = options[:FCFLAGS] + " -fPIC"
  # g95 gets -fno-second-underscore in addition to -fPIC.
  fcflags << " -fno-second-underscore" if f_compiler == 'g95'
  if options[:openmp] && @lang == FORTRAN
    openmp_fcflags = get_openmp_flags(f_compiler)
    raise "unkwown openmp flags for: #{f_compiler}" unless openmp_fcflags
    fcflags << " #{openmp_fcflags}"
  end

  object_extension = ".#{RbConfig::CONFIG["OBJEXT"]}"
  rule object_extension => '.f90' do |t|
    runner.call(t, "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}")
  end
end
|
230
|
-
|
231
|
-
# Registers the Rake rule that compiles ".cu" sources into object files.
#
# options - compiler options (:NVCC and :NVCCFLAGS are read)
# runner  - callable invoked as runner.call(task, command_line)
def setup_cuda_compiler(options, runner)
  cuda_compiler = options[:NVCC]
  # -fPIC is forwarded to the host compiler through nvcc.
  cudaflags = options[:NVCCFLAGS] + " --compiler-options '-fPIC'"

  object_extension = ".#{RbConfig::CONFIG["OBJEXT"]}"
  rule object_extension => '.cu' do |t|
    runner.call(t, "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}")
  end
end
|
241
|
-
|
242
|
-
# Works out how to link the shared module.
#
# options - compiler options (:LDFLAGS, :LD, :CC, :openmp are read)
#
# The linker is options[:LD], falling back to options[:CC], then "cc".
#
# Returns [linker, ldshared, ldflags].
# Raises when OpenMP is requested and no flag is known for the linker.
def setup_linker(options)
  ruby_config = RbConfig::CONFIG
  ldflags = options[:LDFLAGS] + " -L#{ruby_config["libdir"]} #{ruby_config["LIBRUBYARG"]}"
  ldflags << " -lrt" unless OS.mac?
  ldflags << " -lcudart" if @lang == CUDA

  linker = options[:LD] || options[:CC] || "cc"
  if options[:openmp]
    openmp_ldflags = get_openmp_flags(linker)
    raise "unkwown openmp flags for: #{linker}" unless openmp_ldflags
    ldflags << " #{openmp_ldflags}"
  end

  # Platform-specific flags go in front of everything collected so far.
  if OS.mac?
    [linker, "-dynamic -bundle", "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"]
  else
    [linker, "-shared", "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"]
  end
end
|
267
|
-
|
268
|
-
# Resets Rake and registers the compile rules for every supported language.
#
# options - compiler options; options[:verbose] overrides the global
#           verbose state when truthy.
#
# Returns [linker, ldshared, ldflags] as computed by setup_linker.
def setup_compilers(options = {})
  Rake::Task::clear
  verbose = options[:verbose] || get_verbose
  Rake::verbose(verbose)
  Rake::FileUtilsExt.verbose_flag=verbose

  narray_path = get_narray_path
  includes = get_includes(narray_path)

  # Runs one compile command: echoed through Rake's sh when verbose,
  # otherwise captured, with stderr shown only if the command fails.
  runner = lambda do |t, call_string|
    next sh(call_string) if verbose

    status, _stdout, stderr = systemu call_string
    unless status.success?
      puts stderr
      fail "#{t.source}: compilation failed"
    end
    status.success?
  end

  setup_c_compiler(options, includes, narray_path, runner)
  setup_cxx_compiler(options, includes, runner)
  setup_fortran_compiler(options, runner)
  setup_cuda_compiler(options, runner)

  setup_linker(options)
end
|
299
|
-
|
300
|
-
# Picks the OpenCL platform to use.
#
# options[:platform_vendor] (or, when absent, options[:CLVENDOR]) filters on
# the platform vendor; options[:CLPLATFORM] additionally filters on the
# platform name. Filters are matched with String#match.
#
# Returns the first remaining platform, or nil when none is left.
def select_cl_platform(options)
  candidates = OpenCL::get_platforms

  vendor_filter = options[:platform_vendor] || options[:CLVENDOR]
  candidates.select! { |platform| platform.vendor.match(vendor_filter) } if vendor_filter

  name_filter = options[:CLPLATFORM]
  candidates.select! { |platform| platform.name.match(name_filter) } if name_filter

  candidates.first
end
|
318
|
-
|
319
|
-
# Picks the OpenCL device to use on the platform chosen by
# select_cl_platform.
#
# options[:device_type] (or options[:CLDEVICETYPE]) names a constant of
# OpenCL::Device::Type; without either, all device types are considered.
# options[:device_name] (or options[:CLDEVICE]) filters on the device name.
#
# Returns the first remaining device, or nil when none is left.
def select_cl_device(options)
  platform = select_cl_platform(options)

  type_name = options[:device_type] || options[:CLDEVICETYPE]
  type = type_name ? OpenCL::Device::Type.const_get(type_name) : OpenCL::Device::Type::ALL
  candidates = platform.devices(type)

  name_filter = options[:device_name] || options[:CLDEVICE]
  candidates.select! { |device| device.name.match(name_filter) } if name_filter

  candidates.first
end
|
334
|
-
|
335
|
-
# Builds the lookup tables used by create_opencl_scalar to wrap scalar
# arguments: reals are keyed by byte size, integers by signedness and then
# by byte size. Must run after opencl_ruby_ffi has been loaded.
def init_opencl_types
  @@opencl_real_types = { 2 => OpenCL::Half, 4 => OpenCL::Float, 8 => OpenCL::Double }

  signed_types = { 1 => OpenCL::Char, 2 => OpenCL::Short, 4 => OpenCL::Int, 8 => OpenCL::Long }
  unsigned_types = { 1 => OpenCL::UChar, 2 => OpenCL::UShort, 4 => OpenCL::UInt, 8 => OpenCL::ULong }
  # Outer key is the "signed" attribute of the parameter type.
  @@opencl_int_types = { true => signed_types, false => unsigned_types }
end
|
357
|
-
|
358
|
-
# Compiles the kernel source with OpenCL and prepares it for execution.
#
# options - compiler options; :CLFLAGS is passed to the OpenCL compiler,
#           :verbose (or the global verbose state) enables extra output,
#           and the platform/device selectors are forwarded to
#           select_cl_device.
#
# Sets @context, @queue (profiling enabled) and @kernel.
#
# Returns self.
# Raises RuntimeError when the program fails to build; the error, build
# status and build log are printed first (plus the source when verbose).
def init_opencl(options)
  require 'opencl_ruby_ffi'
  init_opencl_types
  device = select_cl_device(options)
  @context = OpenCL::create_context([device])
  program = @context.create_program_with_source([@code.string])
  begin
    program.build(:options => options[:CLFLAGS])
  rescue OpenCL::Error => e
    puts e.to_s
    puts program.build_status
    puts program.build_log
    puts @code.string if options[:verbose] || get_verbose
    raise "OpenCL Failed to build #{@procedure.name}"
  end
  if options[:verbose] || get_verbose
    # Label each log with the device it belongs to (the block's device, not
    # the one selected above).
    program.build_log.each { |dev, log| puts "#{dev.name}: #{log}" }
  end
  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
  @kernel = program.create_kernel(@procedure.name)
  self
end
|
385
|
-
|
386
|
-
# Creates the device-side object for an array argument and uploads +arg+
# into it with a blocking write.
#
# arg       - host array; must respond to size and element_size
# parameter - procedure parameter; direction selects the access flags
#             (:in read-only, :out write-only, anything else read-write)
#             and texture selects a 2D image instead of a buffer
#
# Returns the created OpenCL image or buffer.
def create_opencl_array(arg, parameter)
  flags = case parameter.direction
          when :in then OpenCL::Mem::Flags::READ_ONLY
          when :out then OpenCL::Mem::Flags::WRITE_ONLY
          else OpenCL::Mem::Flags::READ_WRITE
          end

  if parameter.texture
    # Textures are created as one-row images of single-byte channels, as
    # wide as the array is long in bytes.
    format = OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 )
    device_object = @context.create_image_2D( format, arg.size * arg.element_size, 1, :flags => flags )
    @queue.enqueue_write_image( device_object, arg, :blocking => true )
  else
    device_object = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
    @queue.enqueue_write_buffer( device_object, arg, :blocking => true )
  end
  device_object
end
|
403
|
-
|
404
|
-
# Wraps a scalar argument in the OpenCL type matching the parameter's
# declared type (looked up in the tables built by init_opencl_types).
# Arguments that are neither Real nor Int are returned unchanged.
def create_opencl_scalar(arg, parameter)
  declared = parameter.type
  return @@opencl_real_types[declared.size]::new(arg) if declared.is_a?(Real)
  return @@opencl_int_types[declared.signed][declared.size]::new(arg) if declared.is_a?(Int)

  arg
end
|
413
|
-
|
414
|
-
# Converts one host argument into what the OpenCL kernel expects: a device
# object for parameters with a dimension, a typed scalar otherwise.
def create_opencl_param(arg, parameter)
  return create_opencl_array(arg, parameter) if parameter.dimension

  create_opencl_scalar(arg, parameter)
end
|
421
|
-
|
422
|
-
# Copies a device object back into the host array +arg+ with a blocking
# read, using the image or buffer call depending on parameter.texture.
def read_opencl_param(param, arg, parameter)
  return @queue.enqueue_read_image( param, arg, :blocking => true ) if parameter.texture

  @queue.enqueue_read_buffer( param, arg, :blocking => true )
end
|
429
|
-
|
430
|
-
# Builds the kernel for OpenCL and defines a singleton +run+ method on it.
#
# The generated run(*args) takes one argument per procedure parameter plus
# an optional trailing options hash. The global size comes from
# :global_work_size, or from :block_number times :block_size per dimension;
# the local size comes from :local_work_size or :block_size. After the
# launch, :out and :inout arrays are read back into the host arguments.
# run returns a hash with :start, :end and :duration (in seconds) taken
# from the event's profiling information.
#
# Returns self.
def build_opencl(options)
  init_opencl(options)

  # The parameter count is interpolated now; escaped \#{...} sequences are
  # evaluated later, inside the generated method.
  run_source = <<EOF
def self.run(*args)
raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
params = []
opts = {}
opts = args.pop if args.length == #{@procedure.parameters.length+1}
@procedure.parameters.each_index { |i|
params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
}
params.each_index{ |i|
@kernel.set_arg(i, params[i])
}
gws = opts[:global_work_size]
if not gws then
gws = []
opts[:block_number].each_index { |i|
gws.push(opts[:block_number][i]*opts[:block_size][i])
}
end
lws = opts[:local_work_size]
if not lws then
lws = opts[:block_size]
end
event = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
@procedure.parameters.each_index { |i|
if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
read_opencl_param( params[i], args[i], @procedure.parameters[i] )
end
}
result = {}
result[:start] = event.profiling_command_start
result[:end] = event.profiling_command_end
result[:duration] = (result[:end] - result[:start])/1000000000.0
return result
end
EOF
  eval run_source
  self
end
|
472
|
-
|
473
|
-
# Source-file extension for each compiled language; the extension decides
# which compile rule applies to the generated source.
@@extensions = { C => '.c', CUDA => '.cu', FORTRAN => '.f90' }
|
478
|
-
|
479
|
-
# Writes the compiled binary of every sub-kernel to its own temporary
# object file so it can be passed to the linker.
#
# Returns an Array of closed Tempfile objects (empty without sub-kernels).
# The caller uses their paths on the link line and unlinks them afterwards.
def get_sub_kernels
  object_extension = ".#{RbConfig::CONFIG["OBJEXT"]}"
  @kernels.collect do |kernel|
    kernel_file = Tempfile::new([kernel.procedure.name, object_extension])
    # Object code is binary data: avoid any newline/encoding translation.
    kernel_file.binmode
    kernel.binary.rewind
    kernel_file.write( kernel.binary.read )
    kernel_file.close
    # The Tempfile is the block's value. Previously the method returned the
    # result of @kernels.each (the kernels), not the files.
    kernel_file
  end
end
|
489
|
-
|
490
|
-
# Generates the C source of the Ruby extension module wrapping the kernel.
#
# path - path of the kernel source file; the module source is written next
#        to it as "Mod_<basename>.c", with "-" replaced by "_" so that the
#        name is a valid identifier.
#
# The BOAST language and output are switched to C and the module file while
# generating, and are restored afterwards even if generation raises. The
# module file is always closed.
#
# Returns [module_file_name, module_name].
def create_module_source(path)
  directory, base_name = File::split(path.chomp(File::extname(path)))
  module_name = "Mod_" + base_name.gsub("-","_")
  module_file_name = directory + "/" + module_name + ".c"

  previous_lang = get_lang
  previous_output = get_output
  set_lang( C )
  begin
    File::open(module_file_name,"w+") do |module_file|
      set_output( module_file )
      fill_module(module_file, module_name)
      if debug_source?
        module_file.rewind
        puts module_file.read
      end
    end
  ensure
    set_lang( previous_lang )
    set_output( previous_output )
  end
  [module_file_name, module_name]
end
|
508
|
-
|
509
|
-
# Loads the compiled object file into memory.
#
# target - path of the object file produced by the compiler.
#
# Stores the file's bytes in @binary (a StringIO, left positioned at its
# end). The file is closed even if reading fails. Returns nil.
def save_binary(target)
  object_code = File::open(target,"rb") { |f| f.read }
  @binary = StringIO::new
  @binary.write( object_code )
  nil
end
|
515
|
-
|
516
|
-
# Writes the kernel source to a temporary file named after the procedure,
# with the extension matching the kernel language.
#
# Returns [source_file, path, target]: the closed Tempfile, its path, and
# the path of the object file that compiling it produces.
def create_source
  source_file = Tempfile::new([@procedure.name, @@extensions[@lang]])
  path = source_file.path
  target = path.chomp(File::extname(path)) + ".#{RbConfig::CONFIG["OBJEXT"]}"

  fill_code(source_file)
  if debug_source?
    source_file.rewind
    puts source_file.read
  end
  source_file.close

  [source_file, path, target]
end
|
529
|
-
|
530
|
-
# Defines (through eval) a Ruby module that binds the compiled kernel with
# FFI and provides a +run+ method for it.
#
# module_name  - name of the module to define
# module_final - path of the shared library to load
#
# FORTRAN entry points are bound with a trailing underscore. The generated
# run(*args) takes one argument per procedure parameter plus an optional
# options hash (options[:PAPI] names counters to record). Scalars that must
# be passed by reference are wrapped in FFI memory pointers: every
# non-pointer parameter for FORTRAN, only scalar outputs otherwise.
# run returns a hash with :start, :stop, :duration and :return, plus :PAPI
# and :reference_return when applicable.
#
# Interpolations written \#{...} are evaluated inside the generated code;
# plain ones are evaluated here.
def create_ffi_module(module_name, module_final)
  module_source = <<EOF
require 'ffi'
require 'narray_ffi'
module #{module_name}
extend FFI::Library
ffi_lib "#{module_final}"
attach_function :#{@procedure.name}#{@lang == FORTRAN ? "_" : ""}, [ #{@procedure.parameters.collect{ |p| ":"+p.decl_ffi.to_s }.join(", ")} ], :#{@procedure.properties[:return] ? @procedure.properties[:return].type.decl_ffi : "void" }
def run(*args)
if args.length < @procedure.parameters.length or args.length > @procedure.parameters.length + 1 then
raise "Wrong number of arguments for \#{@procedure.name} (\#{args.length} for \#{@procedure.parameters.length})"
else
ev_set = nil
if args.length == @procedure.parameters.length + 1 then
options = args.last
if options[:PAPI] then
require 'PAPI'
ev_set = PAPI::EventSet::new
ev_set.add_named(options[:PAPI])
end
end
t_args = []
r_args = {}
if @lang == FORTRAN then
@procedure.parameters.each_with_index { |p, i|
if p.decl_ffi(true) != :pointer then
arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
t_args.push(arg_p)
r_args[p] = arg_p if p.scalar_output?
else
t_args.push( args[i] )
end
}
else
@procedure.parameters.each_with_index { |p, i|
if p.scalar_output? then
arg_p = FFI::MemoryPointer::new(p.decl_ffi(true))
arg_p.send("write_\#{p.decl_ffi(true)}",args[i])
t_args.push(arg_p)
r_args[p] = arg_p
else
t_args.push( args[i] )
end
}
end
results = {}
counters = nil
ev_set.start if ev_set
begin
start = Time::new
ret = #{@procedure.name}#{@lang == FORTRAN ? "_" : ""}(*t_args)
stop = Time::new
ensure
if ev_set then
counters = ev_set.stop
ev_set.cleanup
ev_set.destroy
end
end
results = { :start => start, :stop => stop, :duration => stop - start, :return => ret }
results[:PAPI] = Hash[[options[:PAPI]].flatten.zip(counters)] if ev_set
if r_args.length > 0 then
ref_return = {}
r_args.each { |p, p_arg|
ref_return[p.name.to_sym] = p_arg.send("read_\#{p.decl_ffi(true)}")
}
results[:reference_return] = ref_return
end
return results
end
end
end
EOF
  eval module_source
end
|
606
|
-
|
607
|
-
# Compiles and links the kernel, then extends this object with the
# resulting module so that +run+ becomes available.
#
# options - overrides for the default compiler options.
#
# OpenCL kernels are delegated to build_opencl. For other languages the
# kernel source is compiled to an object file and linked, together with the
# sub-kernel objects, into a shared library. Without FFI the library also
# contains a generated C extension module and is loaded with require; with
# FFI it holds only the kernel and is bound by create_ffi_module. The
# object file is saved into @binary and the intermediate files are removed.
#
# Returns self.
def build(options = {})
  compiler_options = BOAST::get_compiler_options
  compiler_options.update(options)
  return build_opencl(compiler_options) if @lang == CL

  linker, ldshared, ldflags = setup_compilers(compiler_options)

  # source_file is held in a local so the Tempfile object stays referenced
  # for the duration of the build.
  source_file, path, target = create_source
  use_ffi = ffi?
  object_ext = RbConfig::CONFIG["OBJEXT"]
  library_ext = RbConfig::CONFIG["DLEXT"]

  if use_ffi
    source_base = path.chomp(File::extname(path))
    module_final = source_base+"."+library_ext
    module_name = "Mod_" + File::split(source_base)[1].gsub("-","_")
    objects = [target]
    generated = [target, module_final]
  else
    module_file_name, module_name = create_module_source(path)
    module_base = module_file_name.chomp(File::extname(module_file_name))
    module_target = module_base+"."+object_ext
    module_final = module_base+"."+library_ext
    objects = [module_target, target]
    generated = [target, module_target, module_file_name, module_final]
  end

  kernel_files = get_sub_kernels

  # Declaring the objects as prerequisites makes Rake compile them through
  # the rules registered by setup_compilers before linking.
  file module_final => objects do
    sh "#{linker} #{ldshared} -o #{module_final} #{objects.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
  end
  Rake::Task[module_final].invoke

  if use_ffi
    create_ffi_module(module_name, module_final)
  else
    require(module_final)
  end
  eval "self.extend(#{module_name})"
  save_binary(target)

  generated.each { |fn| File::unlink(fn) }
  kernel_files.each { |f| f.unlink }
  self
end
|
662
|
-
|
663
|
-
# Writes the kernel source into +source_file+, adapted to the kernel
# language.
#
# C and CUDA sources get an <inttypes.h> include (CUDA also <cuda.h>).
# FORTRAN sources are wrapped line by line so that no emitted line exceeds
# FORTRAN_LINE_LENGTH, using "&" continuations; continuations of "!$omp" /
# "!$" directive lines repeat the directive sentinel. CUDA sources get an
# extern "C" wrapper appended that launches the kernel with the requested
# grid/block sizes and returns the time measured between two CUDA events.
#
# @code is rewound before and after.
def fill_code(source_file)
  @code.rewind
  source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
  source_file.puts "#include <cuda.h>" if @lang == CUDA

  if @lang == FORTRAN
    # Splits a line into pieces of at most +width+ characters and writes
    # them joined by +continuation+.
    wrap = lambda do |text, width, continuation|
      source_file.puts text.scan(/.{1,#{width}}/).join(continuation)
    end
    @code.each_line do |line|
      case line
      when /^\s*!\$(omp|OMP)/
        wrap.call(line, FORTRAN_LINE_LENGTH-7, "&\n!$omp&")
      when /^\s*!\$/
        wrap.call(line, FORTRAN_LINE_LENGTH-4, "&\n!$&")
      when /^\w*!/
        # Written without wrapping.
        # NOTE(review): \w* looks like it was meant to be \s* (comment lines
        # with leading whitespace); as written, indented comments fall
        # through to the generic wrapping below. Confirm the intent.
        source_file.write line
      else
        wrap.call(line, FORTRAN_LINE_LENGTH-2, "&\n&")
      end
    end
  else
    source_file.write @code.read
  end

  if @lang == CUDA
    source_file.write <<EOF
extern "C" {
#{@procedure.boast_header_s(CUDA)}{
dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
cudaEvent_t __start, __stop;
float __time;
cudaEventCreate(&__start);
cudaEventCreate(&__stop);
cudaEventRecord(__start, 0);
#{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
cudaEventRecord(__stop, 0);
cudaEventSynchronize(__stop);
cudaEventElapsedTime(&__time, __start, __stop);
return (unsigned long long int)((double)__time*(double)1e6);
}
}
EOF
  end
  @code.rewind
end
|
711
|
-
|
712
|
-
# Writes the #include section of the generated C extension module:
# ruby.h, inttypes.h, narray.h (only when HAVE_NARRAY_H is defined), the
# platform's timing header (mach_time.h on Mac, time.h elsewhere) and, for
# CUDA kernels, cuda_runtime.h.
def module_header(module_file)
  module_file.print <<EOF
#include "ruby.h"
#include <inttypes.h>
#ifdef HAVE_NARRAY_H
#include "narray.h"
#endif
EOF
  if OS.mac?
    module_file.print <<EOF
#if __cplusplus
extern "C" {
#endif
#include <mach/mach_time.h>
#if __cplusplus
}
#endif
EOF
  else
    module_file.print "#include <time.h>\n"
  end
  module_file.print "#include <cuda_runtime.h>\n" if @lang == CUDA
end
|
737
|
-
|
738
|
-
# Writes the registration code of the generated C extension: the module
# VALUE, the prototype of method_run, and the Init_<module_name> function
# that defines the Ruby module and attaches "run" to it with a variable
# argument count (-1).
def module_preamble(module_file, module_name)
  registration_code = <<EOF
VALUE #{module_name} = Qnil;
void Init_#{module_name}();
VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self);
void Init_#{module_name}() {
#{module_name} = rb_define_module("#{module_name}");
rb_define_method(#{module_name}, "run", method_run, -1);
}
EOF
  module_file.print registration_code
end
|
749
|
-
|
750
|
-
# Writes the C code that validates the arguments of the generated run
# method: the argument count must equal the number of procedure parameters,
# optionally followed by one extra argument, which must be nil or a Hash
# and is kept in _boast_rb_opts.
def check_args(module_file)
  validation_code = <<EOF
VALUE _boast_rb_opts;
if( _boast_argc < #{@procedure.parameters.length} || _boast_argc > #{@procedure.parameters.length + 1} )
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", _boast_argc);
_boast_rb_opts = Qnil;
if( _boast_argc == #{@procedure.parameters.length + 1} ) {
_boast_rb_opts = _boast_argv[_boast_argc -1];
if ( _boast_rb_opts != Qnil ) {
if (TYPE(_boast_rb_opts) != T_HASH)
rb_raise(rb_eArgError, "Options should be passed as a hash");
}
}
EOF
  module_file.print validation_code
end
|
765
|
-
|
766
|
-
# Emits the C code that converts each Ruby argument of the generated run
# method into the matching C variable.
#
# module_file - file receiving the raw C snippets
# argv        - BOAST variable for the C argument array
# rb_ptr      - BOAST variable used to hold the current Ruby value
#
# Scalars are converted with NUM2INT / NUM2LONG (4- and 8-byte integers) or
# NUM2DBL (reals). Arrays must be NArrays (or, outside CUDA, Strings): for
# CUDA the data is copied to freshly allocated device memory, otherwise the
# host pointer is used directly. Anything else makes the generated code
# raise ArgumentError.
#
# Here "===" is BOAST's overloaded operator building a statement that .pr
# prints to the current output. The raw snippets name _boast_rb_ptr
# literally, so rb_ptr is expected to be that variable.
def get_params_value(module_file, argv, rb_ptr)
  set_decl_module(true)
  @procedure.parameters.each_with_index do |param, i|
    if param.dimension
      (rb_ptr === argv[i]).pr
      if @lang == CUDA
        module_file.print <<EOF
if ( IsNArray(_boast_rb_ptr) ) {
struct NARRAY *_boast_n_ary;
size_t _boast_array_size;
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
cudaMalloc( (void **) &#{param.name}, _boast_array_size);
cudaMemcpy(#{param.name}, (void *) _boast_n_ary->ptr, _boast_array_size, cudaMemcpyHostToDevice);
} else {
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
}
EOF
      else
        module_file.print <<EOF
if (TYPE(_boast_rb_ptr) == T_STRING) {
#{param.name} = (void *) RSTRING_PTR(_boast_rb_ptr);
} else if ( IsNArray(_boast_rb_ptr) ) {
struct NARRAY *_boast_n_ary;
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
#{param.name} = (void *) _boast_n_ary->ptr;
} else {
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
}
EOF
      end
    else
      case param.type
      when Int
        (param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
        (param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
      when Real
        (param === FuncCall::new("NUM2DBL", argv[i])).pr
      end
    end
  end
  set_decl_module(false)
end
|
810
|
-
|
811
|
-
# Emits the local variable declarations of the generated run method: one
# variable per procedure parameter (declared from a copy with constant and
# direction cleared), the return value holder when the procedure returns
# something, the statistics hash, the event set, the platform-specific
# timing variables and the duration.
def decl_module_params(module_file)
  set_decl_module(true)
  @procedure.parameters.each do |param|
    local_decl = param.copy
    local_decl.constant = nil
    local_decl.direction = nil
    local_decl.decl
  end
  set_decl_module(false)

  returned = @procedure.properties[:return]
  module_file.print " #{returned.type.decl} _boast_ret;\n" if returned
  module_file.print " VALUE _boast_stats = rb_hash_new();\n"
  module_file.print " VALUE _boast_event_set = Qnil;\n"
  if OS.mac?
    module_file.print " uint64_t _mac_boast_start, _mac_boast_stop;\n"
    module_file.print " mach_timebase_info_data_t _mac_boast_timebase_info;\n"
  else
    module_file.print " struct timespec _boast_start, _boast_stop;\n"
  end
  module_file.print " unsigned long long int _boast_duration;\n"
end
|
831
|
-
|
832
|
-
# Writes the C code that reads the CUDA launch geometry from the options hash.
#
# The generated code fills +_boast_block_size+ from +:block_size+ (falling
# back to +:local_work_size+) and +_boast_block_number+ from +:block_number+
# (falling back to +:global_work_size+ divided by the block size). Both
# default to {1,1,1}. The text contains no Ruby interpolation, hence the
# quoted heredoc.
def get_cuda_launch_bounds(module_file)
  launch_bounds_code = <<'LAUNCH_BOUNDS'
size_t _boast_block_size[3] = {1,1,1};
size_t _boast_block_number[3] = {1,1,1};
if( _boast_rb_opts != Qnil ) {
VALUE _boast_rb_array_data = Qnil;
int _boast_i;
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_size")));
if( _boast_rb_ptr != Qnil ) {
if (TYPE(_boast_rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option block_size should be an array");
for(_boast_i=0; _boast_i<3; _boast_i++) {
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
if( _boast_rb_array_data != Qnil )
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
}
} else {
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("local_work_size")));
if( _boast_rb_ptr != Qnil ) {
if (TYPE(_boast_rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option local_work_size should be an array");
for(_boast_i=0; _boast_i<3; _boast_i++) {
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
if( _boast_rb_array_data != Qnil )
_boast_block_size[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
}
}
}
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("block_number")));
if( _boast_rb_ptr != Qnil ) {
if (TYPE(_boast_rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option block_number should be an array");
for(_boast_i=0; _boast_i<3; _boast_i++) {
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
if( _boast_rb_array_data != Qnil )
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data );
}
} else {
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("global_work_size")));
if( _boast_rb_ptr != Qnil ) {
if (TYPE(_boast_rb_ptr) != T_ARRAY)
rb_raise(rb_eArgError, "Cuda option global_work_size should be an array");
for(_boast_i=0; _boast_i<3; _boast_i++) {
_boast_rb_array_data = rb_ary_entry(_boast_rb_ptr, _boast_i);
if( _boast_rb_array_data != Qnil )
_boast_block_number[_boast_i] = (size_t) NUM2LONG( _boast_rb_array_data ) / _boast_block_size[_boast_i];
}
}
}
}
LAUNCH_BOUNDS
  module_file.print launch_bounds_code
end
|
884
|
-
|
885
|
-
# Writes the C code that starts PAPI counters when the caller asked for them.
#
# The generated code looks up +:PAPI+ in the options hash; when present it
# requires the PAPI Ruby module, builds a PAPI::EventSet, adds the requested
# events by name and starts counting, keeping the set in +_boast_event_set+.
def get_PAPI_options(module_file)
  papi_setup_code = <<'PAPI_SETUP'
if( _boast_rb_opts != Qnil ) {
_boast_rb_ptr = rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI")));
if( _boast_rb_ptr != Qnil ) {
VALUE _boast_PAPI = Qnil;
VALUE _boast_EventSet = Qnil;
rb_require("PAPI");
_boast_PAPI = rb_const_get(rb_cObject, rb_intern("PAPI"));
_boast_EventSet = rb_const_get(_boast_PAPI, rb_intern("EventSet"));
_boast_event_set = rb_funcall(_boast_EventSet, rb_intern("new"), 0);
rb_funcall(_boast_event_set, rb_intern("add_named"), 1, _boast_rb_ptr);
rb_funcall(_boast_event_set, rb_intern("start"), 0);
}
}
PAPI_SETUP
  module_file.print papi_setup_code
end
|
902
|
-
|
903
|
-
# Writes the C code that stops PAPI counting and stores the readings.
#
# When +_boast_event_set+ is set, the generated code stops it, pairs the
# flattened list of requested event names with the returned values, turns
# the pairs into a Hash and stores it under +:PAPI+ in +_boast_stats+.
def get_PAPI_results(module_file)
  papi_collect_code = <<'PAPI_COLLECT'
if( _boast_event_set != Qnil) {
VALUE _boast_papi_results = Qnil;
VALUE _boast_papi_stats = Qnil;
_boast_papi_results = rb_funcall(_boast_event_set, rb_intern("stop"), 0);
_boast_papi_stats = rb_ary_new3(1,rb_hash_aref(_boast_rb_opts, ID2SYM(rb_intern("PAPI"))));
_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("flatten"), 0);
_boast_papi_stats = rb_funcall(_boast_papi_stats, rb_intern("zip"), 1, _boast_papi_results);
_boast_papi_stats = rb_funcall(rb_const_get(rb_cObject, rb_intern("Hash")), rb_intern("send"), 2, ID2SYM(rb_intern("[]")), _boast_papi_stats );
rb_hash_aset(_boast_stats,ID2SYM(rb_intern("PAPI")),_boast_papi_stats);
}
PAPI_COLLECT
  module_file.print papi_collect_code
end
|
917
|
-
|
918
|
-
# Writes the C statement that invokes the compiled procedure.
#
# The call result is assigned to +_boast_duration+ for CUDA, or to
# +_boast_ret+ when the procedure declares a return value. The callee name
# gets a "_" suffix for FORTRAN and a "_wrapper" suffix for CUDA. Scalars are
# passed by address for FORTRAN, and for other languages only when their
# direction is :out or :inout; arrays are always passed by name. CUDA calls
# additionally receive the block number and block size arrays.
def create_procedure_call(module_file)
  if @lang == CUDA
    module_file.print " _boast_duration = "
  elsif @procedure.properties[:return]
    module_file.print " _boast_ret = "
  end
  module_file.print " #{@procedure.name}"
  module_file.print "_" if @lang == FORTRAN
  module_file.print "_wrapper" if @lang == CUDA
  module_file.print "("

  fortran = (@lang == FORTRAN)
  call_args = @procedure.parameters.map do |param|
    if param.dimension
      param.name
    elsif fortran or param.direction == :out or param.direction == :inout
      "&" + param.name
    else
      param.name
    end
  end
  call_args.push( "_boast_block_number", "_boast_block_size" ) if @lang == CUDA

  module_file.print call_args.join(", ")
  module_file.print " );\n"
end
|
954
|
-
|
955
|
-
# Emits the C code that hands results back to Ruby after the call.
#
# CUDA: for every array parameter the device buffer is copied back into the
# NArray when the direction is :out or :inout, and is then freed in all
# cases. Other languages: every parameter answering true to +scalar_output?+
# is stored in a +_boast_refs+ hash, itself registered under
# +:reference_return+ in +_boast_stats+ (the hash is only created when at
# least one such parameter exists).
#
# module_file:: output stream receiving the raw C snippets.
# argv::        BOAST variable standing for +_boast_argv+.
# rb_ptr::      BOAST variable standing for +_boast_rb_ptr+.
def get_results(module_file, argv, rb_ptr)
  set_decl_module(true)
  if @lang == CUDA
    @procedure.parameters.each_with_index do |param, i|
      next unless param.dimension
      (rb_ptr === argv[i]).pr
      module_file.print "if ( IsNArray(_boast_rb_ptr) ) {\n"
      if param.direction == :out or param.direction == :inout
        module_file.print <<EOF
struct NARRAY *_boast_n_ary;
size_t _boast_array_size;
Data_Get_Struct(_boast_rb_ptr, struct NARRAY, _boast_n_ary);
_boast_array_size = _boast_n_ary->total * na_sizeof[_boast_n_ary->type];
cudaMemcpy((void *) _boast_n_ary->ptr, #{param.name}, _boast_array_size, cudaMemcpyDeviceToHost);
EOF
      end
      module_file.print <<EOF
cudaFree( (void *) #{param.name});
} else {
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
}
EOF
    end
  else
    refs_declared = false
    @procedure.parameters.each do |param|
      next unless param.scalar_output?
      unless refs_declared
        module_file.print %Q{ VALUE _boast_refs = rb_hash_new();\n}
        module_file.print %Q{ rb_hash_aset(_boast_stats,ID2SYM(rb_intern("reference_return")),_boast_refs);\n}
        refs_declared = true
      end
      case param.type
      when Int
        c_cast = param.type.signed? ? "long long" : "unsigned long long"
        module_file.print %Q{ rb_hash_aset(_boast_refs, ID2SYM(rb_intern("#{param}")),rb_int_new((#{c_cast})#{param}));\n}
      when Real
        module_file.print %Q{ rb_hash_aset(_boast_refs, ID2SYM(rb_intern("#{param}")),rb_float_new((double)#{param}));\n}
      end
    end
  end
  set_decl_module(false)
end
|
1003
|
-
|
1004
|
-
# Writes the C statements that record the measured duration (converted from
# nanoseconds to seconds) and, when the procedure declares one, its return
# value into the +_boast_stats+ hash.
#
# Int return values are boxed with rb_int_new through a signed or unsigned
# cast, Real values with rb_float_new; any other return type is not stored.
# Signedness is queried with +signed?+, the same predicate the other
# generators in this file use.
def store_result(module_file)
  module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)_boast_duration*(double)1e-9));\n"
  returned = @procedure.properties[:return]
  return unless returned

  type_ret = returned.type
  boxed =
    if type_ret.kind_of?(Int)
      type_ret.signed? ? "rb_int_new((long long)_boast_ret)" : "rb_int_new((unsigned long long)_boast_ret)"
    elsif type_ret.kind_of?(Real)
      "rb_float_new((double)_boast_ret)"
    end
  module_file.print " rb_hash_aset(_boast_stats,ID2SYM(rb_intern(\"return\")),#{boxed});\n" if boxed
end
|
1013
|
-
|
1014
|
-
# Generates the whole C extension source into +module_file+.
#
# After the header and preamble, a +method_run+ function is written that
# checks its arguments, declares and fills the C parameters, optionally reads
# the CUDA launch bounds and PAPI options, times the procedure call, gathers
# the results and returns the +_boast_stats+ hash.
#
# module_file:: output stream for the generated C code.
# module_name:: name forwarded to +module_preamble+.
def fill_module(module_file, module_name)
  module_header(module_file)
  @procedure.boast_header(@lang)
  module_preamble(module_file, module_name)

  module_file.puts "VALUE method_run(int _boast_argc, VALUE *_boast_argv, VALUE _boast_self) {"
  increment_indent_level
  check_args(module_file)

  argc = @procedure.parameters.length
  argv = Variable::new("_boast_argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
  rb_ptr = Variable::new("_boast_rb_ptr", CustomType, :type_name => "VALUE")
  set_transition("VALUE", "VALUE", :default, CustomType::new(:type_name => "VALUE"))
  rb_ptr.decl

  decl_module_params(module_file)

  get_params_value(module_file, argv, rb_ptr)

  # get_cuda_launch_bounds writes to module_file itself; its return value
  # (whatever print returned) must not be printed a second time.
  get_cuda_launch_bounds(module_file) if @lang == CUDA

  get_PAPI_options(module_file)

  if OS.mac? then
    module_file.print " _mac_boast_start = mach_absolute_time();\n"
  else
    module_file.print " clock_gettime(CLOCK_REALTIME, &_boast_start);\n"
  end

  create_procedure_call(module_file)

  if OS.mac? then
    module_file.print " _mac_boast_stop = mach_absolute_time();\n"
  else
    module_file.print " clock_gettime(CLOCK_REALTIME, &_boast_stop);\n"
  end

  get_PAPI_results(module_file)

  # For CUDA the duration is assigned from the wrapper call itself
  # (see create_procedure_call), so host timers are only used otherwise.
  if @lang != CUDA then
    if OS.mac? then
      module_file.print " mach_timebase_info(&_mac_boast_timebase_info);\n"
      module_file.print " _boast_duration = (_mac_boast_stop - _mac_boast_start) * _mac_boast_timebase_info.numer / _mac_boast_timebase_info.denom;\n"
    else
      module_file.print " _boast_duration = (_boast_stop.tv_sec - _boast_start.tv_sec) * (unsigned long long int)1000000000 + _boast_stop.tv_nsec - _boast_start.tv_nsec;\n"
    end
  end

  get_results(module_file, argv, rb_ptr)

  store_result(module_file)

  module_file.print " return _boast_stats;\n"
  decrement_indent_level
  module_file.print "}"
end
|
1072
|
-
|
1073
|
-
# Lazily builds the kernel on the first call to +run+.
#
# Any missing method other than +run+ is delegated to +super+. For +run+,
# +build+ is invoked first and the call is then re-dispatched with the
# original arguments and block (presumably +build+ defines +run+ on the
# receiver; verify against +build+).
def method_missing(meth, *args, &block)
  return super unless meth.to_s == "run"
  build
  run(*args, &block)
end
|
1081
|
-
|
1082
|
-
# Loads the reference input sets found under +path+, reading files whose
# names end with +suffix+; a thin wrapper over load_ref_files with the
# :in intent.
def load_ref_inputs(path = "", suffix = ".in" )
  load_ref_files(path, suffix, :in)
end
|
1085
|
-
|
1086
|
-
# Loads the reference output sets found under +path+, reading files whose
# names end with +suffix+; a thin wrapper over load_ref_files with the
# :out intent.
def load_ref_outputs(path = "", suffix = ".out" )
  load_ref_files(path, suffix, :out)
end
|
1089
|
-
|
1090
|
-
# Compares produced outputs against reference outputs, parameter by parameter.
#
# ref_outputs:: reference values, indexed like the procedure parameters.
# outputs::     values to check, indexed the same way.
# epsilon::     optional tolerance; when given, any absolute difference
#               above it raises a RuntimeError.
#
# Parameters whose direction is :in or that are constant are skipped.
# Returns a hash mapping each compared parameter name to its absolute
# difference (the maximum element-wise difference for arrays).
def compare_ref(ref_outputs, outputs, epsilon = nil)
  res = {}
  @procedure.parameters.each_with_index do |param, indx|
    next if param.direction == :in or param.constant
    gap = (outputs[indx] - ref_outputs[indx]).abs
    if param.dimension
      if epsilon
        gap.each do |elem|
          raise "Error: #{param.name} different from ref by: #{elem}!" if elem > epsilon
        end
      end
      res[param.name] = gap.max
    else
      if epsilon and gap > epsilon
        raise "Error: #{param.name} different from ref: #{outputs[indx]} != #{ref_outputs[indx]} !"
      end
      res[param.name] = gap
    end
  end
  res
end
|
1111
|
-
|
1112
|
-
# Maps a BOAST array parameter to the NArray type code used to decode its
# reference data.
#
# Real: 4 bytes -> SFLOAT, 8 bytes -> FLOAT.
# Int:  1 byte -> BYTE, 2 bytes -> SINT, 4 bytes -> INT.
# Anything else falls back to BYTE, with a message on STDERR in debug mode.
#
# A 4-byte Int must map to NArray::INT (32-bit): mapping it to SINT would
# make 32-bit data be read as 16-bit elements.
def get_array_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4
      type = NArray::SFLOAT
    when 8
      type = NArray::FLOAT
    else
      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1
      type = NArray::BYTE
    when 2
      type = NArray::SINT
    when 4
      type = NArray::INT
    else
      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  else
    STDERR::puts "Unknown array type for NArray: #{param.type}, defaulting to byte" if debug?
    type = NArray::BYTE
  end
  return type
end
|
1141
|
-
|
1142
|
-
# Returns the String#unpack directive used to decode a scalar parameter from
# its binary reference file.
#
# Real: "f" (4 bytes) or "d" (8 bytes).
# Int:  "C"/"S"/"L"/"Q" for 1/2/4/8 bytes, lower-cased when the type is signed.
# Raises a RuntimeError on an unsupported Real or Int size; returns nil for
# any other parameter type.
def get_scalar_type(param)
  kind = param.type.class
  if kind == Real
    return case param.type.size
           when 4 then "f"
           when 8 then "d"
           else raise "Unsupported Real scalar size: #{param.type.size}!"
           end
  end
  return nil unless kind == Int

  directive = case param.type.size
              when 1 then "C"
              when 2 then "S"
              when 4 then "L"
              when 8 then "Q"
              else raise "Unsupported Int scalar size: #{param.type.size}!"
              end
  param.type.signed? ? directive.downcase : directive
end
|
1171
|
-
|
1172
|
-
# Reads one parameter's reference value from "<directory>/<name><suffix>".
#
# param::     procedure parameter to load.
# directory:: directory holding the reference files of one test case.
# suffix::    file name suffix appended to the parameter name.
# intent::    :in or :out; with :out, input-only or constant parameters are
#             skipped and nil is returned without touching the file system.
#
# Arrays are decoded into an NArray (a one-element array when the file is
# empty); scalars are unpacked with the directive from get_scalar_type.
# The file is opened in block form so that it is closed even when reading
# or decoding raises.
def read_param(param, directory, suffix, intent)
  if intent == :out and ( param.direction == :in or param.constant ) then
    return nil
  end
  File::open( directory + "/" + param.name+suffix, "rb" ) do |f|
    if param.dimension then
      type = get_array_type(param)
      if f.size == 0 then
        NArray::new(type, 1)
      else
        NArray.to_na(f.read, type)
      end
    else
      type = get_scalar_type(param)
      f.read.unpack(type).first
    end
  end
end
|
1191
|
-
|
1192
|
-
# Reads the launch geometry of a reference test case from
# "<directory>/problem_size".
#
# The file is expected to contain two "<a,b,c>" groups: the first is the
# local (block) dimension, the second the global one. Both are padded with
# 1s up to three dimensions.
#
# Returns, for OpenCL, { :global_work_size, :local_work_size } where the
# global size is the element-wise product of both groups; otherwise
# { :block_number, :block_size }.
#
# The content is fetched with File.read so that no file handle is left open
# when parsing a malformed file raises.
def get_gpu_dim(directory)
  s = File::read( directory + "/problem_size" )
  local_dim, global_dim = s.scan(/<(.*?)>/)
  local_dim = local_dim.pop.split(",").collect!{ |e| e.to_i }
  global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
  (local_dim.length..2).each{ |i| local_dim[i] = 1 }
  (global_dim.length..2).each{ |i| global_dim[i] = 1 }
  if @lang == CL then
    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
    res = { :global_work_size => global_dim, :local_work_size => local_dim }
  else
    res = { :block_number => global_dim, :block_size => local_dim }
  end
  return res
end
|
1209
|
-
|
1210
|
-
# Loads every reference test case stored under "<path>/<procedure name>/".
#
# Each sub-directory is one case; for it, every procedure parameter is read
# with read_param (using +suffix+ and +intent+) and, for CUDA and OpenCL
# kernels, the launch geometry from get_gpu_dim is appended.
#
# Returns a hash mapping each case directory (as a String) to its list of
# values, or an empty hash when the directory cannot be listed.
def load_ref_files( path = "", suffix = "", intent )
  proc_path = path + "/#{@procedure.name}/"
  case_dirs =
    begin
      Pathname.new(proc_path).children.select { |c| c.directory? }
    rescue
      return {}
    end
  case_dirs.map { |d| d.to_s }.each_with_object({}) do |dir, res_h|
    values = @procedure.parameters.map { |param| read_param(param, dir, suffix, intent) }
    values.push get_gpu_dim(dir) if @lang == CUDA or @lang == CL
    res_h[dir] = values
  end
end
|
1231
|
-
|
1232
|
-
# Evaluates the kernel's cost function, forwarding all arguments to the
# callable stored in @cost_function and returning its result.
def cost(*args)
  @cost_function.(*args)
end
|
1235
|
-
end
|
1236
|
-
end
|