BOAST 0.9995 → 0.9996
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BOAST.gemspec +3 -1
- data/lib/BOAST/Algorithm.rb +82 -189
- data/lib/BOAST/Arithmetic.rb +16 -16
- data/lib/BOAST/BOAST_OpenCL.rb +44 -41
- data/lib/BOAST/CKernel.rb +471 -344
- data/lib/BOAST/Case.rb +21 -23
- data/lib/BOAST/ControlStructure.rb +9 -1
- data/lib/BOAST/DataTypes.rb +70 -51
- data/lib/BOAST/Expression.rb +22 -17
- data/lib/BOAST/For.rb +37 -23
- data/lib/BOAST/FuncCall.rb +11 -10
- data/lib/BOAST/Functors.rb +29 -4
- data/lib/BOAST/If.rb +21 -23
- data/lib/BOAST/Index.rb +14 -13
- data/lib/BOAST/Inspectable.rb +6 -13
- data/lib/BOAST/Operators.rb +82 -61
- data/lib/BOAST/Parens.rb +4 -25
- data/lib/BOAST/Pragma.rb +7 -6
- data/lib/BOAST/Print.rb +7 -0
- data/lib/BOAST/Procedure.rb +46 -34
- data/lib/BOAST/State.rb +79 -0
- data/lib/BOAST/Transitions.rb +13 -5
- data/lib/BOAST/Variable.rb +104 -68
- data/lib/BOAST/While.rb +15 -17
- data/lib/BOAST.rb +2 -0
- metadata +4 -2
data/lib/BOAST/CKernel.rb
CHANGED
@@ -8,7 +8,6 @@ require 'yaml'
|
|
8
8
|
require 'pathname'
|
9
9
|
|
10
10
|
module BOAST
|
11
|
-
@@verbose = false
|
12
11
|
@@compiler_default_options = {
|
13
12
|
:FC => 'gfortran',
|
14
13
|
:FCFLAGS => '-O2 -Wall',
|
@@ -36,7 +35,16 @@ module BOAST
|
|
36
35
|
"icpc" => "-openmp"
|
37
36
|
}
|
38
37
|
|
39
|
-
|
38
|
+
module PrivateStateAccessor
|
39
|
+
private_boolean_state_accessor :verbose
|
40
|
+
end
|
41
|
+
|
42
|
+
boolean_state_accessor :verbose
|
43
|
+
@@verbose = false
|
44
|
+
|
45
|
+
module_function
|
46
|
+
|
47
|
+
def read_boast_config
|
40
48
|
home_config_dir = ENV["XDG_CONFIG_HOME"]
|
41
49
|
home_config_dir = "#{Dir.home}/.config" if not home_config_dir
|
42
50
|
Dir.mkdir( home_config_dir ) if not File::exist?( home_config_dir )
|
@@ -70,36 +78,22 @@ module BOAST
|
|
70
78
|
@@verbose = ENV["VERBOSE"] if ENV["VERBOSE"]
|
71
79
|
end
|
72
80
|
|
73
|
-
|
81
|
+
read_boast_config
|
74
82
|
|
75
|
-
def
|
83
|
+
def get_openmp_flags
|
76
84
|
return @@openmp_default_flags.clone
|
77
85
|
end
|
78
86
|
|
79
|
-
def
|
87
|
+
def get_compiler_options
|
80
88
|
return @@compiler_default_options.clone
|
81
89
|
end
|
82
90
|
|
83
|
-
def self.verbose
|
84
|
-
return @@verbose
|
85
|
-
end
|
86
|
-
|
87
|
-
|
88
|
-
def self.get_verbose
|
89
|
-
return @@verbose
|
90
|
-
end
|
91
|
-
|
92
|
-
def self.verbose=(verbose)
|
93
|
-
@@verbose = verbose
|
94
|
-
end
|
95
|
-
|
96
|
-
def self.set_verbose(verbose)
|
97
|
-
@@verbose = verbose
|
98
|
-
end
|
99
|
-
|
100
91
|
class CKernel
|
101
92
|
include Rake::DSL
|
102
|
-
include
|
93
|
+
include Inspectable
|
94
|
+
include PrivateStateAccessor
|
95
|
+
include TypeTransition
|
96
|
+
|
103
97
|
attr_accessor :code
|
104
98
|
attr_accessor :procedure
|
105
99
|
attr_accessor :lang
|
@@ -110,13 +104,13 @@ module BOAST
|
|
110
104
|
def initialize(options={})
|
111
105
|
if options[:code] then
|
112
106
|
@code = options[:code]
|
113
|
-
elsif
|
114
|
-
@code =
|
107
|
+
elsif get_chain_code
|
108
|
+
@code = get_output
|
115
109
|
@code.seek(0,SEEK_END)
|
116
110
|
else
|
117
111
|
@code = StringIO::new
|
118
112
|
end
|
119
|
-
|
113
|
+
set_output(@code)
|
120
114
|
if options[:kernels] then
|
121
115
|
@kernels = options[:kernels]
|
122
116
|
else
|
@@ -125,7 +119,7 @@ module BOAST
|
|
125
119
|
if options[:lang] then
|
126
120
|
@lang = options[:lang]
|
127
121
|
else
|
128
|
-
@lang =
|
122
|
+
@lang = get_lang
|
129
123
|
end
|
130
124
|
end
|
131
125
|
|
@@ -139,29 +133,27 @@ module BOAST
|
|
139
133
|
return code.read
|
140
134
|
end
|
141
135
|
|
142
|
-
def setup_compiler(options = {})
|
143
|
-
Rake::Task::clear
|
144
|
-
verbose = options[:verbose]
|
145
|
-
verbose = BOAST::get_verbose if not verbose
|
146
|
-
Rake::verbose(verbose)
|
147
|
-
Rake::FileUtilsExt.verbose_flag=verbose
|
148
|
-
f_compiler = options[:FC]
|
149
|
-
c_compiler = options[:CC]
|
150
|
-
cxx_compiler = options[:CXX]
|
151
|
-
cuda_compiler = options[:NVCC]
|
152
|
-
f_flags = options[:FCFLAGS]
|
153
|
-
f_flags += " -fPIC"
|
154
|
-
f_flags += " -fno-second-underscore" if f_compiler == 'g95'
|
155
|
-
ld_flags = options[:LDFLAGS]
|
156
|
-
cuda_flags = options[:NVCCFLAGS]
|
157
|
-
cuda_flags += " --compiler-options '-fPIC'"
|
158
136
|
|
137
|
+
def get_openmp_flags(compiler)
|
138
|
+
openmp_flags = BOAST::get_openmp_flags[compiler]
|
139
|
+
if not openmp_flags then
|
140
|
+
keys = BOAST::get_openmp_flags.keys
|
141
|
+
keys.each { |k|
|
142
|
+
openmp_flags = BOAST::get_openmp_flags[k] if compiler.match(k)
|
143
|
+
}
|
144
|
+
end
|
145
|
+
return openmp_flags
|
146
|
+
end
|
159
147
|
|
148
|
+
def get_includes(narray_path)
|
160
149
|
includes = "-I#{RbConfig::CONFIG["archdir"]}"
|
161
150
|
includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
|
162
151
|
includes += " -I#{RbConfig::CONFIG["rubyarchhdrdir"]}" if RbConfig::CONFIG["rubyarchhdrdir"]
|
163
|
-
|
164
|
-
|
152
|
+
includes += " -I#{narray_path}" if narray_path
|
153
|
+
return includes
|
154
|
+
end
|
155
|
+
|
156
|
+
def get_narray_path
|
165
157
|
narray_path = nil
|
166
158
|
begin
|
167
159
|
spec = Gem::Specification::find_by_name('narray')
|
@@ -174,49 +166,96 @@ module BOAST
|
|
174
166
|
narray_path = Gem.loaded_specs['narray'].full_gem_path
|
175
167
|
end
|
176
168
|
end
|
177
|
-
|
169
|
+
end
|
170
|
+
|
171
|
+
def setup_c_compiler(options, includes, narray_path, runner)
|
172
|
+
c_compiler = options[:CC]
|
178
173
|
cflags = options[:CFLAGS]
|
179
|
-
cxxflags = options[:CXXFLAGS]
|
180
174
|
cflags += " -fPIC #{includes}"
|
181
|
-
cxxflags += " -fPIC #{includes}"
|
182
175
|
cflags += " -DHAVE_NARRAY_H" if narray_path
|
183
|
-
|
184
|
-
|
176
|
+
if options[:openmp] and @lang == C then
|
177
|
+
openmp_cflags = get_openmp_flags(c_compiler)
|
178
|
+
raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
|
179
|
+
cflags += " #{openmp_cflags}"
|
180
|
+
end
|
185
181
|
|
182
|
+
rule '.o' => '.c' do |t|
|
183
|
+
c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
|
184
|
+
runner.call(t, c_call_string)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def setup_cxx_compiler(options, includes, runner)
|
189
|
+
cxx_compiler = options[:CXX]
|
190
|
+
cxxflags = options[:CXXFLAGS]
|
191
|
+
cxxflags += " -fPIC #{includes}"
|
192
|
+
if options[:openmp] and @lang == C then
|
193
|
+
openmp_cxxflags = get_openmp_flags(cxx_compiler)
|
194
|
+
raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
|
195
|
+
cxxflags += " #{openmp_cxxflags}"
|
196
|
+
end
|
197
|
+
|
198
|
+
rule '.o' => '.cpp' do |t|
|
199
|
+
cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
|
200
|
+
runner.call(t, cxx_call_string)
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
def setup_fortran_compiler(options, runner)
|
205
|
+
f_compiler = options[:FC]
|
206
|
+
fcflags = options[:FCFLAGS]
|
207
|
+
fcflags += " -fPIC"
|
208
|
+
fcflags += " -fno-second-underscore" if f_compiler == 'g95'
|
209
|
+
if options[:openmp] and @lang == FORTRAN then
|
210
|
+
openmp_fcflags = get_openmp_flags(f_compiler)
|
211
|
+
raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
|
212
|
+
fcflags += " #{openmp_fcflags}"
|
213
|
+
end
|
214
|
+
|
215
|
+
rule '.o' => '.f90' do |t|
|
216
|
+
f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
|
217
|
+
runner.call(t, f_call_string)
|
218
|
+
end
|
219
|
+
end
|
220
|
+
|
221
|
+
def setup_cuda_compiler(options, runner)
|
222
|
+
cuda_compiler = options[:NVCC]
|
223
|
+
cudaflags = options[:NVCCFLAGS]
|
224
|
+
cudaflags += " --compiler-options '-fPIC'"
|
225
|
+
|
226
|
+
rule '.o' => '.cu' do |t|
|
227
|
+
cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
|
228
|
+
runner.call(t, cuda_call_string)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
def setup_linker(options)
|
233
|
+
ldflags = options[:LDFLAGS]
|
234
|
+
ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
|
235
|
+
ldflags += " -lcudart" if @lang == CUDA
|
236
|
+
c_compiler = options[:CC]
|
237
|
+
c_compiler = "cc" if not c_compiler
|
238
|
+
linker = options[:LD]
|
239
|
+
linker = c_compiler if not linker
|
186
240
|
if options[:openmp] then
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
if not openmp_c_flags then
|
191
|
-
keys = BOAST::get_openmp_flags.keys
|
192
|
-
keys.each { |k|
|
193
|
-
openmp_c_flags = BOAST::get_openmp_flags[k] if c_compiler.match(k)
|
194
|
-
}
|
195
|
-
end
|
196
|
-
raise "unkwown openmp flags for: #{c_compiler}" if not openmp_c_flags
|
197
|
-
cflags += " #{openmp_c_flags}"
|
198
|
-
openmp_cxx_flags = BOAST::get_openmp_flags[cxx_compiler]
|
199
|
-
if not openmp_cxx_flags then
|
200
|
-
keys = BOAST::get_openmp_flags.keys
|
201
|
-
keys.each { |k|
|
202
|
-
openmp_cxx_flags = BOAST::get_openmp_flags[k] if cxx_compiler.match(k)
|
203
|
-
}
|
204
|
-
end
|
205
|
-
raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxx_flags
|
206
|
-
cxxflags += " #{openmp_cxx_flags}"
|
207
|
-
when BOAST::FORTRAN
|
208
|
-
openmp_f_flags = BOAST::get_openmp_flags[f_compiler]
|
209
|
-
if not openmp_f_flags then
|
210
|
-
keys = BOAST::get_openmp_flags.keys
|
211
|
-
keys.each { |k|
|
212
|
-
openmp_f_flags = BOAST::get_openmp_flags[k] if f_compiler.match(k)
|
213
|
-
}
|
214
|
-
end
|
215
|
-
raise "unkwown openmp flags for: #{f_compiler}" if not openmp_f_flags
|
216
|
-
fcflags += " #{openmp_f_flags}"
|
217
|
-
end
|
241
|
+
openmp_ldflags = get_openmp_flags(linker)
|
242
|
+
raise "unkwown openmp flags for: #{linker}" if not openmp_ldflags
|
243
|
+
ldflags += " #{openmp_ldflags}"
|
218
244
|
end
|
219
245
|
|
246
|
+
return [linker, ldflags]
|
247
|
+
end
|
248
|
+
|
249
|
+
def setup_compilers(options = {})
|
250
|
+
Rake::Task::clear
|
251
|
+
verbose = options[:verbose]
|
252
|
+
verbose = get_verbose if not verbose
|
253
|
+
Rake::verbose(verbose)
|
254
|
+
Rake::FileUtilsExt.verbose_flag=verbose
|
255
|
+
|
256
|
+
narray_path = get_narray_path
|
257
|
+
includes = get_includes(narray_path)
|
258
|
+
|
220
259
|
runner = lambda { |t, call_string|
|
221
260
|
if verbose then
|
222
261
|
sh call_string
|
@@ -230,26 +269,13 @@ module BOAST
|
|
230
269
|
end
|
231
270
|
}
|
232
271
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
272
|
+
setup_c_compiler(options, includes, narray_path, runner)
|
273
|
+
setup_cxx_compiler(options, includes, runner)
|
274
|
+
setup_fortran_compiler(options, runner)
|
275
|
+
setup_cuda_compiler(options, runner)
|
237
276
|
|
238
|
-
|
239
|
-
f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
|
240
|
-
runner.call(t, f_call_string)
|
241
|
-
end
|
242
|
-
|
243
|
-
rule '.o' => '.cpp' do |t|
|
244
|
-
cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
|
245
|
-
runner.call(t, cxx_call_string)
|
246
|
-
end
|
277
|
+
return setup_linker(options)
|
247
278
|
|
248
|
-
rule '.o' => '.cu' do |t|
|
249
|
-
cuda_call_string = "#{cuda_compiler} #{cudaflags} -c -o #{t.name} #{t.source}"
|
250
|
-
runner.call(t, cuda_call_string)
|
251
|
-
end
|
252
|
-
return ld_flags
|
253
279
|
end
|
254
280
|
|
255
281
|
def select_cl_platform(options)
|
@@ -287,8 +313,32 @@ module BOAST
|
|
287
313
|
return devices.first
|
288
314
|
end
|
289
315
|
|
290
|
-
def
|
316
|
+
def init_opencl_types
|
317
|
+
@@opencl_real_types = {
|
318
|
+
2 => OpenCL::Half,
|
319
|
+
4 => OpenCL::Float,
|
320
|
+
8 => OpenCL::Double
|
321
|
+
}
|
322
|
+
|
323
|
+
@@opencl_int_types = {
|
324
|
+
true => {
|
325
|
+
1 => OpenCL::Char,
|
326
|
+
2 => OpenCL::Short,
|
327
|
+
4 => OpenCL::Int,
|
328
|
+
8 => OpenCL::Long
|
329
|
+
},
|
330
|
+
false => {
|
331
|
+
1 => OpenCL::UChar,
|
332
|
+
2 => OpenCL::UShort,
|
333
|
+
4 => OpenCL::UInt,
|
334
|
+
8 => OpenCL::ULong
|
335
|
+
}
|
336
|
+
}
|
337
|
+
end
|
338
|
+
|
339
|
+
def init_opencl(options)
|
291
340
|
require 'opencl_ruby_ffi'
|
341
|
+
init_opencl_types
|
292
342
|
device = select_cl_device(options)
|
293
343
|
@context = OpenCL::create_context([device])
|
294
344
|
program = @context.create_program_with_source([@code.string])
|
@@ -299,81 +349,84 @@ module BOAST
|
|
299
349
|
puts e.to_s
|
300
350
|
puts program.build_status
|
301
351
|
puts program.build_log
|
302
|
-
if options[:verbose] or
|
352
|
+
if options[:verbose] or get_verbose then
|
303
353
|
puts @code.string
|
304
354
|
end
|
305
355
|
raise "OpenCL Failed to build #{@procedure.name}"
|
306
356
|
end
|
307
|
-
if options[:verbose] or
|
357
|
+
if options[:verbose] or get_verbose then
|
308
358
|
program.build_log.each {|dev,log|
|
309
359
|
puts "#{device.name}: #{log}"
|
310
360
|
}
|
311
361
|
end
|
312
362
|
@queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
|
313
363
|
@kernel = program.create_kernel(@procedure.name)
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
opts = args.pop if args.length == #{@procedure.parameters.length+1}
|
320
|
-
@procedure.parameters.each_index { |i|
|
321
|
-
if @procedure.parameters[i].dimension then
|
322
|
-
if @procedure.parameters[i].direction == :in then
|
364
|
+
return self
|
365
|
+
end
|
366
|
+
|
367
|
+
def create_opencl_array(arg, parameter)
|
368
|
+
if parameter.direction == :in then
|
323
369
|
flags = OpenCL::Mem::Flags::READ_ONLY
|
324
|
-
elsif
|
370
|
+
elsif parameter.direction == :out then
|
325
371
|
flags = OpenCL::Mem::Flags::WRITE_ONLY
|
326
372
|
else
|
327
373
|
flags = OpenCL::Mem::Flags::READ_WRITE
|
328
374
|
end
|
329
|
-
if
|
330
|
-
|
331
|
-
|
332
|
-
@queue.enqueue_write_image( params[i], args[i], :blocking => true )
|
333
|
-
# end
|
375
|
+
if parameter.texture then
|
376
|
+
param = @context.create_image_2D( OpenCL::ImageFormat::new( OpenCL::ChannelOrder::R, OpenCL::ChannelType::UNORM_INT8 ), arg.size * arg.element_size, 1, :flags => flags )
|
377
|
+
@queue.enqueue_write_image( param, arg, :blocking => true )
|
334
378
|
else
|
335
|
-
|
336
|
-
|
337
|
-
@queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
|
338
|
-
# end
|
379
|
+
param = @context.create_buffer( arg.size * arg.element_size, :flags => flags )
|
380
|
+
@queue.enqueue_write_buffer( param, arg, :blocking => true )
|
339
381
|
end
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
params[i] = OpenCL::Short::new(args[i]) if @procedure.parameters[i].type.size == 2
|
349
|
-
params[i] = OpenCL::Int::new(args[i]) if @procedure.parameters[i].type.size == 4
|
350
|
-
params[i] = OpenCL::Long::new(args[i]) if @procedure.parameters[i].type.size == 8
|
351
|
-
else
|
352
|
-
params[i] = OpenCL::UChar::new(args[i]) if @procedure.parameters[i].type.size == 1
|
353
|
-
params[i] = OpenCL::UShort::new(args[i]) if @procedure.parameters[i].type.size == 2
|
354
|
-
params[i] = OpenCL::UInt::new(args[i]) if @procedure.parameters[i].type.size == 4
|
355
|
-
params[i] = OpenCL::ULong::new(args[i]) if @procedure.parameters[i].type.size == 8
|
356
|
-
end
|
382
|
+
return param
|
383
|
+
end
|
384
|
+
|
385
|
+
def create_opencl_scalar(arg, parameter)
|
386
|
+
if parameter.type.is_a?(Real) then
|
387
|
+
return @@opencl_real_types[parameter.type.size]::new(arg)
|
388
|
+
elsif parameter.type.is_a?(Int) then
|
389
|
+
return @@opencl_int_types[parameter.type.signed][parameter.type.size]::new(arg)
|
357
390
|
else
|
358
|
-
|
391
|
+
return arg
|
359
392
|
end
|
360
393
|
end
|
394
|
+
|
395
|
+
def create_opencl_param(arg, parameter)
|
396
|
+
if parameter.dimension then
|
397
|
+
return create_opencl_array(arg, parameter)
|
398
|
+
else
|
399
|
+
return create_opencl_scalar(arg, parameter)
|
400
|
+
end
|
401
|
+
end
|
402
|
+
|
403
|
+
def read_opencl_param(param, arg, parameter)
|
404
|
+
if parameter.texture then
|
405
|
+
@queue.enqueue_read_image( param, arg, :blocking => true )
|
406
|
+
else
|
407
|
+
@queue.enqueue_read_buffer( param, arg, :blocking => true )
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
def build_opencl(options)
|
412
|
+
init_opencl(options)
|
413
|
+
|
414
|
+
run_method = <<EOF
|
415
|
+
def self.run(*args)
|
416
|
+
raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
|
417
|
+
params = []
|
418
|
+
opts = {}
|
419
|
+
opts = args.pop if args.length == #{@procedure.parameters.length+1}
|
420
|
+
@procedure.parameters.each_index { |i|
|
421
|
+
params[i] = create_opencl_param( args[i], @procedure.parameters[i] )
|
361
422
|
}
|
362
423
|
params.each_index{ |i|
|
363
424
|
@kernel.set_arg(i, params[i])
|
364
425
|
}
|
365
426
|
event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
|
366
427
|
@procedure.parameters.each_index { |i|
|
367
|
-
if @procedure.parameters[i].dimension then
|
368
|
-
|
369
|
-
if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
|
370
|
-
@queue.enqueue_read_image( params[i], args[i], :blocking => true )
|
371
|
-
end
|
372
|
-
else
|
373
|
-
if @procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out then
|
374
|
-
@queue.enqueue_read_buffer( params[i], args[i], :blocking => true )
|
375
|
-
end
|
376
|
-
end
|
428
|
+
if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
|
429
|
+
read_opencl_param( params[i], args[i], @procedure.parameters[i] )
|
377
430
|
end
|
378
431
|
}
|
379
432
|
result = {}
|
@@ -387,75 +440,90 @@ EOF
|
|
387
440
|
return self
|
388
441
|
end
|
389
442
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
extension = ".c" if @lang == BOAST::C
|
396
|
-
extension = ".cu" if @lang == BOAST::CUDA
|
397
|
-
extension = ".f90" if @lang == BOAST::FORTRAN
|
398
|
-
#temporary
|
399
|
-
c_compiler = compiler_options[:CC]
|
400
|
-
c_compiler = "cc" if not c_compiler
|
401
|
-
linker = compiler_options[:LD]
|
402
|
-
linker = c_compiler if not linker
|
403
|
-
#end temporary
|
404
|
-
if options[:openmp] then
|
405
|
-
openmp_ld_flags = BOAST::get_openmp_flags[linker]
|
406
|
-
if not openmp_ld_flags then
|
407
|
-
keys = BOAST::get_openmp_flags.keys
|
408
|
-
keys.each { |k|
|
409
|
-
openmp_ld_flags = BOAST::get_openmp_flags[k] if linker.match(k)
|
410
|
-
}
|
411
|
-
end
|
412
|
-
raise "unkwown openmp flags for: #{linker}" if not openmp_ld_flags
|
413
|
-
ldflags += " #{openmp_ld_flags}"
|
414
|
-
end
|
415
|
-
source_file = Tempfile::new([@procedure.name,extension])
|
416
|
-
path = source_file.path
|
417
|
-
target = path.chomp(File::extname(path))+".o"
|
418
|
-
fill_code(source_file)
|
419
|
-
source_file.close
|
443
|
+
@@extensions = {
|
444
|
+
C => ".c",
|
445
|
+
CUDA => ".cu",
|
446
|
+
FORTRAN => ".f90"
|
447
|
+
}
|
420
448
|
|
421
|
-
|
422
|
-
|
423
|
-
|
449
|
+
def get_sub_kernels
|
450
|
+
kernel_files = []
|
451
|
+
@kernels.each { |kernel|
|
452
|
+
kernel_file = Tempfile::new([kernel.procedure.name,".o"])
|
453
|
+
kernel.binary.rewind
|
454
|
+
kernel_file.write( kernel.binary.read )
|
455
|
+
kernel_file.close
|
456
|
+
kernel_files.push(kernel_file)
|
457
|
+
}
|
458
|
+
end
|
459
|
+
|
460
|
+
def create_module_source(path)
|
461
|
+
previous_lang = get_lang
|
462
|
+
previous_output = get_output
|
463
|
+
set_lang( C )
|
424
464
|
module_file_name = File::split(path.chomp(File::extname(path)))[0] + "/Mod_" + File::split(path.chomp(File::extname(path)))[1].gsub("-","_") + ".c"
|
425
465
|
module_name = File::split(module_file_name.chomp(File::extname(module_file_name)))[1]
|
426
466
|
module_file = File::open(module_file_name,"w+")
|
427
|
-
|
467
|
+
set_output( module_file )
|
428
468
|
fill_module(module_file, module_name)
|
429
469
|
module_file.rewind
|
430
470
|
#puts module_file.read
|
431
471
|
module_file.close
|
432
|
-
|
433
|
-
|
472
|
+
set_lang( previous_lang )
|
473
|
+
set_output( previous_output )
|
474
|
+
return [module_file_name, module_name]
|
475
|
+
end
|
476
|
+
|
477
|
+
def save_binary(target)
|
478
|
+
f = File::open(target,"rb")
|
479
|
+
@binary = StringIO::new
|
480
|
+
@binary.write( f.read )
|
481
|
+
f.close
|
482
|
+
end
|
483
|
+
|
484
|
+
def create_source
|
485
|
+
extension = @@extensions[@lang]
|
486
|
+
source_file = Tempfile::new([@procedure.name,extension])
|
487
|
+
path = source_file.path
|
488
|
+
target = path.chomp(File::extname(path))+".o"
|
489
|
+
fill_code(source_file)
|
490
|
+
source_file.close
|
491
|
+
return [source_file, path, target]
|
492
|
+
end
|
493
|
+
|
494
|
+
def build(options = {})
|
495
|
+
compiler_options = BOAST::get_compiler_options
|
496
|
+
compiler_options.update(options)
|
497
|
+
return build_opencl(compiler_options) if @lang == CL
|
498
|
+
|
499
|
+
linker, ldflags = setup_compilers(compiler_options)
|
500
|
+
|
501
|
+
extension = @@extensions[@lang]
|
502
|
+
|
503
|
+
source_file, path, target = create_source
|
504
|
+
|
505
|
+
module_file_name, module_name = create_module_source(path)
|
506
|
+
|
434
507
|
module_target = module_file_name.chomp(File::extname(module_file_name))+".o"
|
435
508
|
module_final = module_file_name.chomp(File::extname(module_file_name))+".so"
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
kernel_file.write( kernel.binary.read )
|
441
|
-
kernel_file.close
|
442
|
-
kernel_files.push(kernel_file)
|
443
|
-
}
|
509
|
+
|
510
|
+
|
511
|
+
kernel_files = get_sub_kernels
|
512
|
+
|
444
513
|
file module_final => [module_target, target] do
|
445
514
|
#puts "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
|
446
515
|
sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{(kernel_files.collect {|f| f.path}).join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
|
447
516
|
end
|
448
517
|
Rake::Task[module_final].invoke
|
518
|
+
|
449
519
|
require(module_final)
|
450
520
|
eval "self.extend(#{module_name})"
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
File.unlink(module_file_name)
|
458
|
-
File.unlink(module_final)
|
521
|
+
|
522
|
+
save_binary(target)
|
523
|
+
|
524
|
+
[target, module_target, module_file_name, module_final].each { |fn|
|
525
|
+
File::unlink(fn)
|
526
|
+
}
|
459
527
|
kernel_files.each { |f|
|
460
528
|
f.unlink
|
461
529
|
}
|
@@ -464,13 +532,13 @@ EOF
|
|
464
532
|
|
465
533
|
def fill_code(source_file)
|
466
534
|
@code.rewind
|
467
|
-
source_file.puts "#include <inttypes.h>" if @lang ==
|
468
|
-
source_file.puts "#include <cuda.h>" if @lang ==
|
535
|
+
source_file.puts "#include <inttypes.h>" if @lang == C or @lang == CUDA
|
536
|
+
source_file.puts "#include <cuda.h>" if @lang == CUDA
|
469
537
|
source_file.write @code.read
|
470
|
-
if @lang ==
|
538
|
+
if @lang == CUDA then
|
471
539
|
source_file.write <<EOF
|
472
540
|
extern "C" {
|
473
|
-
#{@procedure.boast_header_s(
|
541
|
+
#{@procedure.boast_header_s(CUDA)}{
|
474
542
|
dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
|
475
543
|
dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
|
476
544
|
cudaEvent_t __start, __stop;
|
@@ -490,8 +558,8 @@ EOF
|
|
490
558
|
@code.rewind
|
491
559
|
end
|
492
560
|
|
493
|
-
def
|
494
|
-
module_file.
|
561
|
+
def module_header(module_file)
|
562
|
+
module_file.print <<EOF
|
495
563
|
#include "ruby.h"
|
496
564
|
#include <inttypes.h>
|
497
565
|
#include <time.h>
|
@@ -499,11 +567,13 @@ EOF
|
|
499
567
|
#include "narray.h"
|
500
568
|
#endif
|
501
569
|
EOF
|
502
|
-
if( @lang ==
|
570
|
+
if( @lang == CUDA ) then
|
503
571
|
module_file.print "#include <cuda_runtime.h>\n"
|
504
572
|
end
|
505
|
-
|
506
|
-
|
573
|
+
end
|
574
|
+
|
575
|
+
def module_preamble(module_file, module_name)
|
576
|
+
module_file.print <<EOF
|
507
577
|
VALUE #{module_name} = Qnil;
|
508
578
|
void Init_#{module_name}();
|
509
579
|
VALUE method_run(int argc, VALUE *argv, VALUE self);
|
@@ -511,46 +581,37 @@ void Init_#{module_name}() {
|
|
511
581
|
#{module_name} = rb_define_module("#{module_name}");
|
512
582
|
rb_define_method(#{module_name}, "run", method_run, -1);
|
513
583
|
}
|
514
|
-
VALUE method_run(int argc, VALUE *argv, VALUE self) {
|
515
584
|
EOF
|
516
|
-
|
517
|
-
|
585
|
+
end
|
586
|
+
|
587
|
+
def check_args(module_file)
|
588
|
+
if @lang == CUDA then
|
589
|
+
module_file.print <<EOF
|
518
590
|
if( argc < #{@procedure.parameters.length} || argc > #{@procedure.parameters.length + 1} )
|
519
591
|
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
|
520
|
-
VALUE rb_opts;
|
521
|
-
VALUE rb_ptr;
|
522
|
-
size_t block_size[3] = {1,1,1};
|
523
|
-
size_t block_number[3] = {1,1,1};
|
524
592
|
EOF
|
525
593
|
else
|
526
|
-
module_file.
|
594
|
+
module_file.print <<EOF
|
527
595
|
if( argc != #{@procedure.parameters.length} )
|
528
596
|
rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{@procedure.parameters.length})", argc);
|
529
|
-
VALUE rb_ptr;
|
530
597
|
EOF
|
531
598
|
end
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
@procedure.parameters.each { |param|
|
536
|
-
param_copy = param.copy
|
537
|
-
param_copy.constant = nil
|
538
|
-
param_copy.direction = nil
|
539
|
-
param_copy.decl
|
540
|
-
}
|
599
|
+
end
|
600
|
+
|
601
|
+
def get_params_value(module_file, argv, rb_ptr)
|
541
602
|
@procedure.parameters.each_index do |i|
|
542
603
|
param = @procedure.parameters[i]
|
543
604
|
if not param.dimension then
|
544
605
|
case param.type
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
606
|
+
when Int
|
607
|
+
(param === FuncCall::new("NUM2INT", argv[i])).pr if param.type.size == 4
|
608
|
+
(param === FuncCall::new("NUM2LONG", argv[i])).pr if param.type.size == 8
|
609
|
+
when Real
|
610
|
+
(param === FuncCall::new("NUM2DBL", argv[i])).pr
|
550
611
|
end
|
551
612
|
else
|
552
|
-
(rb_ptr === argv[i]).
|
553
|
-
if @lang ==
|
613
|
+
(rb_ptr === argv[i]).pr
|
614
|
+
if @lang == CUDA then
|
554
615
|
module_file.print <<EOF
|
555
616
|
if ( IsNArray(rb_ptr) ) {
|
556
617
|
struct NARRAY *n_ary;
|
@@ -558,16 +619,9 @@ EOF
|
|
558
619
|
Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
|
559
620
|
array_size = n_ary->total * na_sizeof[n_ary->type];
|
560
621
|
cudaMalloc( (void **) &#{param.name}, array_size);
|
561
|
-
EOF
|
562
|
-
# if param.direction == :in then
|
563
|
-
module_file.print <<EOF
|
564
622
|
cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
|
565
|
-
EOF
|
566
|
-
# end
|
567
|
-
module_file.print <<EOF
|
568
623
|
} else
|
569
624
|
rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
|
570
|
-
|
571
625
|
EOF
|
572
626
|
else
|
573
627
|
module_file.print <<EOF
|
@@ -583,8 +637,26 @@ EOF
|
|
583
637
|
end
|
584
638
|
end
|
585
639
|
end
|
586
|
-
|
587
|
-
|
640
|
+
end
|
641
|
+
|
642
|
+
def decl_module_params(module_file)
|
643
|
+
@procedure.parameters.each { |param|
|
644
|
+
param_copy = param.copy
|
645
|
+
param_copy.constant = nil
|
646
|
+
param_copy.direction = nil
|
647
|
+
param_copy.decl
|
648
|
+
}
|
649
|
+
module_file.print " #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
|
650
|
+
module_file.print " VALUE stats = rb_hash_new();\n"
|
651
|
+
module_file.print " struct timespec start, stop;\n"
|
652
|
+
module_file.print " unsigned long long int duration;\n"
|
653
|
+
end
|
654
|
+
|
655
|
+
def get_cuda_launch_bounds(module_file)
|
656
|
+
module_file.print <<EOF
|
657
|
+
VALUE rb_opts;
|
658
|
+
size_t block_size[3] = {1,1,1};
|
659
|
+
size_t block_number[3] = {1,1,1};
|
588
660
|
if( argc == #{@procedure.parameters.length + 1} ) {
|
589
661
|
rb_opts = argv[argc -1];
|
590
662
|
if ( rb_opts != Qnil ) {
|
@@ -615,23 +687,20 @@ EOF
|
|
615
687
|
}
|
616
688
|
}
|
617
689
|
EOF
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
module_file.print " unsigned long long int duration;\n"
|
623
|
-
module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
|
624
|
-
if @lang == BOAST::CUDA then
|
690
|
+
end
|
691
|
+
|
692
|
+
def create_procedure_call(module_file)
|
693
|
+
if @lang == CUDA then
|
625
694
|
module_file.print " duration = "
|
626
695
|
elsif @procedure.properties[:return] then
|
627
696
|
module_file.print " ret = "
|
628
697
|
end
|
629
698
|
module_file.print " #{@procedure.name}"
|
630
|
-
module_file.print "_" if @lang ==
|
631
|
-
module_file.print "_wrapper" if @lang ==
|
699
|
+
module_file.print "_" if @lang == FORTRAN
|
700
|
+
module_file.print "_wrapper" if @lang == CUDA
|
632
701
|
module_file.print "("
|
633
702
|
params = []
|
634
|
-
if(@lang ==
|
703
|
+
if(@lang == FORTRAN) then
|
635
704
|
@procedure.parameters.each { |param|
|
636
705
|
if param.dimension then
|
637
706
|
params.push( param.name )
|
@@ -650,18 +719,19 @@ EOF
|
|
650
719
|
end
|
651
720
|
}
|
652
721
|
end
|
653
|
-
if @lang ==
|
722
|
+
if @lang == CUDA then
|
654
723
|
params.push( "block_number", "block_size" )
|
655
724
|
end
|
656
725
|
module_file.print params.join(", ")
|
657
726
|
module_file.print " );\n"
|
658
|
-
|
727
|
+
end
|
659
728
|
|
660
|
-
|
729
|
+
def get_results(module_file, argv, rb_ptr)
|
730
|
+
if @lang == CUDA then
|
661
731
|
@procedure.parameters.each_index do |i|
|
662
732
|
param = @procedure.parameters[i]
|
663
733
|
if param.dimension then
|
664
|
-
(rb_ptr === argv[i]).
|
734
|
+
(rb_ptr === argv[i]).pr
|
665
735
|
module_file.print <<EOF
|
666
736
|
if ( IsNArray(rb_ptr) ) {
|
667
737
|
EOF
|
@@ -683,7 +753,10 @@ EOF
|
|
683
753
|
end
|
684
754
|
end
|
685
755
|
end
|
686
|
-
|
756
|
+
end
|
757
|
+
|
758
|
+
def store_result(module_file)
|
759
|
+
if @lang != CUDA then
|
687
760
|
module_file.print " duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
|
688
761
|
module_file.print " duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
|
689
762
|
end
|
@@ -694,14 +767,49 @@ EOF
|
|
694
767
|
module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
|
695
768
|
module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
|
696
769
|
end
|
770
|
+
end
|
771
|
+
|
772
|
+
def fill_module(module_file, module_name)
|
773
|
+
module_header(module_file)
|
774
|
+
@procedure.boast_header(@lang)
|
775
|
+
module_preamble(module_file, module_name)
|
776
|
+
|
777
|
+
module_file.puts "VALUE method_run(int argc, VALUE *argv, VALUE self) {"
|
778
|
+
|
779
|
+
check_args(module_file)
|
780
|
+
|
781
|
+
argc = @procedure.parameters.length
|
782
|
+
argv = Variable::new("argv", CustomType, :type_name => "VALUE", :dimension => [ Dimension::new(0,argc-1) ] )
|
783
|
+
rb_ptr = Variable::new("rb_ptr", CustomType, :type_name => "VALUE")
|
784
|
+
set_transition("VALUE", "VALUE", :default, CustomType::new(:type_name => "VALUE"))
|
785
|
+
rb_ptr.decl
|
786
|
+
|
787
|
+
decl_module_params(module_file)
|
788
|
+
|
789
|
+
get_params_value(module_file, argv, rb_ptr)
|
790
|
+
|
791
|
+
if @lang == CUDA then
|
792
|
+
module_file.print get_cuda_launch_bounds(module_file)
|
793
|
+
end
|
794
|
+
|
795
|
+
module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
|
796
|
+
|
797
|
+
create_procedure_call(module_file)
|
798
|
+
|
799
|
+
module_file.print " clock_gettime(CLOCK_REALTIME, &stop);\n"
|
800
|
+
|
801
|
+
get_results(module_file, argv, rb_ptr)
|
802
|
+
|
803
|
+
store_result(module_file)
|
804
|
+
|
697
805
|
module_file.print " return stats;\n"
|
698
|
-
module_file.print
|
806
|
+
module_file.print "}"
|
699
807
|
end
|
700
808
|
|
701
809
|
def method_missing(meth, *args, &block)
|
702
810
|
if meth.to_s == "run" then
|
703
|
-
|
704
|
-
|
811
|
+
build
|
812
|
+
run(*args,&block)
|
705
813
|
else
|
706
814
|
super
|
707
815
|
end
|
@@ -737,6 +845,104 @@ EOF
|
|
737
845
|
return res
|
738
846
|
end
|
739
847
|
|
848
|
+
# Selects the NArray type code used to load an array parameter from a
# binary dump.
#
# param:: object whose +type+ is inspected (its class and its +size+).
#
# Returns NArray::SFLOAT or NArray::FLOAT for a Real of size 4 or 8, and
# NArray::BYTE, NArray::SINT or NArray::INT for an Int of size 1, 2 or 4.
# Every other combination falls back to NArray::BYTE, and a warning is
# written to STDERR when debug? is true.
def get_array_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4
      type = NArray::SFLOAT
    when 8
      type = NArray::FLOAT
    else
      STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1
      type = NArray::BYTE
    when 2
      type = NArray::SINT
    when 4
      # 4-byte integers need the 32-bit code; SINT is the 16-bit one and
      # would split every element in two when the dump is reinterpreted.
      type = NArray::INT
    else
      STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
      type = NArray::BYTE
    end
  else
    STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if debug?
    type = NArray::BYTE
  end
  return type
end
|
877
|
+
|
878
|
+
# Returns the String#unpack directive used to decode a scalar parameter
# from a binary dump.
#
# param:: object whose +type+ is inspected (its class, +size+ and, for
#         Int, +signed?+).
#
# Real of size 4 / 8 gives "f" / "d". Int of size 1 / 2 / 4 / 8 gives
# "C" / "S" / "L" / "Q", lower-cased when the type is signed.
#
# Raises RuntimeError for an unsupported size, and also for a type that is
# neither Real nor Int (instead of returning nil, which made the caller fail
# inside unpack with an unrelated TypeError).
def get_scalar_type(param)
  if param.type.class == Real then
    case param.type.size
    when 4
      type = "f"
    when 8
      type = "d"
    else
      raise "Unsupported Real scalar size: #{param.type.size}!"
    end
  elsif param.type.class == Int then
    case param.type.size
    when 1
      type = "C"
    when 2
      type = "S"
    when 4
      type = "L"
    when 8
      type = "Q"
    else
      raise "Unsupported Int scalar size: #{param.type.size}!"
    end
    # Lower-case directives are the signed variants of the same width.
    type = type.downcase if param.type.signed?
  else
    raise "Unsupported scalar type: #{param.type.class}!"
  end
  return type
end
|
907
|
+
|
908
|
+
# Loads the dumped value of one parameter from +directory+.
#
# param::     object providing +name+, +dimension+, +direction+ and
#             +constant+ (plus whatever get_array_type / get_scalar_type
#             read from it).
# directory:: directory containing the dump files.
# suffix::    string appended to the parameter name to form the file name.
# intent::    when :out, parameters whose direction is :in or that are
#             constant are skipped.
#
# Returns nil for a skipped parameter. For a parameter with a dimension it
# returns an NArray built from the file contents (a one-element NArray when
# the file is empty); otherwise the first value unpacked from the file.
#
# The file is opened with the block form so the handle is released even
# when type lookup or decoding raises.
def read_param(param, directory, suffix, intent)
  if intent == :out and ( param.direction == :in or param.constant ) then
    return nil
  end
  path = directory + "/" + param.name + suffix
  res = File::open( path, "rb" ) { |f|
    if param.dimension then
      type = get_array_type(param)
      if f.size == 0 then
        # An empty dump still yields a usable placeholder array.
        NArray::new(type, 1)
      else
        NArray.to_na(f.read, type)
      end
    else
      type = get_scalar_type(param)
      f.read.unpack(type).first
    end
  }
  return res
end
|
927
|
+
|
928
|
+
# Reads the launch geometry stored in "<directory>/problem_size".
#
# The file is expected to contain at least two "<a,b,...>" groups; the first
# is taken as the local dimensions and the second as the global ones. Both
# lists are padded with 1 up to three entries.
#
# directory:: directory containing the problem_size file.
#
# Returns, when @lang is CL, a hash with :global_work_size (each global
# entry multiplied by the matching local entry) and :local_work_size;
# otherwise a hash with :block_number and :block_size.
#
# Raises RuntimeError when the file does not contain two "<...>" groups.
def get_gpu_dim(directory)
  # File::read closes the handle itself, even if parsing fails afterwards.
  s = File::read( directory + "/problem_size" )
  local_dim, global_dim = s.scan(/<(.*?)>/)
  if local_dim.nil? or global_dim.nil? then
    raise "Invalid problem_size file in #{directory}: expected two <...> groups!"
  end
  # Each scan match is a one-element array holding the captured text.
  local_dim = local_dim.first.split(",").collect { |e| e.to_i }
  global_dim = global_dim.first.split(",").collect { |e| e.to_i }
  local_dim.push(1) while local_dim.length < 3
  global_dim.push(1) while global_dim.length < 3
  if @lang == CL then
    local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
    res = { :global_work_size => global_dim, :local_work_size => local_dim }
  else
    res = { :block_number => global_dim, :block_size => local_dim }
  end
  return res
end
|
945
|
+
|
740
946
|
def load_ref_files( path = "", suffix = "", intent )
|
741
947
|
proc_path = path + "/#{@procedure.name}/"
|
742
948
|
res_h = {}
|
@@ -749,89 +955,10 @@ EOF
|
|
749
955
|
dirs.each { |d|
|
750
956
|
res = []
|
751
957
|
@procedure.parameters.collect { |param|
|
752
|
-
|
753
|
-
res.push nil
|
754
|
-
next
|
755
|
-
end
|
756
|
-
f = File::new( d+"/"+param.name+suffix, "rb" )
|
757
|
-
if param.dimension then
|
758
|
-
if param.type.class == BOAST::Real then
|
759
|
-
case param.type.size
|
760
|
-
when 4
|
761
|
-
type = NArray::SFLOAT
|
762
|
-
when 8
|
763
|
-
type = NArray::FLOAT
|
764
|
-
else
|
765
|
-
STDERR::puts "Unsupported Float size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
|
766
|
-
type = NArray::BYTE
|
767
|
-
end
|
768
|
-
elsif param.type.class == BOAST::Int then
|
769
|
-
case param.type.size
|
770
|
-
when 1
|
771
|
-
type = NArray::BYTE
|
772
|
-
when 2
|
773
|
-
type = NArray::SINT
|
774
|
-
when 4
|
775
|
-
type = NArray::SINT
|
776
|
-
else
|
777
|
-
STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if BOAST::debug
|
778
|
-
type = NArray::BYTE
|
779
|
-
end
|
780
|
-
else
|
781
|
-
STDERR::puts "Unkown array type for NArray: #{param.type}, defaulting to byte" if BOAST::debug
|
782
|
-
type = NArray::BYTE
|
783
|
-
end
|
784
|
-
if f.size == 0 then
|
785
|
-
res.push NArray::new(type, 1)
|
786
|
-
else
|
787
|
-
res.push NArray.to_na(f.read, type)
|
788
|
-
end
|
789
|
-
else
|
790
|
-
if param.type.class == BOAST::Real then
|
791
|
-
case param.type.size
|
792
|
-
when 4
|
793
|
-
type = "f"
|
794
|
-
when 8
|
795
|
-
type = "d"
|
796
|
-
else
|
797
|
-
raise "Unsupported Real scalar size: #{param.type.size}!"
|
798
|
-
end
|
799
|
-
elsif param.type.class == BOAST::Int then
|
800
|
-
case param.type.size
|
801
|
-
when 1
|
802
|
-
type = "C"
|
803
|
-
when 2
|
804
|
-
type = "S"
|
805
|
-
when 4
|
806
|
-
type = "L"
|
807
|
-
when 8
|
808
|
-
type = "Q"
|
809
|
-
else
|
810
|
-
raise "Unsupported Int scalar size: #{param.type.size}!"
|
811
|
-
end
|
812
|
-
if param.type.signed? then
|
813
|
-
type.downcase!
|
814
|
-
end
|
815
|
-
end
|
816
|
-
res.push f.read.unpack(type).first
|
817
|
-
end
|
818
|
-
f.close
|
958
|
+
res.push read_param(param, d, suffix, intent)
|
819
959
|
}
|
820
|
-
if @lang ==
|
821
|
-
|
822
|
-
s = f.read
|
823
|
-
local_dim, global_dim = s.scan(/<(.*?)>/)
|
824
|
-
local_dim = local_dim.pop.split(",").collect!{ |e| e.to_i }
|
825
|
-
global_dim = global_dim.pop.split(",").collect!{ |e| e.to_i }
|
826
|
-
(local_dim.length..2).each{ |i| local_dim[i] = 1 }
|
827
|
-
(global_dim.length..2).each{ |i| global_dim[i] = 1 }
|
828
|
-
if @lang == BOAST::CL then
|
829
|
-
local_dim.each_index { |indx| global_dim[indx] *= local_dim[indx] }
|
830
|
-
res.push( { :global_work_size => global_dim, :local_work_size => local_dim } )
|
831
|
-
else
|
832
|
-
res.push( { :block_number => global_dim, :block_size => local_dim } )
|
833
|
-
end
|
834
|
-
f.close
|
960
|
+
if @lang == CUDA or @lang == CL then
|
961
|
+
res.push get_gpu_dim(d)
|
835
962
|
end
|
836
963
|
res_h[d] = res
|
837
964
|
}
|