BOAST 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/BOAST.gemspec +16 -0
- data/LICENSE +22 -0
- data/lib/BOAST.rb +3 -0
- data/lib/BOAST/Algorithm.rb +1759 -0
- data/lib/BOAST/BOAST_OpenCL.rb +104 -0
- data/lib/BOAST/CKernel.rb +545 -0
- metadata +100 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
module BOAST
  # Maps an OpenCL dimension index (0, 1, 2) to the name of the matching
  # member of a CUDA dim3 built-in (x, y, z).
  @@ocl_cuda_dim_assoc = { 0 => "x", 1 => "y", 2 => "z" }

  # Builds the expression "<builtin>.<x|y|z>" for a CUDA built-in variable
  # (gridDim, blockDim, blockIdx, threadIdx) and an OpenCL-style dimension.
  # Raises "Unsupported dimension!" when dim is not 0, 1 or 2.
  def BOAST::cuda_builtin_member(builtin, dim)
    d = @@ocl_cuda_dim_assoc[dim]
    raise "Unsupported dimension!" if not d
    return Expression::new(".", builtin, d)
  end
  private_class_method :cuda_builtin_member

  # Returns a work-group barrier call for the current language.
  # For OpenCL, locality must contain :local and/or :global and selects the
  # memory fence flags; for CUDA the arguments are ignored and __syncthreads
  # is used. Raises for an empty/unknown locality or any other language.
  def BOAST::barrier(*locality)
    if @@lang == CL then
      fences = []
      fences.push("CLK_LOCAL_MEM_FENCE") if locality.include?(:local)
      fences.push("CLK_GLOBAL_MEM_FENCE") if locality.include?(:global)
      raise "Unsupported locality" if fences.empty?
      return FuncCall::new("barrier", fences.join(" | "))
    elsif @@lang == CUDA then
      return FuncCall::new("__syncthreads")
    else
      raise "Unsupported language!"
    end
  end

  # Returns a call to get_work_dim; only available when generating OpenCL.
  def BOAST::get_work_dim
    raise "Unsupported language!" if @@lang != CL
    return FuncCall::new("get_work_dim")
  end

  # Total number of work-items along dim
  # (CUDA equivalent: gridDim.d * blockDim.d).
  def BOAST::get_global_size(dim)
    if @@lang == CL then
      return FuncCall::new("get_global_size",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("gridDim", dim)*cuda_builtin_member("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Global index of the work-item along dim
  # (CUDA equivalent: threadIdx.d + blockIdx.d * blockDim.d).
  def BOAST::get_global_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_global_id",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("threadIdx", dim)+cuda_builtin_member("blockIdx", dim)*cuda_builtin_member("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Size of a work-group along dim (CUDA equivalent: blockDim.d).
  def BOAST::get_local_size(dim)
    if @@lang == CL then
      return FuncCall::new("get_local_size",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("blockDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Index of the work-item inside its work-group along dim
  # (CUDA equivalent: threadIdx.d).
  def BOAST::get_local_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_local_id",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("threadIdx", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Number of work-groups along dim (CUDA equivalent: gridDim.d).
  def BOAST::get_num_groups(dim)
    if @@lang == CL then
      return FuncCall::new("get_num_groups",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("gridDim", dim)
    else
      raise "Unsupported language!"
    end
  end

  # Index of the work-group along dim (CUDA equivalent: blockIdx.d).
  def BOAST::get_group_id(dim)
    if @@lang == CL then
      return FuncCall::new("get_group_id",dim)
    elsif @@lang == CUDA then
      return cuda_builtin_member("blockIdx", dim)
    else
      raise "Unsupported language!"
    end
  end

end
|
@@ -0,0 +1,545 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rake'
|
4
|
+
require 'tempfile'
|
5
|
+
require 'rbconfig'
|
6
|
+
require 'systemu'
|
7
|
+
|
8
|
+
module BOAST
|
9
|
+
@@verbose = false
|
10
|
+
|
11
|
+
# Reads the module-wide verbosity flag (the @@verbose class variable).
def BOAST.get_verbose
  @@verbose
end
|
14
|
+
|
15
|
+
# Stores the given value as the module-wide verbosity flag and returns it.
def BOAST.set_verbose(verbose)
  @@verbose = verbose
end
|
18
|
+
|
19
|
+
class CKernel
|
20
|
+
include Rake::DSL
|
21
|
+
attr_accessor :code
|
22
|
+
attr_accessor :procedure
|
23
|
+
attr_accessor :lang
|
24
|
+
attr_accessor :binary
|
25
|
+
attr_accessor :kernels
|
26
|
+
|
27
|
+
def initialize(options={})
|
28
|
+
if options[:code] then
|
29
|
+
@code = options[:code]
|
30
|
+
elsif BOAST::get_chain_code
|
31
|
+
@code = BOAST::get_output
|
32
|
+
@code.seek(0,SEEK_END)
|
33
|
+
else
|
34
|
+
@code = StringIO::new
|
35
|
+
end
|
36
|
+
BOAST::set_output( @code )
|
37
|
+
if options[:kernels] then
|
38
|
+
@kernels = options[:kernels]
|
39
|
+
else
|
40
|
+
@kernels = []
|
41
|
+
end
|
42
|
+
if options[:lang] then
|
43
|
+
@lang = options[:lang]
|
44
|
+
else
|
45
|
+
@lang = BOAST::get_lang
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Writes the whole code buffer to standard output, starting from its beginning.
def print
  @code.rewind
  listing = @code.read
  puts listing
end
|
53
|
+
|
54
|
+
# Returns the full contents of the code buffer, read from its beginning.
def to_str
  @code.rewind
  text = code.read
  text
end
|
58
|
+
|
59
|
+
# Returns the full contents of the code buffer, read from its beginning.
def to_s
  @code.rewind
  text = code.read
  text
end
|
63
|
+
|
64
|
+
# Resets Rake and registers the rules that compile .c, .f90, .cpp and .cu
# sources into .o object files.
#
# options (all optional):
#   :verbose              - echo commands through sh (defaults to BOAST::get_verbose)
#   :FC, :CC, :CXX, :NVCC - compiler executables
#   :FCFLAGS, :NVCCFLAGS  - replace the default "-O2 ..." flags of that compiler
#   :CFLAGS               - appended to the default C flags
#   :LDFLAGS              - extra linker flags
#
# Returns the linker flags string to be used when linking the final module.
def setup_compiler(options = {})
  Rake::Task::clear
  verbose = options[:verbose] || BOAST::get_verbose
  Rake::verbose(verbose)
  Rake::FileUtilsExt.verbose_flag=verbose

  f_compiler = options[:FC] || "gfortran"
  c_compiler = options[:CC] || "cc"
  cxx_compiler = options[:CXX] || "g++"
  cuda_compiler = options[:NVCC] || "nvcc"

  # "+=" builds new strings, so strings passed in through options are never
  # modified in place.
  f_flags = options[:FCFLAGS] || "-O2 -Wall"
  f_flags += " -fPIC"
  f_flags += " -fno-second-underscore" if f_compiler == 'g95'
  ld_flags = options[:LDFLAGS] || ""
  cuda_flags = options[:NVCCFLAGS] || "-O2"
  cuda_flags += " --compiler-options '-fPIC'"

  includes = "-I#{RbConfig::CONFIG["archdir"]}"
  includes += " -I#{RbConfig::CONFIG["rubyhdrdir"]} -I#{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}"
  ld_flags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]} -lrt"
  ld_flags += " -lcudart" if @lang == BOAST::CUDA

  # narray is optional: when the gem is found its headers are added to the
  # include path and HAVE_NARRAY_H is defined for the C wrapper.
  narray_path = nil
  begin
    spec = Gem::Specification::find_by_name('narray')
    narray_path = spec.full_gem_path
  rescue Gem::LoadError
    # narray is not installed: build without it.
  rescue NoMethodError
    # Fallback for RubyGems versions without Gem::Specification::find_by_name.
    spec = Gem::available?('narray')
    if spec then
      require 'narray'
      narray_path = Gem.loaded_specs['narray'].full_gem_path
    end
  end
  includes += " -I#{narray_path}" if narray_path

  cflags = "-O2 -Wall -fPIC #{includes}"
  cxxflags = String::new(cflags)
  cflags += " -DHAVE_NARRAY_H" if narray_path
  # Keep a separating space so user flags are not fused onto the previous flag.
  cflags += " #{options[:CFLAGS]}" if options[:CFLAGS]

  # Runs one compiler command. In verbose mode it goes through Rake's sh;
  # otherwise output is captured and stderr is only shown on failure.
  runner = lambda { |t, call_string|
    if verbose then
      sh call_string
    else
      status, stdout, stderr = systemu call_string
      if not status.success? then
        puts stderr
        fail "#{t.source}: compilation failed"
      end
      status.success?
    end
  }

  rule '.o' => '.c' do |t|
    runner.call(t, "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}")
  end

  rule '.o' => '.f90' do |t|
    runner.call(t, "#{f_compiler} #{f_flags} -c -o #{t.name} #{t.source}")
  end

  rule '.o' => '.cpp' do |t|
    runner.call(t, "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}")
  end

  rule '.o' => '.cu' do |t|
    runner.call(t, "#{cuda_compiler} #{cuda_flags} -c -o #{t.name} #{t.source}")
  end
  return ld_flags
end
|
147
|
+
|
148
|
+
# Builds the kernel source with OpenCL and defines a singleton #run method on
# this object that launches it.
#
# options:
#   :platform_vendor - pattern matched against platform vendors (last match wins)
#   :device_type     - OpenCL device type (defaults to OpenCL::Device::Type::ALL)
#   :device_name     - pattern matched against device names (last match wins)
#   :CLFLAGS         - options passed to the OpenCL program build
#   :verbose         - print the build log of every device to STDERR
#
# The generated #run takes one argument per procedure parameter plus an
# optional trailing hash read for :global_work_size and :local_work_size, and
# returns a hash with :start, :end and :duration (seconds).
#
# Raises if no platform or device matches. Returns self.
def build_opencl(options)
  require 'opencl_ruby_ffi'
  platforms = OpenCL::get_platforms
  if options[:platform_vendor] then
    platform = platforms.select { |p| p.vendor.match(options[:platform_vendor]) }.last
  else
    platform = platforms.first
  end
  raise "No matching OpenCL platform found" if not platform

  type = options[:device_type] || OpenCL::Device::Type::ALL
  devices = platform.devices(type)
  if options[:device_name] then
    device = devices.select { |d| d.name.match(options[:device_name]) }.last
  else
    device = devices.first
  end
  raise "No matching OpenCL device found" if not device

  @context = OpenCL::create_context([device])
  program = @context.create_program_with_source([@code.string])
  program.build(:options => options[:CLFLAGS])
  if options[:verbose] then
    program.build_log.each { |dev,log|
      # Label each log with the device it belongs to.
      STDERR.puts "#{dev.name}: #{log}"
    }
  end
  @queue = @context.create_command_queue(device, :properties => OpenCL::CommandQueue::PROFILING_ENABLE)
  @kernel = program.create_kernel(@procedure.name)
  nparams = @procedure.parameters.length
  # The text below is Ruby source evaluated in the context of this object.
  # "\#{...}" is left for the generated method to interpolate at call time;
  # "#{...}" is expanded now.
  # NOTE(review): array parameters that are both read and written are assumed
  # to carry direction :inout - confirm against Variable's definition.
  run_method = <<EOF
def self.run(*args)
  raise "Wrong number of arguments \#{args.length} for #{nparams}" if args.length > #{nparams + 1} or args.length < #{nparams}
  params = []
  opts = {}
  opts = args.pop if args.length == #{nparams + 1}
  @procedure.parameters.each_index { |i|
    param = @procedure.parameters[i]
    if param.dimension then
      if param.direction == :inout then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size )
        @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
      elsif param.direction == :in then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size, :flags => OpenCL::Mem::Flags::READ_ONLY )
        @queue.enqueue_write_buffer( params[i], args[i], :blocking => true )
      elsif param.direction == :out then
        params[i] = @context.create_buffer( args[i].size * args[i].element_size, :flags => OpenCL::Mem::Flags::WRITE_ONLY )
      else
        params[i] = @context.create_buffer( args[i].size * args[i].element_size )
      end
    else
      if param.type.is_a?(Real) then
        params[i] = OpenCL::Half::new(args[i]) if param.type.size == 2
        params[i] = OpenCL::Float::new(args[i]) if param.type.size == 4
        params[i] = OpenCL::Double::new(args[i]) if param.type.size == 8
      elsif param.type.is_a?(Int) then
        if param.type.signed
          params[i] = OpenCL::Char::new(args[i]) if param.type.size == 1
          params[i] = OpenCL::Short::new(args[i]) if param.type.size == 2
          params[i] = OpenCL::Int::new(args[i]) if param.type.size == 4
          params[i] = OpenCL::Long::new(args[i]) if param.type.size == 8
        else
          params[i] = OpenCL::UChar::new(args[i]) if param.type.size == 1
          params[i] = OpenCL::UShort::new(args[i]) if param.type.size == 2
          params[i] = OpenCL::UInt::new(args[i]) if param.type.size == 4
          params[i] = OpenCL::ULong::new(args[i]) if param.type.size == 8
        end
      else
        params[i] = args[i]
      end
    end
  }
  params.each_index{ |i|
    @kernel.set_arg(i, params[i])
  }
  event = @queue.enqueue_NDrange_kernel(@kernel, opts[:global_work_size], :local_work_size => opts[:local_work_size])
  @procedure.parameters.each_index { |i|
    param = @procedure.parameters[i]
    if param.dimension then
      if param.direction == :inout or param.direction == :out then
        @queue.enqueue_read_buffer( params[i], args[i], :blocking => true )
      end
    end
  }
  result = {}
  result[:start] = event.profiling_command_start
  result[:end] = event.profiling_command_end
  result[:duration] = (result[:end] - result[:start])/1000000000.0
  return result
end
EOF
  eval run_method
  return self
end
|
244
|
+
|
245
|
+
# Compiles the kernel and a generated Ruby C-extension wrapper around it,
# links them (together with the object code of dependent kernels) into a
# shared module, loads that module and extends this object with it.
# The kernel's object code is kept in @binary.
#
# OpenCL kernels are delegated to build_opencl. Besides the options read by
# setup_compiler, :CC and :LD select the linker (default: the C compiler).
#
# Intermediate files are removed whether or not the build succeeds, and the
# BOAST language/output are restored even if generating the wrapper fails.
# Returns self.
def build(options = {})
  return build_opencl(options) if @lang == BOAST::CL
  ldflags = self.setup_compiler(options)
  extension = ".c" if @lang == BOAST::C
  extension = ".cu" if @lang == BOAST::CUDA
  extension = ".f90" if @lang == BOAST::FORTRAN
  #temporary
  c_compiler = options[:CC] || "cc"
  linker = options[:LD] || c_compiler
  #end temporary
  source_file = Tempfile::new([@procedure.name,extension])
  path = source_file.path
  base = path.chomp(File::extname(path))
  target = base + ".o"
  fill_code(source_file)
  source_file.close

  # The wrapper lives next to the kernel source as Mod_<stem>; '-' is replaced
  # because the name is also used as a C identifier and a Ruby module name.
  dir, stem = File::split(base)
  module_name = "Mod_" + stem.gsub("-","_")
  module_file_name = dir + "/" + module_name + ".c"
  module_target = dir + "/" + module_name + ".o"
  module_final = dir + "/" + module_name + ".so"

  # Keep the Tempfile objects referenced until linking is done: an
  # unreferenced Tempfile may be finalized, which deletes its file.
  kernel_tempfiles = []
  begin
    previous_lang = BOAST::get_lang
    previous_output = BOAST::get_output
    begin
      # The wrapper is always generated as C, whatever the kernel language.
      BOAST::set_lang(BOAST::C)
      File::open(module_file_name,"w+") { |module_file|
        BOAST::set_output(module_file)
        fill_module(module_file, module_name)
      }
    ensure
      BOAST::set_lang(previous_lang)
      BOAST::set_output(previous_output)
    end

    @kernels.each { |kernel|
      kernel_file = Tempfile::new([kernel.procedure.name,".o"])
      kernel_tempfiles.push(kernel_file)
      kernel_file.binmode
      kernel.binary.rewind
      kernel_file.write( kernel.binary.read )
      kernel_file.close
    }
    kernel_files = kernel_tempfiles.collect { |f| f.path }

    file module_final => [module_target, target] do
      sh "#{linker} -shared -o #{module_final} #{module_target} #{target} #{kernel_files.join(" ")} -Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
    end
    Rake::Task[module_final].invoke
    require(module_final)
    eval "self.extend(#{module_name})"
    File::open(target,"rb") { |f|
      @binary = StringIO::new
      @binary.write( f.read )
    }
  ensure
    [target, module_target, module_file_name, module_final].each { |f|
      File.unlink(f) if File.exist?(f)
    }
    kernel_tempfiles.each { |f| f.unlink }
  end
  return self
end
|
306
|
+
|
307
|
+
# Writes the compilable kernel source to source_file: the language specific
# #include lines, the code buffer, and for CUDA an extern "C" host wrapper
# that launches the kernel and returns the elapsed time measured with CUDA
# events (milliseconds scaled by 1e6).
# The code buffer is rewound before and after being read.
def fill_code(source_file)
  @code.rewind
  source_file.puts "#include <inttypes.h>" if @lang == BOAST::C or @lang == BOAST::CUDA
  source_file.puts "#include <cuda.h>" if @lang == BOAST::CUDA
  source_file.write @code.read
  if @lang == BOAST::CUDA then
    # The two events are destroyed once the elapsed time has been read, so
    # repeated calls of the wrapper do not accumulate event objects.
    source_file.write <<EOF
extern "C" {
  #{@procedure.header(BOAST::CUDA,false)}{
    dim3 dimBlock(block_size[0], block_size[1], block_size[2]);
    dim3 dimGrid(block_number[0], block_number[1], block_number[2]);
    cudaEvent_t __start, __stop;
    float __time;
    cudaEventCreate(&__start);
    cudaEventCreate(&__stop);
    cudaEventRecord(__start, 0);
    #{@procedure.name}<<<dimGrid,dimBlock>>>(#{@procedure.parameters.join(", ")});
    cudaEventRecord(__stop, 0);
    cudaEventSynchronize(__stop);
    cudaEventElapsedTime(&__time, __start, __stop);
    cudaEventDestroy(__start);
    cudaEventDestroy(__stop);
    return (unsigned long long int)((double)__time*(double)1e6);
  }
}
EOF
  end
  @code.rewind
end
|
334
|
+
|
335
|
+
# Writes to module_file the C source of a Ruby extension defining the module
# module_name with a single method "run" that converts its Ruby arguments,
# calls the compiled procedure and returns a hash of statistics
# (:duration, plus :return when the procedure returns a value).
#
# Statements produced through BOAST objects (decl, ===, print) go to the
# current BOAST output rather than to module_file directly.
# NOTE(review): this relies on the caller having made module_file the current
# BOAST output, as build does - confirm for other callers.
#
# For CUDA, array arguments must be NArrays: device memory is allocated for
# each of them, :in arrays are copied to the device before the call, :out
# arrays are copied back afterwards, and the device memory is freed. An
# optional trailing hash may carry :block_size and :block_number arrays.
def fill_module(module_file, module_name)
  nparams = @procedure.parameters.length
  module_file.write <<EOF
#include "ruby.h"
#include <inttypes.h>
#include <time.h>
#ifdef HAVE_NARRAY_H
#include "narray.h"
#endif
EOF
  if( @lang == BOAST::CUDA ) then
    module_file.print "#include <cuda_runtime.h>\n"
  end
  module_file.print @procedure.header(@lang)
  module_file.write <<EOF
VALUE #{module_name} = Qnil;
void Init_#{module_name}();
VALUE method_run(int argc, VALUE *argv, VALUE self);
void Init_#{module_name}() {
  #{module_name} = rb_define_module("#{module_name}");
  rb_define_method(#{module_name}, "run", method_run, -1);
}
VALUE method_run(int argc, VALUE *argv, VALUE self) {
EOF
  if( @lang == BOAST::CUDA ) then
    module_file.write <<EOF
  if( argc < #{nparams} || argc > #{nparams + 1} )
    rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{nparams})", argc);
  VALUE rb_opts;
  VALUE rb_ptr;
  size_t block_size[3] = {1,1,1};
  size_t block_number[3] = {1,1,1};
EOF
  else
    module_file.write <<EOF
  if( argc != #{nparams} )
    rb_raise(rb_eArgError, "wrong number of arguments for #{@procedure.name} (%d for #{nparams})", argc);
  VALUE rb_ptr;
EOF
  end
  # BOAST-side stand-ins for the C variables argv and rb_ptr, used to generate
  # the assignments below.
  argv = Variable::new("argv",Real,{:dimension => [ Dimension::new(0,nparams-1) ] })
  rb_ptr = Variable::new("rb_ptr",Int)
  # Declare one C local per parameter, without constant/direction qualifiers.
  @procedure.parameters.each { |param|
    param_copy = param.copy
    param_copy.constant = nil
    param_copy.direction = nil
    param_copy.decl
  }
  # Convert every Ruby argument into its C local.
  @procedure.parameters.each_index do |i|
    param = @procedure.parameters[i]
    if not param.dimension then
      case param.type
      when Int
        (param === FuncCall::new("NUM2INT", argv[i])).print if param.type.size == 4
        (param === FuncCall::new("NUM2LONG", argv[i])).print if param.type.size == 8
      when Real
        (param === FuncCall::new("NUM2DBL", argv[i])).print
      end
    else
      (rb_ptr === argv[i]).print
      if @lang == BOAST::CUDA then
        module_file.print <<EOF
  if ( IsNArray(rb_ptr) ) {
    struct NARRAY *n_ary;
    size_t array_size;
    Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
    array_size = n_ary->total * na_sizeof[n_ary->type];
    cudaMalloc( (void **) &#{param.name}, array_size);
EOF
        if param.direction == :in then
          module_file.print <<EOF
    cudaMemcpy(#{param.name}, (void *) n_ary->ptr, array_size, cudaMemcpyHostToDevice);
EOF
        end
        module_file.print <<EOF
  } else
    rb_raise(rb_eArgError, "wrong type of argument %d", #{i});

EOF
      else
        module_file.print <<EOF
  if (TYPE(rb_ptr) == T_STRING) {
    #{param.name} = (void *) RSTRING_PTR(rb_ptr);
  } else if ( IsNArray(rb_ptr) ) {
    struct NARRAY *n_ary;
    Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
    #{param.name} = (void *) n_ary->ptr;
  } else
    rb_raise(rb_eArgError, "wrong type of argument %d", #{i});
EOF
      end
    end
  end
  if @lang == BOAST::CUDA then
    module_file.write <<EOF
  if( argc == #{nparams + 1} ) {
    rb_opts = argv[argc -1];
    if ( rb_opts != Qnil ) {
      VALUE rb_array_data = Qnil;
      int i;
      if (TYPE(rb_opts) != T_HASH)
        rb_raise(rb_eArgError, "Cuda options should be passed as a hash");
      rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_size")));
      if( rb_ptr != Qnil ) {
        if (TYPE(rb_ptr) != T_ARRAY)
          rb_raise(rb_eArgError, "Cuda option block_size should be an array");
        for(i=0; i<3; i++) {
          rb_array_data = rb_ary_entry(rb_ptr, i);
          if( rb_array_data != Qnil )
            block_size[i] = (size_t) NUM2LONG( rb_array_data );
        }
      }
      rb_ptr = rb_hash_aref(rb_opts, ID2SYM(rb_intern("block_number")));
      if( rb_ptr != Qnil ) {
        if (TYPE(rb_ptr) != T_ARRAY)
          rb_raise(rb_eArgError, "Cuda option block_number should be an array");
        for(i=0; i<3; i++) {
          rb_array_data = rb_ary_entry(rb_ptr, i);
          if( rb_array_data != Qnil )
            block_number[i] = (size_t) NUM2LONG( rb_array_data );
        }
      }
    }
  }
EOF
  end
  module_file.print " #{@procedure.properties[:return].type.decl} ret;\n" if @procedure.properties[:return]
  module_file.print " VALUE stats = rb_hash_new();\n"
  module_file.print " struct timespec start, stop;\n"
  module_file.print " unsigned long long int duration;\n"
  module_file.print " clock_gettime(CLOCK_REALTIME, &start);\n"
  # For CUDA the wrapper itself returns the measured duration.
  if @lang == BOAST::CUDA then
    module_file.print " duration = "
  elsif @procedure.properties[:return] then
    module_file.print " ret = "
  end
  module_file.print " #{@procedure.name}"
  module_file.print "_" if @lang == BOAST::FORTRAN
  module_file.print "_wrapper" if @lang == BOAST::CUDA
  module_file.print "("
  if(@lang == BOAST::FORTRAN) then
    # Scalars are passed by address to the Fortran procedure; arrays are
    # already pointers.
    params = @procedure.parameters.collect { |param|
      param.dimension ? param.name : "&"+param.name
    }
    module_file.print params.join(", ")
  else
    module_file.print @procedure.parameters.join(", ")
  end
  if @lang == BOAST::CUDA then
    module_file.print ", " if nparams > 0
    module_file.print "block_number, block_size"
  end
  module_file.print " );\n"
  module_file.print " clock_gettime(CLOCK_REALTIME, &stop);\n"

  if @lang == BOAST::CUDA then
    @procedure.parameters.each_index do |i|
      param = @procedure.parameters[i]
      if param.dimension then
        (rb_ptr === argv[i]).print
        module_file.print <<EOF
  if ( IsNArray(rb_ptr) ) {
EOF
        if param.direction == :out then
          # cudaMemcpy takes (dst, src, count, kind): for a device-to-host copy
          # the NArray storage is the destination and the device buffer the
          # source.
          module_file.print <<EOF
    struct NARRAY *n_ary;
    size_t array_size;
    Data_Get_Struct(rb_ptr, struct NARRAY, n_ary);
    array_size = n_ary->total * na_sizeof[n_ary->type];
    cudaMemcpy((void *) n_ary->ptr, #{param.name}, array_size, cudaMemcpyDeviceToHost);
EOF
        end
        module_file.print <<EOF
    cudaFree( (void *) #{param.name});
  } else
    rb_raise(rb_eArgError, "wrong type of argument %d", #{i});

EOF
      end
    end
  end
  if @lang != BOAST::CUDA then
    module_file.print " duration = (unsigned long long int)stop.tv_sec * (unsigned long long int)1000000000 + stop.tv_nsec;\n"
    module_file.print " duration -= (unsigned long long int)start.tv_sec * (unsigned long long int)1000000000 + start.tv_nsec;\n"
  end
  module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"duration\")),rb_float_new((double)duration*(double)1e-9));\n"
  if @procedure.properties[:return] then
    type_ret = @procedure.properties[:return].type
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((long long)ret));\n" if type_ret.kind_of?(Int) and type_ret.signed
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_int_new((unsigned long long)ret));\n" if type_ret.kind_of?(Int) and not type_ret.signed
    module_file.print " rb_hash_aset(stats,ID2SYM(rb_intern(\"return\")),rb_float_new((double)ret));\n" if type_ret.kind_of?(Real)
  end
  module_file.print " return stats;\n"
  module_file.print "}"
end
|
535
|
+
|
536
|
+
# Builds the kernel lazily: the first call to #run on a kernel that has not
# been built yet triggers #build, which is expected to define #run, and the
# call is then forwarded. Any other unknown method is handled by super.
#
# Raises NoMethodError if #build did not define #run, instead of re-entering
# this method forever.
def method_missing(meth, *args, &block)
  return super unless meth == :run
  build
  unless methods.include?(:run)
    raise NoMethodError, "build did not define a `run' method for #{self.class}"
  end
  run(*args, &block)
end

# Reports #run as available even before the kernel is built, since
# method_missing handles it.
def respond_to_missing?(meth, include_private = false)
  meth == :run || super
end
|
544
|
+
end
|
545
|
+
end
|