BOAST 1.3.5 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BOAST.gemspec +1 -1
- data/LICENSE +13 -1
- data/README.md +62 -13
- data/lib/BOAST.rb +3 -1
- data/lib/BOAST/Language/ARMCPUID_by_name.rb +3752 -0
- data/lib/BOAST/Language/Algorithm.rb +4 -24
- data/lib/BOAST/Language/Architectures.rb +5 -0
- data/lib/BOAST/Language/Arithmetic.rb +38 -5
- data/lib/BOAST/Language/BOAST_OpenCL.rb +7 -8
- data/lib/BOAST/Language/Case.rb +10 -3
- data/lib/BOAST/Language/Config.rb +36 -12
- data/lib/BOAST/Language/ControlStructure.rb +7 -3
- data/lib/BOAST/Language/DataTypes.rb +6 -0
- data/lib/BOAST/Language/Expression.rb +26 -2
- data/lib/BOAST/Language/For.rb +59 -30
- data/lib/BOAST/Language/FuncCall.rb +9 -5
- data/lib/BOAST/Language/Functors.rb +1 -1
- data/lib/BOAST/Language/HighLevelOperators.rb +172 -0
- data/lib/BOAST/Language/If.rb +25 -9
- data/lib/BOAST/Language/Index.rb +5 -5
- data/lib/BOAST/Language/Intrinsics.rb +40 -27
- data/lib/BOAST/Language/OpenMP.rb +1 -0
- data/lib/BOAST/Language/Operators.rb +221 -34
- data/lib/BOAST/Language/Parens.rb +3 -2
- data/lib/BOAST/Language/Procedure.rb +18 -5
- data/lib/BOAST/Language/Slice.rb +176 -44
- data/lib/BOAST/Language/Variable.rb +99 -56
- data/lib/BOAST/Language/While.rb +18 -3
- data/lib/BOAST/Language/{CPUID_by_name.rb → X86CPUID_by_name.rb} +0 -0
- data/lib/BOAST/Optimization/Optimization.rb +2 -0
- data/lib/BOAST/Runtime/AffinityProbe.rb +7 -3
- data/lib/BOAST/Runtime/CKernel.rb +3 -0
- data/lib/BOAST/Runtime/CRuntime.rb +4 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +404 -77
- data/lib/BOAST/Runtime/Compilers.rb +44 -18
- data/lib/BOAST/Runtime/Config.rb +9 -0
- data/lib/BOAST/Runtime/EnergyProbe.rb +19 -3
- data/lib/BOAST/Runtime/FFIRuntime.rb +23 -0
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +1 -1
- data/lib/BOAST/Runtime/MAQAO.rb +29 -0
- data/lib/BOAST/Runtime/NonRegression.rb +64 -3
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +16 -6
- data/lib/BOAST/Runtime/Probe.rb +21 -1
- metadata +5 -3
data/lib/BOAST/Runtime/Compilers.rb
CHANGED
@@ -43,10 +43,13 @@ module BOAST
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
def setup_c_compiler(options, includes, narray_path, runner)
|
46
|
+
def setup_c_compiler(options, includes, narray_path, runner, probes)
|
47
47
|
c_mppa_compiler = "k1-gcc"
|
48
48
|
c_compiler = options[:CC]
|
49
49
|
cflags = options[:CFLAGS]
|
50
|
+
probes.each { |p|
|
51
|
+
cflags += " #{p.cflags}" if p.respond_to?(:cflags)
|
52
|
+
}
|
50
53
|
cflags += " -march=#{get_model}"
|
51
54
|
cflags += " -fPIC #{includes}"
|
52
55
|
cflags += " -DHAVE_NARRAY_H" if narray_path
|
@@ -58,6 +61,13 @@ module BOAST
|
|
58
61
|
cflags += " #{openmp_cflags}"
|
59
62
|
end
|
60
63
|
|
64
|
+
cflags_no_fpic = cflags.gsub("-fPIC","")
|
65
|
+
|
66
|
+
rule ".nofpic#{objext}" => '.c' do |t|
|
67
|
+
c_call_string = "#{c_compiler} #{cflags_no_fpic} -c -o #{t.name} #{t.source}"
|
68
|
+
runner.call(t, c_call_string)
|
69
|
+
end
|
70
|
+
|
61
71
|
rule ".#{objext}" => '.c' do |t|
|
62
72
|
c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
|
63
73
|
runner.call(t, c_call_string)
|
@@ -76,7 +86,7 @@ module BOAST
|
|
76
86
|
end
|
77
87
|
end
|
78
88
|
|
79
|
-
def setup_cxx_compiler(options, includes, runner)
|
89
|
+
def setup_cxx_compiler(options, includes, runner, probes)
|
80
90
|
cxx_compiler = options[:CXX]
|
81
91
|
cxxflags = options[:CXXFLAGS]
|
82
92
|
cxxflags += " -fPIC #{includes}"
|
@@ -86,13 +96,20 @@ module BOAST
|
|
86
96
|
cxxflags += " #{openmp_cxxflags}"
|
87
97
|
end
|
88
98
|
|
99
|
+
cxxflags_no_fpic = cxxflags.gsub("-fPIC","")
|
100
|
+
|
101
|
+
rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
|
102
|
+
cxx_call_string = "#{cxx_compiler} #{cxxflags_no_fpic} -c -o #{t.name} #{t.source}"
|
103
|
+
runner.call(t, cxx_call_string)
|
104
|
+
end
|
105
|
+
|
89
106
|
rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
|
90
107
|
cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
|
91
108
|
runner.call(t, cxx_call_string)
|
92
109
|
end
|
93
110
|
end
|
94
111
|
|
95
|
-
def setup_fortran_compiler(options, runner)
|
112
|
+
def setup_fortran_compiler(options, runner, probes)
|
96
113
|
f_compiler = options[:FC]
|
97
114
|
fcflags = options[:FCFLAGS]
|
98
115
|
fcflags += " -march=#{get_model}"
|
@@ -104,13 +121,20 @@ module BOAST
|
|
104
121
|
fcflags += " #{openmp_fcflags}"
|
105
122
|
end
|
106
123
|
|
124
|
+
fcflags_no_fpic = fcflags.gsub("-fPIC","")
|
125
|
+
|
126
|
+
rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
|
127
|
+
f_call_string = "#{f_compiler} #{fcflags_no_fpic} -c -o #{t.name} #{t.source}"
|
128
|
+
runner.call(t, f_call_string)
|
129
|
+
end
|
130
|
+
|
107
131
|
rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
|
108
132
|
f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
|
109
133
|
runner.call(t, f_call_string)
|
110
134
|
end
|
111
135
|
end
|
112
136
|
|
113
|
-
def setup_cuda_compiler(options, runner)
|
137
|
+
def setup_cuda_compiler(options, runner, probes)
|
114
138
|
cuda_compiler = options[:NVCC]
|
115
139
|
cudaflags = options[:NVCCFLAGS]
|
116
140
|
cudaflags += " --compiler-options '-fPIC','-D_FORCE_INLINES'"
|
@@ -121,7 +145,7 @@ module BOAST
|
|
121
145
|
end
|
122
146
|
end
|
123
147
|
|
124
|
-
def setup_linker_mppa(options, runner)
|
148
|
+
def setup_linker_mppa(options, runner, probes)
|
125
149
|
objext = RbConfig::CONFIG["OBJEXT"]
|
126
150
|
ldflags = options[:LDFLAGS]
|
127
151
|
board = " -mboard=developer"
|
@@ -141,11 +165,13 @@ module BOAST
|
|
141
165
|
|
142
166
|
end
|
143
167
|
|
144
|
-
def setup_linker(options)
|
168
|
+
def setup_linker(options, probes)
|
145
169
|
ldflags = options[:LDFLAGS]
|
146
170
|
ldflags += " -march=#{get_model}"
|
147
171
|
ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]}"
|
148
|
-
|
172
|
+
probes.each { |p|
|
173
|
+
ldflags += " #{p.ldflags}" if p.respond_to?(:ldflags)
|
174
|
+
}
|
149
175
|
ldflags += " -lcudart" if @lang == CUDA
|
150
176
|
ldflags += " -L/usr/local/k1tools/lib64 -lmppaipc -lpcie -lz -lelf -lmppa_multiloader" if @architecture == MPPA
|
151
177
|
ldflags += " -lmppamon -lmppabm -lm -lmppalock" if @architecture == MPPA
|
@@ -160,17 +186,17 @@ module BOAST
|
|
160
186
|
end
|
161
187
|
|
162
188
|
if OS.mac? then
|
163
|
-
ldflags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"
|
164
189
|
ldshared = "-dynamic -bundle"
|
190
|
+
ldshared_flags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress"
|
165
191
|
else
|
166
|
-
ldflags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
|
167
192
|
ldshared = "-shared"
|
193
|
+
ldshared_flags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic"
|
168
194
|
end
|
169
195
|
|
170
|
-
return [linker, ldshared, ldflags]
|
196
|
+
return [linker, ldshared, ldshared_flags, ldflags]
|
171
197
|
end
|
172
198
|
|
173
|
-
def setup_compilers(options = {})
|
199
|
+
def setup_compilers(probes, options = {})
|
174
200
|
Rake::Task::clear
|
175
201
|
verbose = options[:VERBOSE]
|
176
202
|
verbose = get_verbose if not verbose
|
@@ -184,7 +210,7 @@ module BOAST
|
|
184
210
|
if verbose then
|
185
211
|
sh call_string
|
186
212
|
else
|
187
|
-
status,
|
213
|
+
status, _, stderr = systemu call_string
|
188
214
|
if not status.success? then
|
189
215
|
puts stderr
|
190
216
|
fail "#{t.source}: compilation failed"
|
@@ -193,14 +219,14 @@ module BOAST
|
|
193
219
|
end
|
194
220
|
}
|
195
221
|
|
196
|
-
setup_c_compiler(options, includes, narray_path, runner)
|
197
|
-
setup_cxx_compiler(options, includes, runner)
|
198
|
-
setup_fortran_compiler(options, runner)
|
199
|
-
setup_cuda_compiler(options, runner)
|
222
|
+
setup_c_compiler(options, includes, narray_path, runner, probes)
|
223
|
+
setup_cxx_compiler(options, includes, runner, probes)
|
224
|
+
setup_fortran_compiler(options, runner, probes)
|
225
|
+
setup_cuda_compiler(options, runner, probes)
|
200
226
|
|
201
|
-
setup_linker_mppa(options, runner) if @architecture == MPPA
|
227
|
+
setup_linker_mppa(options, runner, probes) if @architecture == MPPA
|
202
228
|
|
203
|
-
return setup_linker(options)
|
229
|
+
return setup_linker(options, probes)
|
204
230
|
|
205
231
|
end
|
206
232
|
|
data/lib/BOAST/Runtime/Config.rb
CHANGED
@@ -19,6 +19,9 @@ module BOAST
|
|
19
19
|
:CLDEVICETYPE => nil,
|
20
20
|
:MAQAO => 'maqao',
|
21
21
|
:MAQAO_FLAGS => '',
|
22
|
+
:MAQAO_PATH => '',
|
23
|
+
:MAQAO_PASS => false,
|
24
|
+
:MAQAO_SCRIPT => '',
|
22
25
|
:OPENMP => false,
|
23
26
|
:probes => nil
|
24
27
|
}
|
@@ -42,19 +45,25 @@ module BOAST
|
|
42
45
|
module PrivateStateAccessor
|
43
46
|
private_boolean_state_accessor :verbose
|
44
47
|
private_boolean_state_accessor :debug_source
|
48
|
+
private_boolean_state_accessor :debug_kernel_source
|
45
49
|
private_boolean_state_accessor :ffi
|
50
|
+
private_boolean_state_accessor :executable
|
46
51
|
private_boolean_state_accessor :keep_temp
|
47
52
|
private_state_accessor :fortran_line_length
|
48
53
|
end
|
49
54
|
|
50
55
|
boolean_state_accessor :verbose
|
51
56
|
boolean_state_accessor :debug_source
|
57
|
+
boolean_state_accessor :debug_kernel_source
|
52
58
|
boolean_state_accessor :ffi
|
59
|
+
boolean_state_accessor :executable
|
53
60
|
boolean_state_accessor :keep_temp
|
54
61
|
state_accessor :fortran_line_length
|
55
62
|
default_state_getter :verbose, false
|
56
63
|
default_state_getter :debug_source, false
|
64
|
+
default_state_getter :debug_kernel_source, false
|
57
65
|
default_state_getter :ffi, false
|
66
|
+
default_state_getter :executable, false
|
58
67
|
default_state_getter :keep_temp, false
|
59
68
|
default_state_getter :fortran_line_length, 72
|
60
69
|
|
data/lib/BOAST/Runtime/EnergyProbe.rb
CHANGED
@@ -111,6 +111,8 @@ EOF
|
|
111
111
|
EOF
|
112
112
|
end
|
113
113
|
def compute
|
114
|
+
end
|
115
|
+
def store
|
114
116
|
get_output.print <<EOF
|
115
117
|
{
|
116
118
|
VALUE results;
|
@@ -147,6 +149,8 @@ EOF
|
|
147
149
|
get_output.puts "_boast_energy = redfst_get(_boast_energy);"
|
148
150
|
end
|
149
151
|
def compute
|
152
|
+
end
|
153
|
+
def store
|
150
154
|
get_output.print <<EOF
|
151
155
|
{
|
152
156
|
VALUE results;
|
@@ -176,13 +180,18 @@ EOF
|
|
176
180
|
rb_hash_aset(_boast_stats, ID2SYM(rb_intern("energy")), results);
|
177
181
|
}
|
178
182
|
EOF
|
179
|
-
|
183
|
+
end
|
180
184
|
def is_available?
|
185
|
+
return false if OS.mac?
|
181
186
|
path = []
|
182
187
|
if ENV['LIBRARY_PATH'] then
|
183
188
|
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libredfst.so')}
|
184
189
|
end
|
185
|
-
|
190
|
+
begin
|
191
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
192
|
+
rescue
|
193
|
+
path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
194
|
+
end
|
186
195
|
return path != []
|
187
196
|
end
|
188
197
|
end
|
@@ -209,6 +218,8 @@ EOF
|
|
209
218
|
get_output.puts "emlStop(_boast_energy);";
|
210
219
|
end
|
211
220
|
def compute
|
221
|
+
end
|
222
|
+
def store
|
212
223
|
get_output.print <<EOF
|
213
224
|
{
|
214
225
|
VALUE results;
|
@@ -221,11 +232,16 @@ EOF
|
|
221
232
|
EOF
|
222
233
|
end
|
223
234
|
def is_available?
|
235
|
+
return false if OS.mac?
|
224
236
|
path = []
|
225
237
|
if ENV['LIBRARY_PATH'] then
|
226
238
|
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libeml.so')}
|
227
239
|
end
|
228
|
-
|
240
|
+
begin
|
241
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
242
|
+
rescue
|
243
|
+
path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
244
|
+
end
|
229
245
|
return path != []
|
230
246
|
end
|
231
247
|
end
|
data/lib/BOAST/Runtime/FFIRuntime.rb
CHANGED
@@ -3,6 +3,22 @@ module BOAST
|
|
3
3
|
# @private
|
4
4
|
module FFIRuntime
|
5
5
|
|
6
|
+
def build( options = {} )
|
7
|
+
if options[:library_path] then
|
8
|
+
eval <<EOF
|
9
|
+
def library_path
|
10
|
+
return "#{options[:library_path]}"
|
11
|
+
end
|
12
|
+
EOF
|
13
|
+
@marker = Tempfile::new([@procedure.name,""])
|
14
|
+
create_ffi_module
|
15
|
+
eval "self.extend(#{module_name})"
|
16
|
+
return self
|
17
|
+
else
|
18
|
+
super
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
6
22
|
private
|
7
23
|
|
8
24
|
def target
|
@@ -25,6 +41,13 @@ module BOAST
|
|
25
41
|
create_library_source
|
26
42
|
end
|
27
43
|
|
44
|
+
def save_module
|
45
|
+
f = File::open(library_path, "rb")
|
46
|
+
@module_binary = StringIO::new
|
47
|
+
@module_binary.write( f.read )
|
48
|
+
f.close
|
49
|
+
end
|
50
|
+
|
28
51
|
def create_ffi_module
|
29
52
|
s =<<EOF
|
30
53
|
require 'ffi'
|
data/lib/BOAST/Runtime/MAQAO.rb
CHANGED
@@ -32,4 +32,33 @@ module BOAST
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
module MAQAO
|
36
|
+
|
37
|
+
def create_executable_target( linker, ldflags, kernel_files)
|
38
|
+
file target_executable => target_executable_depends do
|
39
|
+
sh "#{linker} -o #{target_executable} #{target_executable_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
|
40
|
+
maqao_script = @compiler_options[:MAQAO_SCRIPT]
|
41
|
+
if maqao_script == '' then
|
42
|
+
maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{@procedure.name} Init_#{base_name} #{target_executable}"
|
46
|
+
Rake::Task[target_executable].invoke
|
47
|
+
end
|
48
|
+
|
49
|
+
def create_targets( linker, ldshared, ldflags, kernel_files)
|
50
|
+
file target => target_depends do
|
51
|
+
sh "#{linker} #{ldshared} -o #{target} #{target_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
|
52
|
+
no_fpic_obj = target_depends[1].gsub(/\.o$/, "_no_fpic.o")
|
53
|
+
maqao_script = @compiler_options[:MAQAO_SCRIPT]
|
54
|
+
if maqao_script == '' then
|
55
|
+
maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
|
56
|
+
end
|
57
|
+
sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{library_source} #{@procedure.name} Init_#{base_name} #{target} #{no_fpic_obj}"
|
58
|
+
end
|
59
|
+
Rake::Task[target].invoke
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
35
64
|
end
|
data/lib/BOAST/Runtime/NonRegression.rb
CHANGED
@@ -10,6 +10,14 @@ module BOAST
|
|
10
10
|
return load_ref_files( path, suffix, :out )
|
11
11
|
end
|
12
12
|
|
13
|
+
def dump_ref_inputs(values, path = ".", suffix = ".in" )
|
14
|
+
return dump_ref_files(values, path, suffix, :in )
|
15
|
+
end
|
16
|
+
|
17
|
+
def dump_ref_outputs(values, path = ".", suffix = ".out" )
|
18
|
+
return dump_ref_files(values, path, suffix, :out )
|
19
|
+
end
|
20
|
+
|
13
21
|
def compare_ref(ref_outputs, outputs, epsilon = nil)
|
14
22
|
res = {}
|
15
23
|
@procedure.parameters.each_with_index { |param, indx|
|
@@ -52,7 +60,7 @@ module BOAST
|
|
52
60
|
when 2
|
53
61
|
type = NArray::SINT
|
54
62
|
when 4
|
55
|
-
type = NArray::
|
63
|
+
type = NArray::INT
|
56
64
|
else
|
57
65
|
STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
|
58
66
|
type = NArray::BYTE
|
@@ -94,12 +102,27 @@ module BOAST
|
|
94
102
|
return type
|
95
103
|
end
|
96
104
|
|
105
|
+
def write_param(param, value, directory, suffix, intent)
|
106
|
+
if intent == :out and ( param.direction == :in or param.constant ) then
|
107
|
+
return nil
|
108
|
+
end
|
109
|
+
f = File::new( directory + "/" + "#{param.name+suffix}", "wb" )
|
110
|
+
if param.dimension or param.vector? then
|
111
|
+
f.write value.to_s
|
112
|
+
else
|
113
|
+
type = get_scalar_type(param)
|
114
|
+
f.write [value].pack(type)
|
115
|
+
end
|
116
|
+
f.close
|
117
|
+
return nil
|
118
|
+
end
|
119
|
+
|
97
120
|
def read_param(param, directory, suffix, intent)
|
98
121
|
if intent == :out and ( param.direction == :in or param.constant ) then
|
99
122
|
return nil
|
100
123
|
end
|
101
124
|
f = File::new( directory + "/" + param.name+suffix, "rb" )
|
102
|
-
if param.dimension then
|
125
|
+
if param.dimension or param.vector? then
|
103
126
|
type = get_array_type(param)
|
104
127
|
if f.size == 0 then
|
105
128
|
res = NArray::new(type, 1)
|
@@ -132,7 +155,28 @@ module BOAST
|
|
132
155
|
return res
|
133
156
|
end
|
134
157
|
|
135
|
-
def
|
158
|
+
def write_gpu_dim(value, directory)
|
159
|
+
global_work_size = value[:global_work_size]
|
160
|
+
block_number = value[:block_number]
|
161
|
+
local_work_size = value[:local_work_size]
|
162
|
+
local_work_size = value[:block_size] unless local_work_size
|
163
|
+
(local_work_size.length..2).each{ |i| local_work_size[i] = 1 }
|
164
|
+
if global_work_size and not block_number then
|
165
|
+
block_number = []
|
166
|
+
(global_work_size.length..2).each{ |i| global_work_size[i] = 1 }
|
167
|
+
local_work_size.each_index { |i|
|
168
|
+
block_number[i] = global_work_size[i] / local_work_size[i]
|
169
|
+
}
|
170
|
+
end
|
171
|
+
(block_number.length..2).each{ |i| block_number[i] = 1 }
|
172
|
+
|
173
|
+
File::open( directory + "/problem_size", "w") { |f|
|
174
|
+
f.write "<#{local_work_size.join(",")}><#{block_number.join(",")}>"
|
175
|
+
}
|
176
|
+
return nil
|
177
|
+
end
|
178
|
+
|
179
|
+
def load_ref_files( path, suffix, intent )
|
136
180
|
proc_path = path + "/#{@procedure.name}/"
|
137
181
|
res_h = {}
|
138
182
|
begin
|
@@ -154,6 +198,23 @@ module BOAST
|
|
154
198
|
return res_h
|
155
199
|
end
|
156
200
|
|
201
|
+
def dump_ref_files( values, path, suffix, intent )
|
202
|
+
proc_path = path + "/#{@procedure.name}/"
|
203
|
+
Dir.mkdir( proc_path ) unless File.exists?( proc_path )
|
204
|
+
values.each { |key, vals|
|
205
|
+
case_path = proc_path + "#{key}/"
|
206
|
+
Dir.mkdir( case_path ) unless File.exists?( case_path )
|
207
|
+
d = Pathname.new( case_path )
|
208
|
+
@procedure.parameters.each_with_index { |param, i|
|
209
|
+
write_param( param, vals[i], d.to_s, suffix, intent )
|
210
|
+
}
|
211
|
+
if @lang == CUDA or @lang == CL then
|
212
|
+
write_gpu_dim( vals.last, d.to_s )
|
213
|
+
end
|
214
|
+
}
|
215
|
+
return nil
|
216
|
+
end
|
217
|
+
|
157
218
|
end
|
158
219
|
|
159
220
|
end
|
data/lib/BOAST/Runtime/OpenCLRuntime.rb
CHANGED
@@ -53,7 +53,6 @@ module BOAST
|
|
53
53
|
@context = OpenCL::create_context([device])
|
54
54
|
end
|
55
55
|
program = @context.create_program_with_source([@code.string])
|
56
|
-
opts = options[:CLFLAGS]
|
57
56
|
begin
|
58
57
|
program.build(:options => options[:CLFLAGS])
|
59
58
|
rescue OpenCL::Error => e
|
@@ -130,7 +129,6 @@ module BOAST
|
|
130
129
|
run_method = <<EOF
|
131
130
|
def self.run(*args)
|
132
131
|
raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
|
133
|
-
energy_data = NArray::float(1024)
|
134
132
|
params = []
|
135
133
|
opts = BOAST::get_run_config
|
136
134
|
opts = opts.update(args.pop) if args.length == #{@procedure.parameters.length+1}
|
@@ -153,17 +151,29 @@ def self.run(*args)
|
|
153
151
|
if not lws then
|
154
152
|
lws = opts[:block_size]
|
155
153
|
end
|
156
|
-
|
154
|
+
event1 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
155
|
+
if opts[:repeat] and opts[:repeat] > 1 then
|
156
|
+
(opts[:repeat] - 2).times {
|
157
|
+
@queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
158
|
+
}
|
159
|
+
event2 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
160
|
+
end
|
157
161
|
@procedure.parameters.each_index { |i|
|
158
162
|
if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
|
159
163
|
read_opencl_param( params[i], args[i], @procedure.parameters[i] )
|
160
164
|
end
|
161
165
|
}
|
162
166
|
@queue.finish
|
167
|
+
start_t = event1.profiling_command_start
|
168
|
+
if opts[:repeat] and opts[:repeat] > 1 then
|
169
|
+
end_t = event2.profiling_command_end
|
170
|
+
else
|
171
|
+
end_t = event1.profiling_command_end
|
172
|
+
end
|
163
173
|
result = {}
|
164
|
-
result[:start] =
|
165
|
-
result[:end] =
|
166
|
-
result[:duration] = (
|
174
|
+
result[:start] = start_t
|
175
|
+
result[:end] = end_t
|
176
|
+
result[:duration] = (end_t - start_t)/1000000000.0
|
167
177
|
return result
|
168
178
|
end
|
169
179
|
EOF
|