BOAST 1.3.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BOAST.gemspec +1 -1
- data/LICENSE +13 -1
- data/README.md +62 -13
- data/lib/BOAST.rb +3 -1
- data/lib/BOAST/Language/ARMCPUID_by_name.rb +3752 -0
- data/lib/BOAST/Language/Algorithm.rb +4 -24
- data/lib/BOAST/Language/Architectures.rb +5 -0
- data/lib/BOAST/Language/Arithmetic.rb +38 -5
- data/lib/BOAST/Language/BOAST_OpenCL.rb +7 -8
- data/lib/BOAST/Language/Case.rb +10 -3
- data/lib/BOAST/Language/Config.rb +36 -12
- data/lib/BOAST/Language/ControlStructure.rb +7 -3
- data/lib/BOAST/Language/DataTypes.rb +6 -0
- data/lib/BOAST/Language/Expression.rb +26 -2
- data/lib/BOAST/Language/For.rb +59 -30
- data/lib/BOAST/Language/FuncCall.rb +9 -5
- data/lib/BOAST/Language/Functors.rb +1 -1
- data/lib/BOAST/Language/HighLevelOperators.rb +172 -0
- data/lib/BOAST/Language/If.rb +25 -9
- data/lib/BOAST/Language/Index.rb +5 -5
- data/lib/BOAST/Language/Intrinsics.rb +40 -27
- data/lib/BOAST/Language/OpenMP.rb +1 -0
- data/lib/BOAST/Language/Operators.rb +221 -34
- data/lib/BOAST/Language/Parens.rb +3 -2
- data/lib/BOAST/Language/Procedure.rb +18 -5
- data/lib/BOAST/Language/Slice.rb +176 -44
- data/lib/BOAST/Language/Variable.rb +99 -56
- data/lib/BOAST/Language/While.rb +18 -3
- data/lib/BOAST/Language/{CPUID_by_name.rb → X86CPUID_by_name.rb} +0 -0
- data/lib/BOAST/Optimization/Optimization.rb +2 -0
- data/lib/BOAST/Runtime/AffinityProbe.rb +7 -3
- data/lib/BOAST/Runtime/CKernel.rb +3 -0
- data/lib/BOAST/Runtime/CRuntime.rb +4 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +404 -77
- data/lib/BOAST/Runtime/Compilers.rb +44 -18
- data/lib/BOAST/Runtime/Config.rb +9 -0
- data/lib/BOAST/Runtime/EnergyProbe.rb +19 -3
- data/lib/BOAST/Runtime/FFIRuntime.rb +23 -0
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +1 -1
- data/lib/BOAST/Runtime/MAQAO.rb +29 -0
- data/lib/BOAST/Runtime/NonRegression.rb +64 -3
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +16 -6
- data/lib/BOAST/Runtime/Probe.rb +21 -1
- metadata +5 -3
@@ -43,10 +43,13 @@ module BOAST
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
def setup_c_compiler(options, includes, narray_path, runner)
|
46
|
+
def setup_c_compiler(options, includes, narray_path, runner, probes)
|
47
47
|
c_mppa_compiler = "k1-gcc"
|
48
48
|
c_compiler = options[:CC]
|
49
49
|
cflags = options[:CFLAGS]
|
50
|
+
probes.each { |p|
|
51
|
+
cflags += " #{p.cflags}" if p.respond_to?(:cflags)
|
52
|
+
}
|
50
53
|
cflags += " -march=#{get_model}"
|
51
54
|
cflags += " -fPIC #{includes}"
|
52
55
|
cflags += " -DHAVE_NARRAY_H" if narray_path
|
@@ -58,6 +61,13 @@ module BOAST
|
|
58
61
|
cflags += " #{openmp_cflags}"
|
59
62
|
end
|
60
63
|
|
64
|
+
cflags_no_fpic = cflags.gsub("-fPIC","")
|
65
|
+
|
66
|
+
rule ".nofpic#{objext}" => '.c' do |t|
|
67
|
+
c_call_string = "#{c_compiler} #{cflags_no_fpic} -c -o #{t.name} #{t.source}"
|
68
|
+
runner.call(t, c_call_string)
|
69
|
+
end
|
70
|
+
|
61
71
|
rule ".#{objext}" => '.c' do |t|
|
62
72
|
c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
|
63
73
|
runner.call(t, c_call_string)
|
@@ -76,7 +86,7 @@ module BOAST
|
|
76
86
|
end
|
77
87
|
end
|
78
88
|
|
79
|
-
def setup_cxx_compiler(options, includes, runner)
|
89
|
+
def setup_cxx_compiler(options, includes, runner, probes)
|
80
90
|
cxx_compiler = options[:CXX]
|
81
91
|
cxxflags = options[:CXXFLAGS]
|
82
92
|
cxxflags += " -fPIC #{includes}"
|
@@ -86,13 +96,20 @@ module BOAST
|
|
86
96
|
cxxflags += " #{openmp_cxxflags}"
|
87
97
|
end
|
88
98
|
|
99
|
+
cxxflags_no_fpic = cxxflags.gsub("-fPIC","")
|
100
|
+
|
101
|
+
rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
|
102
|
+
cxx_call_string = "#{cxx_compiler} #{cxxflags_no_fpic} -c -o #{t.name} #{t.source}"
|
103
|
+
runner.call(t, cxx_call_string)
|
104
|
+
end
|
105
|
+
|
89
106
|
rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
|
90
107
|
cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
|
91
108
|
runner.call(t, cxx_call_string)
|
92
109
|
end
|
93
110
|
end
|
94
111
|
|
95
|
-
def setup_fortran_compiler(options, runner)
|
112
|
+
def setup_fortran_compiler(options, runner, probes)
|
96
113
|
f_compiler = options[:FC]
|
97
114
|
fcflags = options[:FCFLAGS]
|
98
115
|
fcflags += " -march=#{get_model}"
|
@@ -104,13 +121,20 @@ module BOAST
|
|
104
121
|
fcflags += " #{openmp_fcflags}"
|
105
122
|
end
|
106
123
|
|
124
|
+
fcflags_no_fpic = fcflags.gsub("-fPIC","")
|
125
|
+
|
126
|
+
rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
|
127
|
+
f_call_string = "#{f_compiler} #{fcflags_no_fpic} -c -o #{t.name} #{t.source}"
|
128
|
+
runner.call(t, f_call_string)
|
129
|
+
end
|
130
|
+
|
107
131
|
rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
|
108
132
|
f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
|
109
133
|
runner.call(t, f_call_string)
|
110
134
|
end
|
111
135
|
end
|
112
136
|
|
113
|
-
def setup_cuda_compiler(options, runner)
|
137
|
+
def setup_cuda_compiler(options, runner, probes)
|
114
138
|
cuda_compiler = options[:NVCC]
|
115
139
|
cudaflags = options[:NVCCFLAGS]
|
116
140
|
cudaflags += " --compiler-options '-fPIC','-D_FORCE_INLINES'"
|
@@ -121,7 +145,7 @@ module BOAST
|
|
121
145
|
end
|
122
146
|
end
|
123
147
|
|
124
|
-
def setup_linker_mppa(options, runner)
|
148
|
+
def setup_linker_mppa(options, runner, probes)
|
125
149
|
objext = RbConfig::CONFIG["OBJEXT"]
|
126
150
|
ldflags = options[:LDFLAGS]
|
127
151
|
board = " -mboard=developer"
|
@@ -141,11 +165,13 @@ module BOAST
|
|
141
165
|
|
142
166
|
end
|
143
167
|
|
144
|
-
def setup_linker(options)
|
168
|
+
def setup_linker(options, probes)
|
145
169
|
ldflags = options[:LDFLAGS]
|
146
170
|
ldflags += " -march=#{get_model}"
|
147
171
|
ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]}"
|
148
|
-
|
172
|
+
probes.each { |p|
|
173
|
+
ldflags += " #{p.ldflags}" if p.respond_to?(:ldflags)
|
174
|
+
}
|
149
175
|
ldflags += " -lcudart" if @lang == CUDA
|
150
176
|
ldflags += " -L/usr/local/k1tools/lib64 -lmppaipc -lpcie -lz -lelf -lmppa_multiloader" if @architecture == MPPA
|
151
177
|
ldflags += " -lmppamon -lmppabm -lm -lmppalock" if @architecture == MPPA
|
@@ -160,17 +186,17 @@ module BOAST
|
|
160
186
|
end
|
161
187
|
|
162
188
|
if OS.mac? then
|
163
|
-
ldflags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"
|
164
189
|
ldshared = "-dynamic -bundle"
|
190
|
+
ldshared_flags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress"
|
165
191
|
else
|
166
|
-
ldflags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
|
167
192
|
ldshared = "-shared"
|
193
|
+
ldshared_flags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic"
|
168
194
|
end
|
169
195
|
|
170
|
-
return [linker, ldshared, ldflags]
|
196
|
+
return [linker, ldshared, ldshared_flags, ldflags]
|
171
197
|
end
|
172
198
|
|
173
|
-
def setup_compilers(options = {})
|
199
|
+
def setup_compilers(probes, options = {})
|
174
200
|
Rake::Task::clear
|
175
201
|
verbose = options[:VERBOSE]
|
176
202
|
verbose = get_verbose if not verbose
|
@@ -184,7 +210,7 @@ module BOAST
|
|
184
210
|
if verbose then
|
185
211
|
sh call_string
|
186
212
|
else
|
187
|
-
status,
|
213
|
+
status, _, stderr = systemu call_string
|
188
214
|
if not status.success? then
|
189
215
|
puts stderr
|
190
216
|
fail "#{t.source}: compilation failed"
|
@@ -193,14 +219,14 @@ module BOAST
|
|
193
219
|
end
|
194
220
|
}
|
195
221
|
|
196
|
-
setup_c_compiler(options, includes, narray_path, runner)
|
197
|
-
setup_cxx_compiler(options, includes, runner)
|
198
|
-
setup_fortran_compiler(options, runner)
|
199
|
-
setup_cuda_compiler(options, runner)
|
222
|
+
setup_c_compiler(options, includes, narray_path, runner, probes)
|
223
|
+
setup_cxx_compiler(options, includes, runner, probes)
|
224
|
+
setup_fortran_compiler(options, runner, probes)
|
225
|
+
setup_cuda_compiler(options, runner, probes)
|
200
226
|
|
201
|
-
setup_linker_mppa(options, runner) if @architecture == MPPA
|
227
|
+
setup_linker_mppa(options, runner, probes) if @architecture == MPPA
|
202
228
|
|
203
|
-
return setup_linker(options)
|
229
|
+
return setup_linker(options, probes)
|
204
230
|
|
205
231
|
end
|
206
232
|
|
data/lib/BOAST/Runtime/Config.rb
CHANGED
@@ -19,6 +19,9 @@ module BOAST
|
|
19
19
|
:CLDEVICETYPE => nil,
|
20
20
|
:MAQAO => 'maqao',
|
21
21
|
:MAQAO_FLAGS => '',
|
22
|
+
:MAQAO_PATH => '',
|
23
|
+
:MAQAO_PASS => false,
|
24
|
+
:MAQAO_SCRIPT => '',
|
22
25
|
:OPENMP => false,
|
23
26
|
:probes => nil
|
24
27
|
}
|
@@ -42,19 +45,25 @@ module BOAST
|
|
42
45
|
module PrivateStateAccessor
|
43
46
|
private_boolean_state_accessor :verbose
|
44
47
|
private_boolean_state_accessor :debug_source
|
48
|
+
private_boolean_state_accessor :debug_kernel_source
|
45
49
|
private_boolean_state_accessor :ffi
|
50
|
+
private_boolean_state_accessor :executable
|
46
51
|
private_boolean_state_accessor :keep_temp
|
47
52
|
private_state_accessor :fortran_line_length
|
48
53
|
end
|
49
54
|
|
50
55
|
boolean_state_accessor :verbose
|
51
56
|
boolean_state_accessor :debug_source
|
57
|
+
boolean_state_accessor :debug_kernel_source
|
52
58
|
boolean_state_accessor :ffi
|
59
|
+
boolean_state_accessor :executable
|
53
60
|
boolean_state_accessor :keep_temp
|
54
61
|
state_accessor :fortran_line_length
|
55
62
|
default_state_getter :verbose, false
|
56
63
|
default_state_getter :debug_source, false
|
64
|
+
default_state_getter :debug_kernel_source, false
|
57
65
|
default_state_getter :ffi, false
|
66
|
+
default_state_getter :executable, false
|
58
67
|
default_state_getter :keep_temp, false
|
59
68
|
default_state_getter :fortran_line_length, 72
|
60
69
|
|
@@ -111,6 +111,8 @@ EOF
|
|
111
111
|
EOF
|
112
112
|
end
|
113
113
|
def compute
|
114
|
+
end
|
115
|
+
def store
|
114
116
|
get_output.print <<EOF
|
115
117
|
{
|
116
118
|
VALUE results;
|
@@ -147,6 +149,8 @@ EOF
|
|
147
149
|
get_output.puts "_boast_energy = redfst_get(_boast_energy);"
|
148
150
|
end
|
149
151
|
def compute
|
152
|
+
end
|
153
|
+
def store
|
150
154
|
get_output.print <<EOF
|
151
155
|
{
|
152
156
|
VALUE results;
|
@@ -176,13 +180,18 @@ EOF
|
|
176
180
|
rb_hash_aset(_boast_stats, ID2SYM(rb_intern("energy")), results);
|
177
181
|
}
|
178
182
|
EOF
|
179
|
-
|
183
|
+
end
|
180
184
|
def is_available?
|
185
|
+
return false if OS.mac?
|
181
186
|
path = []
|
182
187
|
if ENV['LIBRARY_PATH'] then
|
183
188
|
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libredfst.so')}
|
184
189
|
end
|
185
|
-
|
190
|
+
begin
|
191
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
192
|
+
rescue
|
193
|
+
path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
194
|
+
end
|
186
195
|
return path != []
|
187
196
|
end
|
188
197
|
end
|
@@ -209,6 +218,8 @@ EOF
|
|
209
218
|
get_output.puts "emlStop(_boast_energy);";
|
210
219
|
end
|
211
220
|
def compute
|
221
|
+
end
|
222
|
+
def store
|
212
223
|
get_output.print <<EOF
|
213
224
|
{
|
214
225
|
VALUE results;
|
@@ -221,11 +232,16 @@ EOF
|
|
221
232
|
EOF
|
222
233
|
end
|
223
234
|
def is_available?
|
235
|
+
return false if OS.mac?
|
224
236
|
path = []
|
225
237
|
if ENV['LIBRARY_PATH'] then
|
226
238
|
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libeml.so')}
|
227
239
|
end
|
228
|
-
|
240
|
+
begin
|
241
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
242
|
+
rescue
|
243
|
+
path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
244
|
+
end
|
229
245
|
return path != []
|
230
246
|
end
|
231
247
|
end
|
@@ -3,6 +3,22 @@ module BOAST
|
|
3
3
|
# @private
|
4
4
|
module FFIRuntime
|
5
5
|
|
6
|
+
def build( options = {} )
|
7
|
+
if options[:library_path] then
|
8
|
+
eval <<EOF
|
9
|
+
def library_path
|
10
|
+
return "#{options[:library_path]}"
|
11
|
+
end
|
12
|
+
EOF
|
13
|
+
@marker = Tempfile::new([@procedure.name,""])
|
14
|
+
create_ffi_module
|
15
|
+
eval "self.extend(#{module_name})"
|
16
|
+
return self
|
17
|
+
else
|
18
|
+
super
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
6
22
|
private
|
7
23
|
|
8
24
|
def target
|
@@ -25,6 +41,13 @@ module BOAST
|
|
25
41
|
create_library_source
|
26
42
|
end
|
27
43
|
|
44
|
+
def save_module
|
45
|
+
f = File::open(library_path, "rb")
|
46
|
+
@module_binary = StringIO::new
|
47
|
+
@module_binary.write( f.read )
|
48
|
+
f.close
|
49
|
+
end
|
50
|
+
|
28
51
|
def create_ffi_module
|
29
52
|
s =<<EOF
|
30
53
|
require 'ffi'
|
data/lib/BOAST/Runtime/MAQAO.rb
CHANGED
@@ -32,4 +32,33 @@ module BOAST
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
module MAQAO
|
36
|
+
|
37
|
+
def create_executable_target( linker, ldflags, kernel_files)
|
38
|
+
file target_executable => target_executable_depends do
|
39
|
+
sh "#{linker} -o #{target_executable} #{target_executable_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
|
40
|
+
maqao_script = @compiler_options[:MAQAO_SCRIPT]
|
41
|
+
if maqao_script == '' then
|
42
|
+
maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{@procedure.name} Init_#{base_name} #{target_executable}"
|
46
|
+
Rake::Task[target_executable].invoke
|
47
|
+
end
|
48
|
+
|
49
|
+
def create_targets( linker, ldshared, ldflags, kernel_files)
|
50
|
+
file target => target_depends do
|
51
|
+
sh "#{linker} #{ldshared} -o #{target} #{target_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
|
52
|
+
no_fpic_obj = target_depends[1].gsub(/\.o$/, "_no_fpic.o")
|
53
|
+
maqao_script = @compiler_options[:MAQAO_SCRIPT]
|
54
|
+
if maqao_script == '' then
|
55
|
+
maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
|
56
|
+
end
|
57
|
+
sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{library_source} #{@procedure.name} Init_#{base_name} #{target} #{no_fpic_obj}"
|
58
|
+
end
|
59
|
+
Rake::Task[target].invoke
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
35
64
|
end
|
@@ -10,6 +10,14 @@ module BOAST
|
|
10
10
|
return load_ref_files( path, suffix, :out )
|
11
11
|
end
|
12
12
|
|
13
|
+
def dump_ref_inputs(values, path = ".", suffix = ".in" )
|
14
|
+
return dump_ref_files(values, path, suffix, :in )
|
15
|
+
end
|
16
|
+
|
17
|
+
def dump_ref_outputs(values, path = ".", suffix = ".out" )
|
18
|
+
return dump_ref_files(values, path, suffix, :out )
|
19
|
+
end
|
20
|
+
|
13
21
|
def compare_ref(ref_outputs, outputs, epsilon = nil)
|
14
22
|
res = {}
|
15
23
|
@procedure.parameters.each_with_index { |param, indx|
|
@@ -52,7 +60,7 @@ module BOAST
|
|
52
60
|
when 2
|
53
61
|
type = NArray::SINT
|
54
62
|
when 4
|
55
|
-
type = NArray::
|
63
|
+
type = NArray::INT
|
56
64
|
else
|
57
65
|
STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
|
58
66
|
type = NArray::BYTE
|
@@ -94,12 +102,27 @@ module BOAST
|
|
94
102
|
return type
|
95
103
|
end
|
96
104
|
|
105
|
+
def write_param(param, value, directory, suffix, intent)
|
106
|
+
if intent == :out and ( param.direction == :in or param.constant ) then
|
107
|
+
return nil
|
108
|
+
end
|
109
|
+
f = File::new( directory + "/" + "#{param.name+suffix}", "wb" )
|
110
|
+
if param.dimension or param.vector? then
|
111
|
+
f.write value.to_s
|
112
|
+
else
|
113
|
+
type = get_scalar_type(param)
|
114
|
+
f.write [value].pack(type)
|
115
|
+
end
|
116
|
+
f.close
|
117
|
+
return nil
|
118
|
+
end
|
119
|
+
|
97
120
|
def read_param(param, directory, suffix, intent)
|
98
121
|
if intent == :out and ( param.direction == :in or param.constant ) then
|
99
122
|
return nil
|
100
123
|
end
|
101
124
|
f = File::new( directory + "/" + param.name+suffix, "rb" )
|
102
|
-
if param.dimension then
|
125
|
+
if param.dimension or param.vector? then
|
103
126
|
type = get_array_type(param)
|
104
127
|
if f.size == 0 then
|
105
128
|
res = NArray::new(type, 1)
|
@@ -132,7 +155,28 @@ module BOAST
|
|
132
155
|
return res
|
133
156
|
end
|
134
157
|
|
135
|
-
def
|
158
|
+
def write_gpu_dim(value, directory)
|
159
|
+
global_work_size = value[:global_work_size]
|
160
|
+
block_number = value[:block_number]
|
161
|
+
local_work_size = value[:local_work_size]
|
162
|
+
local_work_size = value[:block_size] unless local_work_size
|
163
|
+
(local_work_size.length..2).each{ |i| local_work_size[i] = 1 }
|
164
|
+
if global_work_size and not block_number then
|
165
|
+
block_number = []
|
166
|
+
(global_work_size.length..2).each{ |i| global_work_size[i] = 1 }
|
167
|
+
local_work_size.each_index { |i|
|
168
|
+
block_number[i] = global_work_size[i] / local_work_size[i]
|
169
|
+
}
|
170
|
+
end
|
171
|
+
(block_number.length..2).each{ |i| block_number[i] = 1 }
|
172
|
+
|
173
|
+
File::open( directory + "/problem_size", "w") { |f|
|
174
|
+
f.write "<#{local_work_size.join(",")}><#{block_number.join(",")}>"
|
175
|
+
}
|
176
|
+
return nil
|
177
|
+
end
|
178
|
+
|
179
|
+
def load_ref_files( path, suffix, intent )
|
136
180
|
proc_path = path + "/#{@procedure.name}/"
|
137
181
|
res_h = {}
|
138
182
|
begin
|
@@ -154,6 +198,23 @@ module BOAST
|
|
154
198
|
return res_h
|
155
199
|
end
|
156
200
|
|
201
|
+
def dump_ref_files( values, path, suffix, intent )
|
202
|
+
proc_path = path + "/#{@procedure.name}/"
|
203
|
+
Dir.mkdir( proc_path ) unless File.exists?( proc_path )
|
204
|
+
values.each { |key, vals|
|
205
|
+
case_path = proc_path + "#{key}/"
|
206
|
+
Dir.mkdir( case_path ) unless File.exists?( case_path )
|
207
|
+
d = Pathname.new( case_path )
|
208
|
+
@procedure.parameters.each_with_index { |param, i|
|
209
|
+
write_param( param, vals[i], d.to_s, suffix, intent )
|
210
|
+
}
|
211
|
+
if @lang == CUDA or @lang == CL then
|
212
|
+
write_gpu_dim( vals.last, d.to_s )
|
213
|
+
end
|
214
|
+
}
|
215
|
+
return nil
|
216
|
+
end
|
217
|
+
|
157
218
|
end
|
158
219
|
|
159
220
|
end
|
@@ -53,7 +53,6 @@ module BOAST
|
|
53
53
|
@context = OpenCL::create_context([device])
|
54
54
|
end
|
55
55
|
program = @context.create_program_with_source([@code.string])
|
56
|
-
opts = options[:CLFLAGS]
|
57
56
|
begin
|
58
57
|
program.build(:options => options[:CLFLAGS])
|
59
58
|
rescue OpenCL::Error => e
|
@@ -130,7 +129,6 @@ module BOAST
|
|
130
129
|
run_method = <<EOF
|
131
130
|
def self.run(*args)
|
132
131
|
raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
|
133
|
-
energy_data = NArray::float(1024)
|
134
132
|
params = []
|
135
133
|
opts = BOAST::get_run_config
|
136
134
|
opts = opts.update(args.pop) if args.length == #{@procedure.parameters.length+1}
|
@@ -153,17 +151,29 @@ def self.run(*args)
|
|
153
151
|
if not lws then
|
154
152
|
lws = opts[:block_size]
|
155
153
|
end
|
156
|
-
|
154
|
+
event1 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
155
|
+
if opts[:repeat] and opts[:repeat] > 1 then
|
156
|
+
(opts[:repeat] - 2).times {
|
157
|
+
@queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
158
|
+
}
|
159
|
+
event2 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
|
160
|
+
end
|
157
161
|
@procedure.parameters.each_index { |i|
|
158
162
|
if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
|
159
163
|
read_opencl_param( params[i], args[i], @procedure.parameters[i] )
|
160
164
|
end
|
161
165
|
}
|
162
166
|
@queue.finish
|
167
|
+
start_t = event1.profiling_command_start
|
168
|
+
if opts[:repeat] and opts[:repeat] > 1 then
|
169
|
+
end_t = event2.profiling_command_end
|
170
|
+
else
|
171
|
+
end_t = event1.profiling_command_end
|
172
|
+
end
|
163
173
|
result = {}
|
164
|
-
result[:start] =
|
165
|
-
result[:end] =
|
166
|
-
result[:duration] = (
|
174
|
+
result[:start] = start_t
|
175
|
+
result[:end] = end_t
|
176
|
+
result[:duration] = (end_t - start_t)/1000000000.0
|
167
177
|
return result
|
168
178
|
end
|
169
179
|
EOF
|