BOAST 1.3.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/BOAST.gemspec +1 -1
  3. data/LICENSE +13 -1
  4. data/README.md +62 -13
  5. data/lib/BOAST.rb +3 -1
  6. data/lib/BOAST/Language/ARMCPUID_by_name.rb +3752 -0
  7. data/lib/BOAST/Language/Algorithm.rb +4 -24
  8. data/lib/BOAST/Language/Architectures.rb +5 -0
  9. data/lib/BOAST/Language/Arithmetic.rb +38 -5
  10. data/lib/BOAST/Language/BOAST_OpenCL.rb +7 -8
  11. data/lib/BOAST/Language/Case.rb +10 -3
  12. data/lib/BOAST/Language/Config.rb +36 -12
  13. data/lib/BOAST/Language/ControlStructure.rb +7 -3
  14. data/lib/BOAST/Language/DataTypes.rb +6 -0
  15. data/lib/BOAST/Language/Expression.rb +26 -2
  16. data/lib/BOAST/Language/For.rb +59 -30
  17. data/lib/BOAST/Language/FuncCall.rb +9 -5
  18. data/lib/BOAST/Language/Functors.rb +1 -1
  19. data/lib/BOAST/Language/HighLevelOperators.rb +172 -0
  20. data/lib/BOAST/Language/If.rb +25 -9
  21. data/lib/BOAST/Language/Index.rb +5 -5
  22. data/lib/BOAST/Language/Intrinsics.rb +40 -27
  23. data/lib/BOAST/Language/OpenMP.rb +1 -0
  24. data/lib/BOAST/Language/Operators.rb +221 -34
  25. data/lib/BOAST/Language/Parens.rb +3 -2
  26. data/lib/BOAST/Language/Procedure.rb +18 -5
  27. data/lib/BOAST/Language/Slice.rb +176 -44
  28. data/lib/BOAST/Language/Variable.rb +99 -56
  29. data/lib/BOAST/Language/While.rb +18 -3
  30. data/lib/BOAST/Language/{CPUID_by_name.rb → X86CPUID_by_name.rb} +0 -0
  31. data/lib/BOAST/Optimization/Optimization.rb +2 -0
  32. data/lib/BOAST/Runtime/AffinityProbe.rb +7 -3
  33. data/lib/BOAST/Runtime/CKernel.rb +3 -0
  34. data/lib/BOAST/Runtime/CRuntime.rb +4 -0
  35. data/lib/BOAST/Runtime/CompiledRuntime.rb +404 -77
  36. data/lib/BOAST/Runtime/Compilers.rb +44 -18
  37. data/lib/BOAST/Runtime/Config.rb +9 -0
  38. data/lib/BOAST/Runtime/EnergyProbe.rb +19 -3
  39. data/lib/BOAST/Runtime/FFIRuntime.rb +23 -0
  40. data/lib/BOAST/Runtime/FORTRANRuntime.rb +1 -1
  41. data/lib/BOAST/Runtime/MAQAO.rb +29 -0
  42. data/lib/BOAST/Runtime/NonRegression.rb +64 -3
  43. data/lib/BOAST/Runtime/OpenCLRuntime.rb +16 -6
  44. data/lib/BOAST/Runtime/Probe.rb +21 -1
  45. metadata +5 -3
data/lib/BOAST/Runtime/Compilers.rb +44 -18
@@ -43,10 +43,13 @@ module BOAST
43
43
  end
44
44
  end
45
45
 
46
- def setup_c_compiler(options, includes, narray_path, runner)
46
+ def setup_c_compiler(options, includes, narray_path, runner, probes)
47
47
  c_mppa_compiler = "k1-gcc"
48
48
  c_compiler = options[:CC]
49
49
  cflags = options[:CFLAGS]
50
+ probes.each { |p|
51
+ cflags += " #{p.cflags}" if p.respond_to?(:cflags)
52
+ }
50
53
  cflags += " -march=#{get_model}"
51
54
  cflags += " -fPIC #{includes}"
52
55
  cflags += " -DHAVE_NARRAY_H" if narray_path
@@ -58,6 +61,13 @@ module BOAST
58
61
  cflags += " #{openmp_cflags}"
59
62
  end
60
63
 
64
+ cflags_no_fpic = cflags.gsub("-fPIC","")
65
+
66
+ rule ".nofpic#{objext}" => '.c' do |t|
67
+ c_call_string = "#{c_compiler} #{cflags_no_fpic} -c -o #{t.name} #{t.source}"
68
+ runner.call(t, c_call_string)
69
+ end
70
+
61
71
  rule ".#{objext}" => '.c' do |t|
62
72
  c_call_string = "#{c_compiler} #{cflags} -c -o #{t.name} #{t.source}"
63
73
  runner.call(t, c_call_string)
@@ -76,7 +86,7 @@ module BOAST
76
86
  end
77
87
  end
78
88
 
79
- def setup_cxx_compiler(options, includes, runner)
89
+ def setup_cxx_compiler(options, includes, runner, probes)
80
90
  cxx_compiler = options[:CXX]
81
91
  cxxflags = options[:CXXFLAGS]
82
92
  cxxflags += " -fPIC #{includes}"
@@ -86,13 +96,20 @@ module BOAST
86
96
  cxxflags += " #{openmp_cxxflags}"
87
97
  end
88
98
 
99
+ cxxflags_no_fpic = cxxflags.gsub("-fPIC","")
100
+
101
+ rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
102
+ cxx_call_string = "#{cxx_compiler} #{cxxflags_no_fpic} -c -o #{t.name} #{t.source}"
103
+ runner.call(t, cxx_call_string)
104
+ end
105
+
89
106
  rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.cpp' do |t|
90
107
  cxx_call_string = "#{cxx_compiler} #{cxxflags} -c -o #{t.name} #{t.source}"
91
108
  runner.call(t, cxx_call_string)
92
109
  end
93
110
  end
94
111
 
95
- def setup_fortran_compiler(options, runner)
112
+ def setup_fortran_compiler(options, runner, probes)
96
113
  f_compiler = options[:FC]
97
114
  fcflags = options[:FCFLAGS]
98
115
  fcflags += " -march=#{get_model}"
@@ -104,13 +121,20 @@ module BOAST
104
121
  fcflags += " #{openmp_fcflags}"
105
122
  end
106
123
 
124
+ fcflags_no_fpic = fcflags.gsub("-fPIC","")
125
+
126
+ rule ".nofpic#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
127
+ f_call_string = "#{f_compiler} #{fcflags_no_fpic} -c -o #{t.name} #{t.source}"
128
+ runner.call(t, f_call_string)
129
+ end
130
+
107
131
  rule ".#{RbConfig::CONFIG["OBJEXT"]}" => '.f90' do |t|
108
132
  f_call_string = "#{f_compiler} #{fcflags} -c -o #{t.name} #{t.source}"
109
133
  runner.call(t, f_call_string)
110
134
  end
111
135
  end
112
136
 
113
- def setup_cuda_compiler(options, runner)
137
+ def setup_cuda_compiler(options, runner, probes)
114
138
  cuda_compiler = options[:NVCC]
115
139
  cudaflags = options[:NVCCFLAGS]
116
140
  cudaflags += " --compiler-options '-fPIC','-D_FORCE_INLINES'"
@@ -121,7 +145,7 @@ module BOAST
121
145
  end
122
146
  end
123
147
 
124
- def setup_linker_mppa(options, runner)
148
+ def setup_linker_mppa(options, runner, probes)
125
149
  objext = RbConfig::CONFIG["OBJEXT"]
126
150
  ldflags = options[:LDFLAGS]
127
151
  board = " -mboard=developer"
@@ -141,11 +165,13 @@ module BOAST
141
165
 
142
166
  end
143
167
 
144
- def setup_linker(options)
168
+ def setup_linker(options, probes)
145
169
  ldflags = options[:LDFLAGS]
146
170
  ldflags += " -march=#{get_model}"
147
171
  ldflags += " -L#{RbConfig::CONFIG["libdir"]} #{RbConfig::CONFIG["LIBRUBYARG"]}"
148
- ldflags += " -lrt" if not OS.mac?
172
+ probes.each { |p|
173
+ ldflags += " #{p.ldflags}" if p.respond_to?(:ldflags)
174
+ }
149
175
  ldflags += " -lcudart" if @lang == CUDA
150
176
  ldflags += " -L/usr/local/k1tools/lib64 -lmppaipc -lpcie -lz -lelf -lmppa_multiloader" if @architecture == MPPA
151
177
  ldflags += " -lmppamon -lmppabm -lm -lmppalock" if @architecture == MPPA
@@ -160,17 +186,17 @@ module BOAST
160
186
  end
161
187
 
162
188
  if OS.mac? then
163
- ldflags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress #{ldflags}"
164
189
  ldshared = "-dynamic -bundle"
190
+ ldshared_flags = "-Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress"
165
191
  else
166
- ldflags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic #{ldflags}"
167
192
  ldshared = "-shared"
193
+ ldshared_flags = "-Wl,-Bsymbolic-functions -Wl,-z,relro -rdynamic -Wl,-export-dynamic"
168
194
  end
169
195
 
170
- return [linker, ldshared, ldflags]
196
+ return [linker, ldshared, ldshared_flags, ldflags]
171
197
  end
172
198
 
173
- def setup_compilers(options = {})
199
+ def setup_compilers(probes, options = {})
174
200
  Rake::Task::clear
175
201
  verbose = options[:VERBOSE]
176
202
  verbose = get_verbose if not verbose
@@ -184,7 +210,7 @@ module BOAST
184
210
  if verbose then
185
211
  sh call_string
186
212
  else
187
- status, stdout, stderr = systemu call_string
213
+ status, _, stderr = systemu call_string
188
214
  if not status.success? then
189
215
  puts stderr
190
216
  fail "#{t.source}: compilation failed"
@@ -193,14 +219,14 @@ module BOAST
193
219
  end
194
220
  }
195
221
 
196
- setup_c_compiler(options, includes, narray_path, runner)
197
- setup_cxx_compiler(options, includes, runner)
198
- setup_fortran_compiler(options, runner)
199
- setup_cuda_compiler(options, runner)
222
+ setup_c_compiler(options, includes, narray_path, runner, probes)
223
+ setup_cxx_compiler(options, includes, runner, probes)
224
+ setup_fortran_compiler(options, runner, probes)
225
+ setup_cuda_compiler(options, runner, probes)
200
226
 
201
- setup_linker_mppa(options, runner) if @architecture == MPPA
227
+ setup_linker_mppa(options, runner, probes) if @architecture == MPPA
202
228
 
203
- return setup_linker(options)
229
+ return setup_linker(options, probes)
204
230
 
205
231
  end
206
232
 
data/lib/BOAST/Runtime/Config.rb +9 -0
@@ -19,6 +19,9 @@ module BOAST
19
19
  :CLDEVICETYPE => nil,
20
20
  :MAQAO => 'maqao',
21
21
  :MAQAO_FLAGS => '',
22
+ :MAQAO_PATH => '',
23
+ :MAQAO_PASS => false,
24
+ :MAQAO_SCRIPT => '',
22
25
  :OPENMP => false,
23
26
  :probes => nil
24
27
  }
@@ -42,19 +45,25 @@ module BOAST
42
45
  module PrivateStateAccessor
43
46
  private_boolean_state_accessor :verbose
44
47
  private_boolean_state_accessor :debug_source
48
+ private_boolean_state_accessor :debug_kernel_source
45
49
  private_boolean_state_accessor :ffi
50
+ private_boolean_state_accessor :executable
46
51
  private_boolean_state_accessor :keep_temp
47
52
  private_state_accessor :fortran_line_length
48
53
  end
49
54
 
50
55
  boolean_state_accessor :verbose
51
56
  boolean_state_accessor :debug_source
57
+ boolean_state_accessor :debug_kernel_source
52
58
  boolean_state_accessor :ffi
59
+ boolean_state_accessor :executable
53
60
  boolean_state_accessor :keep_temp
54
61
  state_accessor :fortran_line_length
55
62
  default_state_getter :verbose, false
56
63
  default_state_getter :debug_source, false
64
+ default_state_getter :debug_kernel_source, false
57
65
  default_state_getter :ffi, false
66
+ default_state_getter :executable, false
58
67
  default_state_getter :keep_temp, false
59
68
  default_state_getter :fortran_line_length, 72
60
69
 
data/lib/BOAST/Runtime/EnergyProbe.rb +19 -3
@@ -111,6 +111,8 @@ EOF
111
111
  EOF
112
112
  end
113
113
  def compute
114
+ end
115
+ def store
114
116
  get_output.print <<EOF
115
117
  {
116
118
  VALUE results;
@@ -147,6 +149,8 @@ EOF
147
149
  get_output.puts "_boast_energy = redfst_get(_boast_energy);"
148
150
  end
149
151
  def compute
152
+ end
153
+ def store
150
154
  get_output.print <<EOF
151
155
  {
152
156
  VALUE results;
@@ -176,13 +180,18 @@ EOF
176
180
  rb_hash_aset(_boast_stats, ID2SYM(rb_intern("energy")), results);
177
181
  }
178
182
  EOF
179
- end
183
+ end
180
184
  def is_available?
185
+ return false if OS.mac?
181
186
  path = []
182
187
  if ENV['LIBRARY_PATH'] then
183
188
  path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libredfst.so')}
184
189
  end
185
- path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
190
+ begin
191
+ path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
192
+ rescue
193
+ path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
194
+ end
186
195
  return path != []
187
196
  end
188
197
  end
@@ -209,6 +218,8 @@ EOF
209
218
  get_output.puts "emlStop(_boast_energy);";
210
219
  end
211
220
  def compute
221
+ end
222
+ def store
212
223
  get_output.print <<EOF
213
224
  {
214
225
  VALUE results;
@@ -221,11 +232,16 @@ EOF
221
232
  EOF
222
233
  end
223
234
  def is_available?
235
+ return false if OS.mac?
224
236
  path = []
225
237
  if ENV['LIBRARY_PATH'] then
226
238
  path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libeml.so')}
227
239
  end
228
- path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
240
+ begin
241
+ path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
242
+ rescue
243
+ path += `/sbin/ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
244
+ end
229
245
  return path != []
230
246
  end
231
247
  end
data/lib/BOAST/Runtime/FFIRuntime.rb +23 -0
@@ -3,6 +3,22 @@ module BOAST
3
3
  # @private
4
4
  module FFIRuntime
5
5
 
6
+ def build( options = {} )
7
+ if options[:library_path] then
8
+ eval <<EOF
9
+ def library_path
10
+ return "#{options[:library_path]}"
11
+ end
12
+ EOF
13
+ @marker = Tempfile::new([@procedure.name,""])
14
+ create_ffi_module
15
+ eval "self.extend(#{module_name})"
16
+ return self
17
+ else
18
+ super
19
+ end
20
+ end
21
+
6
22
  private
7
23
 
8
24
  def target
@@ -25,6 +41,13 @@ module BOAST
25
41
  create_library_source
26
42
  end
27
43
 
44
+ def save_module
45
+ f = File::open(library_path, "rb")
46
+ @module_binary = StringIO::new
47
+ @module_binary.write( f.read )
48
+ f.close
49
+ end
50
+
28
51
  def create_ffi_module
29
52
  s =<<EOF
30
53
  require 'ffi'
data/lib/BOAST/Runtime/FORTRANRuntime.rb +1 -1
@@ -7,7 +7,7 @@ module BOAST
7
7
  private
8
8
 
9
9
  def method_name
10
- return @procedure.name + "_"
10
+ return "#{@procedure.name}_"
11
11
  end
12
12
 
13
13
  def line_limited_source
data/lib/BOAST/Runtime/MAQAO.rb +29 -0
@@ -32,4 +32,33 @@ module BOAST
32
32
  end
33
33
  end
34
34
 
35
+ module MAQAO
36
+
37
+ def create_executable_target( linker, ldflags, kernel_files)
38
+ file target_executable => target_executable_depends do
39
+ sh "#{linker} -o #{target_executable} #{target_executable_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
40
+ maqao_script = @compiler_options[:MAQAO_SCRIPT]
41
+ if maqao_script == '' then
42
+ maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
43
+ end
44
+ end
45
+ sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{@procedure.name} Init_#{base_name} #{target_executable}"
46
+ Rake::Task[target_executable].invoke
47
+ end
48
+
49
+ def create_targets( linker, ldshared, ldflags, kernel_files)
50
+ file target => target_depends do
51
+ sh "#{linker} #{ldshared} -o #{target} #{target_depends.join(" ")} #{(kernel_files.collect {|f| f.path}).join(" ")} #{ldflags}"
52
+ no_fpic_obj = target_depends[1].gsub(/\.o$/, "_no_fpic.o")
53
+ maqao_script = @compiler_options[:MAQAO_SCRIPT]
54
+ if maqao_script == '' then
55
+ maqao_script = "#{@compiler_options[:MAQAO_PATH]}/scripts/maqao_from_boast.sh"
56
+ end
57
+ sh "#{maqao_script} #{@compiler_options[:MAQAO_PATH]} #{library_source} #{@procedure.name} Init_#{base_name} #{target} #{no_fpic_obj}"
58
+ end
59
+ Rake::Task[target].invoke
60
+ end
61
+
62
+ end
63
+
35
64
  end
data/lib/BOAST/Runtime/NonRegression.rb +64 -3
@@ -10,6 +10,14 @@ module BOAST
10
10
  return load_ref_files( path, suffix, :out )
11
11
  end
12
12
 
13
+ def dump_ref_inputs(values, path = ".", suffix = ".in" )
14
+ return dump_ref_files(values, path, suffix, :in )
15
+ end
16
+
17
+ def dump_ref_outputs(values, path = ".", suffix = ".out" )
18
+ return dump_ref_files(values, path, suffix, :out )
19
+ end
20
+
13
21
  def compare_ref(ref_outputs, outputs, epsilon = nil)
14
22
  res = {}
15
23
  @procedure.parameters.each_with_index { |param, indx|
@@ -52,7 +60,7 @@ module BOAST
52
60
  when 2
53
61
  type = NArray::SINT
54
62
  when 4
55
- type = NArray::SINT
63
+ type = NArray::INT
56
64
  else
57
65
  STDERR::puts "Unsupported Int size for NArray: #{param.type.size}, defaulting to byte" if debug?
58
66
  type = NArray::BYTE
@@ -94,12 +102,27 @@ module BOAST
94
102
  return type
95
103
  end
96
104
 
105
+ def write_param(param, value, directory, suffix, intent)
106
+ if intent == :out and ( param.direction == :in or param.constant ) then
107
+ return nil
108
+ end
109
+ f = File::new( directory + "/" + "#{param.name+suffix}", "wb" )
110
+ if param.dimension or param.vector? then
111
+ f.write value.to_s
112
+ else
113
+ type = get_scalar_type(param)
114
+ f.write [value].pack(type)
115
+ end
116
+ f.close
117
+ return nil
118
+ end
119
+
97
120
  def read_param(param, directory, suffix, intent)
98
121
  if intent == :out and ( param.direction == :in or param.constant ) then
99
122
  return nil
100
123
  end
101
124
  f = File::new( directory + "/" + param.name+suffix, "rb" )
102
- if param.dimension then
125
+ if param.dimension or param.vector? then
103
126
  type = get_array_type(param)
104
127
  if f.size == 0 then
105
128
  res = NArray::new(type, 1)
@@ -132,7 +155,28 @@ module BOAST
132
155
  return res
133
156
  end
134
157
 
135
- def load_ref_files( path, suffix, intent )
158
+ def write_gpu_dim(value, directory)
159
+ global_work_size = value[:global_work_size]
160
+ block_number = value[:block_number]
161
+ local_work_size = value[:local_work_size]
162
+ local_work_size = value[:block_size] unless local_work_size
163
+ (local_work_size.length..2).each{ |i| local_work_size[i] = 1 }
164
+ if global_work_size and not block_number then
165
+ block_number = []
166
+ (global_work_size.length..2).each{ |i| global_work_size[i] = 1 }
167
+ local_work_size.each_index { |i|
168
+ block_number[i] = global_work_size[i] / local_work_size[i]
169
+ }
170
+ end
171
+ (block_number.length..2).each{ |i| block_number[i] = 1 }
172
+
173
+ File::open( directory + "/problem_size", "w") { |f|
174
+ f.write "<#{local_work_size.join(",")}><#{block_number.join(",")}>"
175
+ }
176
+ return nil
177
+ end
178
+
179
+ def load_ref_files( path, suffix, intent )
136
180
  proc_path = path + "/#{@procedure.name}/"
137
181
  res_h = {}
138
182
  begin
@@ -154,6 +198,23 @@ module BOAST
154
198
  return res_h
155
199
  end
156
200
 
201
+ def dump_ref_files( values, path, suffix, intent )
202
+ proc_path = path + "/#{@procedure.name}/"
203
+ Dir.mkdir( proc_path ) unless File.exists?( proc_path )
204
+ values.each { |key, vals|
205
+ case_path = proc_path + "#{key}/"
206
+ Dir.mkdir( case_path ) unless File.exists?( case_path )
207
+ d = Pathname.new( case_path )
208
+ @procedure.parameters.each_with_index { |param, i|
209
+ write_param( param, vals[i], d.to_s, suffix, intent )
210
+ }
211
+ if @lang == CUDA or @lang == CL then
212
+ write_gpu_dim( vals.last, d.to_s )
213
+ end
214
+ }
215
+ return nil
216
+ end
217
+
157
218
  end
158
219
 
159
220
  end
data/lib/BOAST/Runtime/OpenCLRuntime.rb +16 -6
@@ -53,7 +53,6 @@ module BOAST
53
53
  @context = OpenCL::create_context([device])
54
54
  end
55
55
  program = @context.create_program_with_source([@code.string])
56
- opts = options[:CLFLAGS]
57
56
  begin
58
57
  program.build(:options => options[:CLFLAGS])
59
58
  rescue OpenCL::Error => e
@@ -130,7 +129,6 @@ module BOAST
130
129
  run_method = <<EOF
131
130
  def self.run(*args)
132
131
  raise "Wrong number of arguments \#{args.length} for #{@procedure.parameters.length}" if args.length > #{@procedure.parameters.length+1} or args.length < #{@procedure.parameters.length}
133
- energy_data = NArray::float(1024)
134
132
  params = []
135
133
  opts = BOAST::get_run_config
136
134
  opts = opts.update(args.pop) if args.length == #{@procedure.parameters.length+1}
@@ -153,17 +151,29 @@ def self.run(*args)
153
151
  if not lws then
154
152
  lws = opts[:block_size]
155
153
  end
156
- event = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
154
+ event1 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
155
+ if opts[:repeat] and opts[:repeat] > 1 then
156
+ (opts[:repeat] - 2).times {
157
+ @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
158
+ }
159
+ event2 = @queue.enqueue_NDrange_kernel(@kernel, gws, :local_work_size => lws)
160
+ end
157
161
  @procedure.parameters.each_index { |i|
158
162
  if @procedure.parameters[i].dimension and (@procedure.parameters[i].direction == :inout or @procedure.parameters[i].direction == :out) then
159
163
  read_opencl_param( params[i], args[i], @procedure.parameters[i] )
160
164
  end
161
165
  }
162
166
  @queue.finish
167
+ start_t = event1.profiling_command_start
168
+ if opts[:repeat] and opts[:repeat] > 1 then
169
+ end_t = event2.profiling_command_end
170
+ else
171
+ end_t = event1.profiling_command_end
172
+ end
163
173
  result = {}
164
- result[:start] = event.profiling_command_start
165
- result[:end] = event.profiling_command_end
166
- result[:duration] = (result[:end] - result[:start])/1000000000.0
174
+ result[:start] = start_t
175
+ result[:end] = end_t
176
+ result[:duration] = (end_t - start_t)/1000000000.0
167
177
  return result
168
178
  end
169
179
  EOF