BOAST 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 939618b7050d9d0298fa5f06b310ac5a881b85ef
4
- data.tar.gz: 60b1fc0bc92e9b1dad4003604a30c9f4de634d87
3
+ metadata.gz: 0bb86d632c7e15c62f42007db65ef307a86ed1eb
4
+ data.tar.gz: 8b4323362889540f6e731bad43e6765b24f26d2c
5
5
  SHA512:
6
- metadata.gz: cc1877354a30495b196128bd5e94055808197c14e9e7d6fd9dbb8c1e6c6dc603efbd5a877fd6d64f4d7f12b0fe88a0a8c1801cacb3baf13d206d25740914af4e
7
- data.tar.gz: 858093b8d634b8f0ff61f90aa5a2b7a8d67c5b49e9013cdc4093a453095adfaf9ba3490ad8548082b1bd0728cd280184a2ccacf147d94dcdbf541fff124217ec
6
+ metadata.gz: d4dd1ffe821624820fec06684910ad87f0dd451d81b596fafceda7b41651e0d5810dfe4aa902de69d609387577e4ace87b1ede8dea7ebcc65cc4d11d13cd3dda
7
+ data.tar.gz: 65d6bc915e2299d38e1bd2f4c52057f42e5c6ada5fd5d26416dacd20a4169f364f3eaec28c9e755a4d3b40c3903cb8f2092c23d55dcf400b5b7dbeac5a3f1787
data/BOAST.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'BOAST'
3
- s.version = "1.2.0"
3
+ s.version = "1.2.1"
4
4
  s.author = "Brice Videau"
5
5
  s.email = "brice.videau@imag.fr"
6
6
  s.homepage = "https://github.com/Nanosim-LIG/boast"
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.required_ruby_version = '>= 1.9.3'
13
13
  s.add_dependency 'narray', '~> 0.6.0', '>=0.6.0.8'
14
14
  s.add_dependency 'narray_ffi', '~> 1.2', '>=1.2.0'
15
- s.add_dependency 'opencl_ruby_ffi', '~> 1.0', '>=1.0.0'
15
+ s.add_dependency 'opencl_ruby_ffi', '~> 1.2', '>=1.2.1'
16
16
  s.add_dependency 'systemu', '~> 2', '>=2.2.0'
17
17
  s.add_dependency 'os', '~> 0.9', '>=0.9.6'
18
18
  s.add_dependency 'PAPI', '~> 0', '>=0.101'
@@ -30,6 +30,7 @@ module BOAST
30
30
  private_boolean_state_accessor :decl_module
31
31
  private_boolean_state_accessor :annotate
32
32
  private_boolean_state_accessor :optimizer_log
33
+ private_boolean_state_accessor :disable_openmp
33
34
 
34
35
  private
35
36
  def push_env(*args)
@@ -84,6 +85,7 @@ module BOAST
84
85
  boolean_state_accessor :decl_module
85
86
  boolean_state_accessor :annotate
86
87
  boolean_state_accessor :optimizer_log
88
+ boolean_state_accessor :disable_openmp
87
89
 
88
90
  default_state_getter :address_size, OS.bits/8
89
91
  default_state_getter :lang, FORTRAN, '"const_get(#{envs})"', :BOAST_LANG
@@ -104,6 +106,7 @@ module BOAST
104
106
  default_state_getter :annotate_level, 0
105
107
  default_state_getter :optimizer_log, false
106
108
  default_state_getter :optimizer_log_file, nil
109
+ default_state_getter :disable_openmp, false
107
110
 
108
111
  alias use_vla_old? use_vla?
109
112
  class << self
@@ -163,17 +166,27 @@ module BOAST
163
166
 
164
167
  @@env = Hash::new{|h, k| h[k] = []}
165
168
 
166
- def push_env(vars = {})
167
- vars.each { |key,value|
169
+ def push_env(vars = {}, &block)
170
+ keys = []
171
+ vars.each { |key, value|
168
172
  var = nil
169
173
  begin
170
174
  var = BOAST::class_variable_get("@@"+key.to_s)
171
175
  rescue
176
+ BOAST::pop_env(*keys)
172
177
  raise "Unknown module variable #{key}!"
173
178
  end
174
179
  @@env[key].push(var)
175
180
  BOAST::class_variable_set("@@"+key.to_s, value)
181
+ keys.push(key)
176
182
  }
183
+ if block then
184
+ begin
185
+ block.call
186
+ ensure
187
+ BOAST::pop_env(*vars.keys)
188
+ end
189
+ end
177
190
  end
178
191
 
179
192
  def pop_env(*vars)
@@ -91,6 +91,33 @@ X86architectures ={"pentium2"=>["MMX"],
91
91
  "RDSEED",
92
92
  "ADCX",
93
93
  "PREFETCHW"],
94
+ "knl"=>
95
+ ["MOVBE",
96
+ "MMX",
97
+ "SSE",
98
+ "SSE2",
99
+ "SSE3",
100
+ "SSSE3",
101
+ "SSE4.1",
102
+ "SSE4.2",
103
+ "POPCNT",
104
+ "AVX",
105
+ "AVX2",
106
+ "AES",
107
+ "PCLMUL",
108
+ "FSGSBASE",
109
+ "RDRND",
110
+ "FMA",
111
+ "BMI",
112
+ "BMI2",
113
+ "F16C",
114
+ "RDSEED",
115
+ "ADCX",
116
+ "PREFETCHW",
117
+ "AVX512F",
118
+ "AVX512PF",
119
+ "AVX512ER",
120
+ "AVX512CD"],
94
121
  "bonnell"=>["MOVBE", "MMX", "SSE", "SSE2", "SSE3", "SSSE3"],
95
122
  "silvermont"=>
96
123
  ["MOVBE",
@@ -14,6 +14,11 @@ module BOAST
14
14
  return !!@alignment
15
15
  end
16
16
 
17
+ def set_align(align)
18
+ @alignment = align
19
+ return self
20
+ end
21
+
17
22
  def to_var
18
23
  var = @source.copy("#{self}", :const => nil, :constant => nil, :dim => nil, :dimension => nil, :direction => nil, :dir => nil, :align => alignment)
19
24
  return var
@@ -39,16 +39,36 @@ module BOAST
39
39
  INTRINSICS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
40
40
  CONVERSIONS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
41
41
 
42
+ def check_coverage
43
+ ins = []
44
+ INTRINSICS[X86].each { |i,v|
45
+ if i == :CVT then
46
+ v.each { |type1, h|
47
+ h.each { |type2, instr|
48
+ ins.push instr.to_s
49
+ }
50
+ }
51
+ else
52
+ v.each { |type, instr|
53
+ ins.push instr.to_s
54
+ }
55
+ end
56
+ }
57
+ return ins - INSTRUCTIONS.keys
58
+ end
59
+
60
+ module_function :check_coverage
61
+
42
62
  def intrinsics_by_vector_name(intr_symbol, type, type2=nil)
43
63
  if type2 then
44
64
  instruction = INTRINSICS[get_architecture][intr_symbol][type][type2]
45
65
  else
46
66
  instruction = INTRINSICS[get_architecture][intr_symbol][type]
47
67
  end
68
+ raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
48
69
  return instruction if get_architecture == ARM
49
- raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
50
70
  supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
51
- raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
71
+ raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
52
72
  return instruction
53
73
  end
54
74
 
@@ -213,7 +233,7 @@ module BOAST
213
233
 
214
234
  module_function :type_name_X86
215
235
 
216
- [64, 128, 256].each { |vector_size|
236
+ [64, 128, 256, 512].each { |vector_size|
217
237
  vs = ( vector_size < 256 ? "" : "#{vector_size}" )
218
238
  sizes = [8, 16, 32]
219
239
  sizes.push( 64 ) if vector_size > 64
@@ -222,12 +242,17 @@ module BOAST
222
242
  vtype = vector_type_name( :int, size, vector_size, sign )
223
243
  type = type_name_X86( :int, size, vector_size )
224
244
  instructions = [[:ADD, "add"], [:SUB, "sub"]]
225
- instructions.push( [:SET, "setr"], [:SET1, "set1"] )
226
- instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16 and size <= 32
245
+ instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
246
+ instructions.push( [:SET1, "set1"] )
247
+ instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
248
+ instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
249
+ instructions.push( [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
250
+ [:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
251
+ [:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"], ) if vector_size >= 128 and size >= 32
227
252
  instructions.each { |cl, ins|
228
253
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
229
254
  }
230
- if size == 64 and vector_size < 512 then
255
+ if size == 64 and vector_size == 256 then
231
256
  INTRINSICS[X86][:SET1][vtype] = "_mm#{vs}_set1_#{type}x".to_sym
232
257
  INTRINSICS[X86][:SET][vtype] = "_mm#{vs}_setr_#{type}x".to_sym
233
258
  end
@@ -236,8 +261,10 @@ module BOAST
236
261
  [8, 16, 32, 64].each { |size|
237
262
  [:signed, :unsigned].each { |sign|
238
263
  vtype = vector_type_name( :int, size, vector_size, sign )
239
- [[:LOAD, "loadu"], [:LOADA, "load"],
240
- [:STORE, "storeu"], [:STOREA, "store"]].each { |cl, ins|
264
+ instructions = [[:LOAD, "loadu"], [:LOADA, "load"],
265
+ [:STORE, "storeu"], [:STOREA, "store"],
266
+ [:SETZERO, "setzero"] ]
267
+ instructions.each { |cl, ins|
241
268
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_si#{vector_size}".to_sym
242
269
  }
243
270
  }
@@ -245,12 +272,19 @@ module BOAST
245
272
  sizes = []
246
273
  sizes.push( 32, 64 ) if vector_size > 64
247
274
  sizes.each { |size|
248
- [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
249
- [:FMADD, "fmadd"], [:FMSUB, "fmsub"], [:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
250
- [:ADDSUB, "addsub"], [:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
251
- [:LOAD, "loadu"], [:LOADA, "load"], [:MASKLOAD, "maskload"],
252
- [:STORE, "storeu"], [:STOREA, "store"], [:MASKSTORE, "maskstore"],
253
- [:SET, "setr"], [:SET1, "set1"] ].each { |cl, ins|
275
+ instructions = [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
276
+ [:FMADD, "fmadd"], [:FMSUB, "fmsub"],
277
+ [:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
278
+ [:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
279
+ [:LOAD, "loadu"], [:LOADA, "load"],
280
+ [:STORE, "storeu"], [:STOREA, "store"],
281
+ [:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
282
+ [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
283
+ [:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
284
+ [:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"] ]
285
+ instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
286
+ instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
287
+ instructions.each { |cl, ins|
254
288
  vtype = vector_type_name( :float, size, vector_size)
255
289
  type = type_name_X86( :float, size, vector_size )
256
290
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
@@ -258,7 +292,7 @@ module BOAST
258
292
  }
259
293
  }
260
294
  INTRINSICS[X86][:CVT] = Hash::new { |h,k| h[k] = {} }
261
- [128, 256].each { |bvsize|
295
+ [128, 256, 512].each { |bvsize|
262
296
  [16, 32, 64].each { |bsize|
263
297
  ssize = bsize/2
264
298
  while ssize >= 8
@@ -296,6 +330,15 @@ module BOAST
296
330
  INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
297
331
  }
298
332
  }
333
+ [64,32].each { |bsize|
334
+ ftype = type_name_X86( :float, bsize, bvsize )
335
+ itype = type_name_X86( :int, bsize, bvsize, :signed )
336
+ fvtype = vector_type_name( :float, bsize, bvsize )
337
+ ivtype = vector_type_name( :int, bsize, bvsize, :signed )
338
+ vs = ( bvsize < 256 ? "" : "#{bvsize}" )
339
+ INTRINSICS[X86][:CVT][fvtype][ivtype] = "_mm#{vs}_cvt#{itype}_#{ftype}".to_sym
340
+ INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
341
+ }
299
342
  }
300
343
 
301
344
 
@@ -324,7 +367,7 @@ module BOAST
324
367
  vtype = vector_type_name( :float, size, vector_size )
325
368
  type = type_name_ARM( :float, size )
326
369
  [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
327
- [:FMADD, "mla"], [:FNMSUB, "mls"],
370
+ [:FMADD, "mla"], [:FNMADD, "mls"],
328
371
  [:LOAD, "ldl"], [:LOADA, "ldl"],
329
372
  [:STORE, "stl"], [:STOREA, "stl"]].each { |cl, ins|
330
373
  INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_#{type}".to_sym
@@ -332,6 +375,9 @@ module BOAST
332
375
  [[:SET1, "dup"]].each { |cl, ins|
333
376
  INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_n_#{type}".to_sym
334
377
  }
378
+ [[:SET_LANE, "set"]].each { |cl, ins|
379
+ INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_lane_#{type}".to_sym
380
+ }
335
381
  }
336
382
  }
337
383
  INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
@@ -344,8 +390,8 @@ module BOAST
344
390
  ivtype = vector_type_name( :int, int_size, vector_size, sign )
345
391
  ftype = type_name_ARM( :float, float_size )
346
392
  itype = type_name_ARM( :int, int_size, sign )
347
- INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
348
- INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
393
+ INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
394
+ INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
349
395
  }
350
396
  }
351
397
  svsize = 64
@@ -357,8 +403,8 @@ module BOAST
357
403
  btype = type_name_ARM( :int, bsize, sign )
358
404
  svtype = vector_type_name( :int, ssize, svsize, sign )
359
405
  bvtype = vector_type_name( :int, bsize, bvsize, sign )
360
- INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovl_#{stype}".to_sym
361
- INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovn_#{btype}".to_sym
406
+ INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovl_#{stype}".to_sym
407
+ INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovn_#{btype}".to_sym
362
408
  }
363
409
  }
364
410
 
@@ -152,7 +152,7 @@ EOF
152
152
  end
153
153
 
154
154
  def open
155
- output.puts to_s
155
+ output.puts to_s unless disable_openmp
156
156
  return self
157
157
  end
158
158
 
@@ -166,7 +166,7 @@ EOF
166
166
  end
167
167
 
168
168
  def close
169
- output.puts end_string(openmp_end_clauses_to_s)
169
+ output.puts end_string(openmp_end_clauses_to_s) unless disable_openmp
170
170
  return self
171
171
  end
172
172
 
@@ -316,7 +316,7 @@ module BOAST
316
316
 
317
317
  def initialize(source, return_type)
318
318
  @source = source
319
- @return_type = return_type
319
+ @return_type = return_type.to_var
320
320
  end
321
321
 
322
322
  def type
@@ -328,21 +328,29 @@ module BOAST
328
328
  if @source.kind_of?( Array ) then
329
329
  raise OperatorError, "Invalid array length!" unless @source.length == @return_type.type.vector_length
330
330
  return @return_type.copy("(#{@return_type.type.decl})( #{@source.join(", ")} )", DISCARD_OPTIONS) if lang == CL
331
-
331
+ return Set(@source.first, @return_type).to_var if @source.uniq.size == 1
332
332
  begin
333
333
  instruction = intrinsics(:SET, @return_type.type)
334
+ raise IntrinsicsError unless instruction
334
335
  return @return_type.copy("#{instruction}( #{@source.join(", ")} )", DISCARD_OPTIONS)
335
336
  rescue IntrinsicsError
336
337
  instruction = intrinsics(:SET_LANE, @return_type.type)
338
+ raise IntrinsicsError, "Missing instruction for SET_LANE on #{get_architecture_name}!" unless instruction
337
339
  s = Set(0, @return_type).to_s
338
340
  @source.each_with_index { |v,i|
339
- s = "#{instruction}(#{v}, #{s}, #{i})"
341
+ s = "#{instruction}( #{v}, #{s}, #{i} )"
340
342
  }
341
343
  return @return_type.copy(s, DISCARD_OPTIONS)
342
344
  end
343
345
  elsif @source.class != Variable or @source.type.vector_length == 1 then
344
346
  return @return_type.copy("(#{@return_type.type.decl})( #{@source} )", DISCARD_OPTIONS) if lang == CL
345
-
347
+ if (@source.is_a?(Numeric) and @source == 0) or (@source.class == Variable and @source.constant == 0) then
348
+ begin
349
+ instruction = intrinsics(:SETZERO, @return_type.type)
350
+ return @return_type.copy("#{instruction}( )", DISCARD_OPTIONS) if instruction
351
+ rescue IntrinsicsError
352
+ end
353
+ end
346
354
  instruction = intrinsics(:SET1, @return_type.type)
347
355
  return @return_type.copy("#{instruction}( #{@source} )", DISCARD_OPTIONS)
348
356
  elsif @return_type.type != @source.type
@@ -379,7 +387,7 @@ module BOAST
379
387
 
380
388
  def initialize(source, return_type)
381
389
  @source = source
382
- @return_type = return_type
390
+ @return_type = return_type.to_var
383
391
  end
384
392
 
385
393
  def type
@@ -445,12 +453,12 @@ module BOAST
445
453
  def initialize(source, mask, return_type)
446
454
  @source = source
447
455
  @mask = mask
448
- @return_type = return_type
456
+ @return_type = return_type.to_var
449
457
  end
450
458
 
451
459
  def get_mask
452
- raise OperatorError, "Mask size is wrong: #{@mask.length} for #{@return_type.type.vector_length}!" if @mask.length != @return_type.type.vector_length
453
- return Load(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => @return_type.type.size, :vector_length => @return_type.type.vector_length ) )
460
+ type = @return_type.type
461
+ return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
454
462
  end
455
463
 
456
464
  private :get_mask
@@ -461,8 +469,12 @@ module BOAST
461
469
 
462
470
  def to_var
463
471
  raise OperatorError, "Cannot load unknown type!" unless @return_type
472
+ type = @return_type.type
464
473
  raise LanguageError, "Unsupported language!" unless lang == C
465
- instruction = intrinsics(:MASKLOAD, @return_type.type)
474
+ raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
475
+ return Load( @source, @return_type ).to_var unless @mask.include?(0)
476
+ return Set( 0, @return_type ).to_var if @mask.uniq.size == 1 and @mask.uniq.first == 0
477
+ instruction = intrinsics(:MASKLOAD, type)
466
478
  s = ""
467
479
  src = "#{@source}"
468
480
  if src[0] != "*" then
@@ -470,8 +482,8 @@ module BOAST
470
482
  else
471
483
  src = src[1..-1]
472
484
  end
473
- p_type = @return_type.type.copy(:vector_length => 1)
474
- s += "#{instruction}((#{p_type.decl} * )#{src}, #{get_mask})"
485
+ p_type = type.copy(:vector_length => 1)
486
+ s += "#{instruction}( (#{p_type.decl} * ) #{src}, #{get_mask} )"
475
487
  return @return_type.copy( s, DISCARD_OPTIONS)
476
488
  end
477
489
 
@@ -501,11 +513,11 @@ module BOAST
501
513
  attr_reader :source
502
514
  attr_reader :store_type
503
515
 
504
- def initialize(dest, source, mask, store_type = nil)
516
+ def initialize(dest, source, store_type = nil)
505
517
  @dest = dest
506
518
  @source = source
507
519
  @store_type = store_type
508
- @store_type = source unless @store_type
520
+ @store_type = source.to_var unless @store_type
509
521
  end
510
522
 
511
523
  def to_s
@@ -516,17 +528,17 @@ module BOAST
516
528
  else
517
529
  dst = dst[1..-1]
518
530
  end
531
+ type = @store_type.type
532
+ return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
533
+ return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
519
534
 
520
- return "vstore#{@source.type.vector_length}(#{@source}, 0, #{dst})" if lang == CL
521
- return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and @source.type.total_size*8 == 64
522
-
523
- if @dest.alignment == @source.type.total_size then
524
- instruction = intrinsics(:STOREA, @source.type)
535
+ if @dest.alignment == type.total_size then
536
+ instruction = intrinsics(:STOREA, type)
525
537
  else
526
- instruction = intrinsics(:STORE, @source.type)
538
+ instruction = intrinsics(:STORE, type)
527
539
  end
528
- p_type = @source.type.copy(:vector_length => 1)
529
- p_type = @source.type if get_architecture == X86 and @source.type.kind_of?(Int)
540
+ p_type = type.copy(:vector_length => 1)
541
+ p_type = type if get_architecture == X86 and type.kind_of?(Int)
530
542
  return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
531
543
  end
532
544
  return Affectation.basic_usage(@dest, @source)
@@ -560,20 +572,24 @@ module BOAST
560
572
  @source = source
561
573
  @mask = mask
562
574
  @store_type = store_type
563
- @store_type = source unless @store_type
575
+ @store_type = source.to_var unless @store_type
564
576
  end
565
577
 
566
578
  def get_mask
567
- raise OperatorError, "Mask size is wrong: #{@mask.length} for #{@store_type.type.vector_length}!" if @mask.length != @store_type.type.vector_length
568
- return Load.to_s(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => @store_type.type.size, :vector_length => @store_type.type.vector_length ) )
579
+ type = @store_type.type
580
+ return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
569
581
  end
570
582
 
571
583
  private :get_mask
572
584
 
573
585
  def to_s
574
586
  raise OperatorError, "Cannot store unknown type!" unless @store_type
587
+ type = @store_type.type
575
588
  raise LanguageError, "Unsupported language!" unless lang == C
576
- instruction = intrinsics(:MASKSTORE, @store_type.type)
589
+ raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
590
+ return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
591
+ return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
592
+ instruction = intrinsics(:MASKSTORE, type)
577
593
  s = ""
578
594
  dst = "#{@dest}"
579
595
  if dst[0] != "*" then
@@ -581,8 +597,81 @@ module BOAST
581
597
  else
582
598
  dst = dst[1..-1]
583
599
  end
584
- p_type = @store_type.type.copy(:vector_length => 1)
585
- return s += "#{instruction}((#{p_type.decl} * )#{dst}, #{get_mask}, #{Operator.convert(@source, @store_type.type)})"
600
+ p_type = type.copy(:vector_length => 1)
601
+ return s += "#{instruction}( (#{p_type.decl} * ) #{dst}, #{get_mask}, #{Operator.convert(@source, type)} )"
602
+ end
603
+
604
+ def pr
605
+ ss = to_s
606
+ if ss then
607
+ s=""
608
+ s += indent
609
+ s += ss
610
+ s += ";" if [C, CL, CUDA].include?( lang )
611
+ output.puts s
612
+ end
613
+ return self
614
+ end
615
+
616
+ end
617
+
618
+ class FMA < Operator
619
+ extend Functor
620
+ include Intrinsics
621
+ include Arithmetic
622
+ include Inspectable
623
+ include PrivateStateAccessor
624
+
625
+ attr_reader :operand1
626
+ attr_reader :operand2
627
+ attr_reader :operand3
628
+ attr_reader :return_type
629
+
630
+ def initialize(a,b,c)
631
+ @operand1 = a
632
+ @operand2 = b
633
+ @operand3 = c
634
+ @return_type = nil
635
+ @return_type = @operand3.to_var unless @return_type
636
+ end
637
+
638
+ def convert_operand(op)
639
+ return "#{Operator.convert(op, @return_type.type)}"
640
+ end
641
+
642
+ private :convert_operand
643
+
644
+ def type
645
+ return @return_type.type
646
+ end
647
+
648
+ def to_var
649
+ instruction = nil
650
+ begin
651
+ instruction = intrinsics(:FMADD,@return_type.type)
652
+ rescue
653
+ end
654
+ return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
655
+ op1 = convert_operand(@operand1.to_var)
656
+ op2 = convert_operand(@operand2.to_var)
657
+ op3 = convert_operand(@operand3.to_var)
658
+ if [CL, CUDA].include?(lang)
659
+ ret_name = "fma( #{op1}, #{op2}, #{op3} )"
660
+ else
661
+ case architecture
662
+ when X86
663
+ ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
664
+ when ARM
665
+ ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
666
+ else
667
+ return (@operand3 + @operand1 * @operand2).to_var
668
+ end
669
+ end
670
+ return @return_type.copy( ret_name, DISCARD_OPTIONS)
671
+ end
672
+
673
+ def to_s
674
+ return to_var.to_s
586
675
  end
587
676
 
588
677
  def pr
@@ -596,7 +685,7 @@ module BOAST
596
685
 
597
686
  end
598
687
 
599
- class FMA < Operator
688
+ class FMS < Operator
600
689
  extend Functor
601
690
  include Intrinsics
602
691
  include Arithmetic
@@ -629,23 +718,24 @@ module BOAST
629
718
  def to_var
630
719
  instruction = nil
631
720
  begin
632
- instruction = intrinsics(:FMADD,@return_type.type)
721
+ instruction = intrinsics(:FNMADD,@return_type.type)
633
722
  rescue
634
723
  end
635
- return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
636
- op1 = convert_operand(@operand1)
637
- op2 = convert_operand(@operand2)
638
- op3 = convert_operand(@operand3)
724
+ return (@operand3 - @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
725
+ op1 = convert_operand(@operand1.to_var)
726
+ op2 = convert_operand(@operand2.to_var)
727
+ op3 = convert_operand(@operand3.to_var)
639
728
  if [CL, CUDA].include?(lang)
640
- ret_name = "fma(#{op1},#{op2},#{op3})"
729
+ op1 = convert_operand((-@operand1).to_var)
730
+ ret_name = "fma( #{op1}, #{op2}, #{op3} )"
641
731
  else
642
732
  case architecture
643
733
  when X86
644
- ret_name = "#{instruction}(#{op1},#{op2},#{op3})"
734
+ ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
645
735
  when ARM
646
- ret_name = "#{instruction}(#{op2},#{op3},#{op1})"
736
+ ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
647
737
  else
648
- return (@operand1 * @operand2 + @operand3).to_var
738
+ return (@operand3 - @operand1 * @operand2).to_var
649
739
  end
650
740
  end
651
741
  return @return_type.copy( ret_name, DISCARD_OPTIONS)
@@ -2,10 +2,16 @@ module BOAST
2
2
 
3
3
  module_function
4
4
 
5
- def register_funccall(name)
5
+ FUNCCALLS = {}
6
+
7
+ def register_funccall(name, options = {})
8
+ sym = name.to_sym
9
+ FUNCCALLS[sym] = {}
10
+ FUNCCALLS[sym][:parameters] = options[:parameters]
11
+ FUNCCALLS[sym][:returns] = options[:returns]
6
12
  s =<<EOF
7
13
  def self.#{name}(*args)
8
- return FuncCall("#{name}", *args)
14
+ return FuncCall(#{sym.inspect}, *args#{options[:returns] ? ", returns: FUNCCALLS[#{sym.inspect}][:returns]" : ""})
9
15
  end
10
16
  EOF
11
17
  eval s
@@ -252,6 +252,11 @@ module BOAST
252
252
  return Variable::new(name, @type.class, h)
253
253
  end
254
254
 
255
+ def set_align(align)
256
+ @alignment = align
257
+ return self
258
+ end
259
+
255
260
  def self.from_type(name, type, options={})
256
261
  hash = type.to_hash
257
262
  options.each { |k,v|
@@ -396,7 +396,7 @@ EOF
396
396
 
397
397
  load_module
398
398
 
399
- cleanup(kernel_files)
399
+ cleanup(kernel_files) unless keep_temp
400
400
 
401
401
  eval "self.extend(#{module_name})"
402
402
 
@@ -51,7 +51,7 @@ module BOAST
51
51
  cflags += " -DHAVE_NARRAY_H" if narray_path
52
52
  cflags += " -I/usr/local/k1tools/include" if @architecture == MPPA
53
53
  objext = RbConfig::CONFIG["OBJEXT"]
54
- if options[:openmp] and @lang == C then
54
+ if options[:openmp] and @lang == C and not disable_openmp then
55
55
  openmp_cflags = get_openmp_flags(c_compiler)
56
56
  raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
57
57
  cflags += " #{openmp_cflags}"
@@ -79,7 +79,7 @@ module BOAST
79
79
  cxx_compiler = options[:CXX]
80
80
  cxxflags = options[:CXXFLAGS]
81
81
  cxxflags += " -fPIC #{includes}"
82
- if options[:openmp] and @lang == C then
82
+ if options[:openmp] and @lang == C and not disable_openmp then
83
83
  openmp_cxxflags = get_openmp_flags(cxx_compiler)
84
84
  raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
85
85
  cxxflags += " #{openmp_cxxflags}"
@@ -97,7 +97,7 @@ module BOAST
97
97
  fcflags += " -march=#{get_model}"
98
98
  fcflags += " -fPIC"
99
99
  fcflags += " -fno-second-underscore" if f_compiler == 'g95'
100
- if options[:openmp] and @lang == FORTRAN then
100
+ if options[:openmp] and @lang == FORTRAN and not disable_openmp then
101
101
  openmp_fcflags = get_openmp_flags(f_compiler)
102
102
  raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
103
103
  fcflags += " #{openmp_fcflags}"
@@ -152,7 +152,7 @@ module BOAST
152
152
  c_compiler = "cc" if not c_compiler
153
153
  linker = options[:LD]
154
154
  linker = c_compiler if not linker
155
- if options[:openmp] then
155
+ if options[:openmp] and not disable_openmp then
156
156
  openmp_ldflags = get_openmp_flags(linker)
157
157
  raise "unknown openmp flags for: #{linker}" if not openmp_ldflags
158
158
  ldflags += " #{openmp_ldflags}"
@@ -35,16 +35,19 @@ module BOAST
35
35
  private_boolean_state_accessor :verbose
36
36
  private_boolean_state_accessor :debug_source
37
37
  private_boolean_state_accessor :ffi
38
+ private_boolean_state_accessor :keep_temp
38
39
  private_state_accessor :fortran_line_length
39
40
  end
40
41
 
41
42
  boolean_state_accessor :verbose
42
43
  boolean_state_accessor :debug_source
43
44
  boolean_state_accessor :ffi
45
+ boolean_state_accessor :keep_temp
44
46
  state_accessor :fortran_line_length
45
- default_state_getter :ffi, false
46
47
  default_state_getter :verbose, false
47
48
  default_state_getter :debug_source, false
49
+ default_state_getter :ffi, false
50
+ default_state_getter :keep_temp, false
48
51
  default_state_getter :fortran_line_length, 72
49
52
 
50
53
  module_function
@@ -26,8 +26,8 @@ module BOAST
26
26
  puts "#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}"
27
27
  end
28
28
  result = `#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}`
29
- File::unlink(library_object)
30
- File::unlink(library_source)
29
+ File::unlink(library_object) unless keep_temp
30
+ File::unlink(library_source) unless keep_temp
31
31
  return result
32
32
  end
33
33
  end
@@ -38,23 +38,23 @@ module BOAST
38
38
 
39
39
  def init_opencl_types
40
40
  @@opencl_real_types = {
41
- 2 => OpenCL::Half,
42
- 4 => OpenCL::Float,
43
- 8 => OpenCL::Double
41
+ 2 => OpenCL::Half1,
42
+ 4 => OpenCL::Float1,
43
+ 8 => OpenCL::Double1
44
44
  }
45
45
 
46
46
  @@opencl_int_types = {
47
47
  true => {
48
- 1 => OpenCL::Char,
49
- 2 => OpenCL::Short,
50
- 4 => OpenCL::Int,
51
- 8 => OpenCL::Long
48
+ 1 => OpenCL::Char1,
49
+ 2 => OpenCL::Short1,
50
+ 4 => OpenCL::Int1,
51
+ 8 => OpenCL::Long1
52
52
  },
53
53
  false => {
54
- 1 => OpenCL::UChar,
55
- 2 => OpenCL::UShort,
56
- 4 => OpenCL::UInt,
57
- 8 => OpenCL::ULong
54
+ 1 => OpenCL::UChar1,
55
+ 2 => OpenCL::UShort1,
56
+ 4 => OpenCL::UInt1,
57
+ 8 => OpenCL::ULong1
58
58
  }
59
59
  }
60
60
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: BOAST
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brice Videau
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-08 00:00:00.000000000 Z
11
+ date: 2016-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray
@@ -56,20 +56,20 @@ dependencies:
56
56
  requirements:
57
57
  - - "~>"
58
58
  - !ruby/object:Gem::Version
59
- version: '1.0'
59
+ version: '1.2'
60
60
  - - ">="
61
61
  - !ruby/object:Gem::Version
62
- version: 1.0.0
62
+ version: 1.2.1
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '1.0'
69
+ version: '1.2'
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: 1.0.0
72
+ version: 1.2.1
73
73
  - !ruby/object:Gem::Dependency
74
74
  name: systemu
75
75
  requirement: !ruby/object:Gem::Requirement