BOAST 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 939618b7050d9d0298fa5f06b310ac5a881b85ef
4
- data.tar.gz: 60b1fc0bc92e9b1dad4003604a30c9f4de634d87
3
+ metadata.gz: 0bb86d632c7e15c62f42007db65ef307a86ed1eb
4
+ data.tar.gz: 8b4323362889540f6e731bad43e6765b24f26d2c
5
5
  SHA512:
6
- metadata.gz: cc1877354a30495b196128bd5e94055808197c14e9e7d6fd9dbb8c1e6c6dc603efbd5a877fd6d64f4d7f12b0fe88a0a8c1801cacb3baf13d206d25740914af4e
7
- data.tar.gz: 858093b8d634b8f0ff61f90aa5a2b7a8d67c5b49e9013cdc4093a453095adfaf9ba3490ad8548082b1bd0728cd280184a2ccacf147d94dcdbf541fff124217ec
6
+ metadata.gz: d4dd1ffe821624820fec06684910ad87f0dd451d81b596fafceda7b41651e0d5810dfe4aa902de69d609387577e4ace87b1ede8dea7ebcc65cc4d11d13cd3dda
7
+ data.tar.gz: 65d6bc915e2299d38e1bd2f4c52057f42e5c6ada5fd5d26416dacd20a4169f364f3eaec28c9e755a4d3b40c3903cb8f2092c23d55dcf400b5b7dbeac5a3f1787
data/BOAST.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'BOAST'
3
- s.version = "1.2.0"
3
+ s.version = "1.2.1"
4
4
  s.author = "Brice Videau"
5
5
  s.email = "brice.videau@imag.fr"
6
6
  s.homepage = "https://github.com/Nanosim-LIG/boast"
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.required_ruby_version = '>= 1.9.3'
13
13
  s.add_dependency 'narray', '~> 0.6.0', '>=0.6.0.8'
14
14
  s.add_dependency 'narray_ffi', '~> 1.2', '>=1.2.0'
15
- s.add_dependency 'opencl_ruby_ffi', '~> 1.0', '>=1.0.0'
15
+ s.add_dependency 'opencl_ruby_ffi', '~> 1.2', '>=1.2.1'
16
16
  s.add_dependency 'systemu', '~> 2', '>=2.2.0'
17
17
  s.add_dependency 'os', '~> 0.9', '>=0.9.6'
18
18
  s.add_dependency 'PAPI', '~> 0', '>=0.101'
@@ -30,6 +30,7 @@ module BOAST
30
30
  private_boolean_state_accessor :decl_module
31
31
  private_boolean_state_accessor :annotate
32
32
  private_boolean_state_accessor :optimizer_log
33
+ private_boolean_state_accessor :disable_openmp
33
34
 
34
35
  private
35
36
  def push_env(*args)
@@ -84,6 +85,7 @@ module BOAST
84
85
  boolean_state_accessor :decl_module
85
86
  boolean_state_accessor :annotate
86
87
  boolean_state_accessor :optimizer_log
88
+ boolean_state_accessor :disable_openmp
87
89
 
88
90
  default_state_getter :address_size, OS.bits/8
89
91
  default_state_getter :lang, FORTRAN, '"const_get(#{envs})"', :BOAST_LANG
@@ -104,6 +106,7 @@ module BOAST
104
106
  default_state_getter :annotate_level, 0
105
107
  default_state_getter :optimizer_log, false
106
108
  default_state_getter :optimizer_log_file, nil
109
+ default_state_getter :disable_openmp, false
107
110
 
108
111
  alias use_vla_old? use_vla?
109
112
  class << self
@@ -163,17 +166,27 @@ module BOAST
163
166
 
164
167
  @@env = Hash::new{|h, k| h[k] = []}
165
168
 
166
- def push_env(vars = {})
167
- vars.each { |key,value|
169
+ def push_env(vars = {}, &block)
170
+ keys = []
171
+ vars.each { |key, value|
168
172
  var = nil
169
173
  begin
170
174
  var = BOAST::class_variable_get("@@"+key.to_s)
171
175
  rescue
176
+ BOAST::pop_env(*keys)
172
177
  raise "Unknown module variable #{key}!"
173
178
  end
174
179
  @@env[key].push(var)
175
180
  BOAST::class_variable_set("@@"+key.to_s, value)
181
+ keys.push(key)
176
182
  }
183
+ if block then
184
+ begin
185
+ block.call
186
+ ensure
187
+ BOAST::pop_env(*vars.keys)
188
+ end
189
+ end
177
190
  end
178
191
 
179
192
  def pop_env(*vars)
@@ -91,6 +91,33 @@ X86architectures ={"pentium2"=>["MMX"],
91
91
  "RDSEED",
92
92
  "ADCX",
93
93
  "PREFETCHW"],
94
+ "knl"=>
95
+ ["MOVBE",
96
+ "MMX",
97
+ "SSE",
98
+ "SSE2",
99
+ "SSE3",
100
+ "SSSE3",
101
+ "SSE4.1",
102
+ "SSE4.2",
103
+ "POPCNT",
104
+ "AVX",
105
+ "AVX2",
106
+ "AES",
107
+ "PCLMUL",
108
+ "FSGSBASE",
109
+ "RDRND",
110
+ "FMA",
111
+ "BMI",
112
+ "BMI2",
113
+ "F16C",
114
+ "RDSEED",
115
+ "ADCX",
116
+ "PREFETCHW",
117
+ "AVX512F",
118
+ "AVX512PF",
119
+ "AVX512ER",
120
+ "AVX512CD"],
94
121
  "bonnell"=>["MOVBE", "MMX", "SSE", "SSE2", "SSE3", "SSSE3"],
95
122
  "silvermont"=>
96
123
  ["MOVBE",
@@ -14,6 +14,11 @@ module BOAST
14
14
  return !!@alignment
15
15
  end
16
16
 
17
+ def set_align(align)
18
+ @alignment = align
19
+ return self
20
+ end
21
+
17
22
  def to_var
18
23
  var = @source.copy("#{self}", :const => nil, :constant => nil, :dim => nil, :dimension => nil, :direction => nil, :dir => nil, :align => alignment)
19
24
  return var
@@ -39,16 +39,36 @@ module BOAST
39
39
  INTRINSICS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
40
40
  CONVERSIONS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
41
41
 
42
+ def check_coverage
43
+ ins = []
44
+ INTRINSICS[X86].each { |i,v|
45
+ if i == :CVT then
46
+ v.each { |type1, h|
47
+ h.each { |type2, instr|
48
+ ins.push instr.to_s
49
+ }
50
+ }
51
+ else
52
+ v.each { |type, instr|
53
+ ins.push instr.to_s
54
+ }
55
+ end
56
+ }
57
+ return ins - INSTRUCTIONS.keys
58
+ end
59
+
60
+ module_function :check_coverage
61
+
42
62
  def intrinsics_by_vector_name(intr_symbol, type, type2=nil)
43
63
  if type2 then
44
64
  instruction = INTRINSICS[get_architecture][intr_symbol][type][type2]
45
65
  else
46
66
  instruction = INTRINSICS[get_architecture][intr_symbol][type]
47
67
  end
68
+ raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
48
69
  return instruction if get_architecture == ARM
49
- raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
50
70
  supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
51
- raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
71
+ raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
52
72
  return instruction
53
73
  end
54
74
 
@@ -213,7 +233,7 @@ module BOAST
213
233
 
214
234
  module_function :type_name_X86
215
235
 
216
- [64, 128, 256].each { |vector_size|
236
+ [64, 128, 256, 512].each { |vector_size|
217
237
  vs = ( vector_size < 256 ? "" : "#{vector_size}" )
218
238
  sizes = [8, 16, 32]
219
239
  sizes.push( 64 ) if vector_size > 64
@@ -222,12 +242,17 @@ module BOAST
222
242
  vtype = vector_type_name( :int, size, vector_size, sign )
223
243
  type = type_name_X86( :int, size, vector_size )
224
244
  instructions = [[:ADD, "add"], [:SUB, "sub"]]
225
- instructions.push( [:SET, "setr"], [:SET1, "set1"] )
226
- instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16 and size <= 32
245
+ instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
246
+ instructions.push( [:SET1, "set1"] )
247
+ instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
248
+ instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
249
+ instructions.push( [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
250
+ [:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
251
+ [:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"], ) if vector_size >= 128 and size >= 32
227
252
  instructions.each { |cl, ins|
228
253
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
229
254
  }
230
- if size == 64 and vector_size < 512 then
255
+ if size == 64 and vector_size == 256 then
231
256
  INTRINSICS[X86][:SET1][vtype] = "_mm#{vs}_set1_#{type}x".to_sym
232
257
  INTRINSICS[X86][:SET][vtype] = "_mm#{vs}_setr_#{type}x".to_sym
233
258
  end
@@ -236,8 +261,10 @@ module BOAST
236
261
  [8, 16, 32, 64].each { |size|
237
262
  [:signed, :unsigned].each { |sign|
238
263
  vtype = vector_type_name( :int, size, vector_size, sign )
239
- [[:LOAD, "loadu"], [:LOADA, "load"],
240
- [:STORE, "storeu"], [:STOREA, "store"]].each { |cl, ins|
264
+ instructions = [[:LOAD, "loadu"], [:LOADA, "load"],
265
+ [:STORE, "storeu"], [:STOREA, "store"],
266
+ [:SETZERO, "setzero"] ]
267
+ instructions.each { |cl, ins|
241
268
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_si#{vector_size}".to_sym
242
269
  }
243
270
  }
@@ -245,12 +272,19 @@ module BOAST
245
272
  sizes = []
246
273
  sizes.push( 32, 64 ) if vector_size > 64
247
274
  sizes.each { |size|
248
- [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
249
- [:FMADD, "fmadd"], [:FMSUB, "fmsub"], [:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
250
- [:ADDSUB, "addsub"], [:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
251
- [:LOAD, "loadu"], [:LOADA, "load"], [:MASKLOAD, "maskload"],
252
- [:STORE, "storeu"], [:STOREA, "store"], [:MASKSTORE, "maskstore"],
253
- [:SET, "setr"], [:SET1, "set1"] ].each { |cl, ins|
275
+ instructions = [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
276
+ [:FMADD, "fmadd"], [:FMSUB, "fmsub"],
277
+ [:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
278
+ [:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
279
+ [:LOAD, "loadu"], [:LOADA, "load"],
280
+ [:STORE, "storeu"], [:STOREA, "store"],
281
+ [:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
282
+ [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
283
+ [:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
284
+ [:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"] ]
285
+ instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
286
+ instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
287
+ instructions.each { |cl, ins|
254
288
  vtype = vector_type_name( :float, size, vector_size)
255
289
  type = type_name_X86( :float, size, vector_size )
256
290
  INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
@@ -258,7 +292,7 @@ module BOAST
258
292
  }
259
293
  }
260
294
  INTRINSICS[X86][:CVT] = Hash::new { |h,k| h[k] = {} }
261
- [128, 256].each { |bvsize|
295
+ [128, 256, 512].each { |bvsize|
262
296
  [16, 32, 64].each { |bsize|
263
297
  ssize = bsize/2
264
298
  while ssize >= 8
@@ -296,6 +330,15 @@ module BOAST
296
330
  INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
297
331
  }
298
332
  }
333
+ [64,32].each { |bsize|
334
+ ftype = type_name_X86( :float, bsize, bvsize )
335
+ itype = type_name_X86( :int, bsize, bvsize, :signed )
336
+ fvtype = vector_type_name( :float, bsize, bvsize )
337
+ ivtype = vector_type_name( :int, bsize, bvsize, :signed )
338
+ vs = ( bvsize < 256 ? "" : "#{bvsize}" )
339
+ INTRINSICS[X86][:CVT][fvtype][ivtype] = "_mm#{vs}_cvt#{itype}_#{ftype}".to_sym
340
+ INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
341
+ }
299
342
  }
300
343
 
301
344
 
@@ -324,7 +367,7 @@ module BOAST
324
367
  vtype = vector_type_name( :float, size, vector_size )
325
368
  type = type_name_ARM( :float, size )
326
369
  [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
327
- [:FMADD, "mla"], [:FNMSUB, "mls"],
370
+ [:FMADD, "mla"], [:FNMADD, "mls"],
328
371
  [:LOAD, "ldl"], [:LOADA, "ldl"],
329
372
  [:STORE, "stl"], [:STOREA, "stl"]].each { |cl, ins|
330
373
  INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_#{type}".to_sym
@@ -332,6 +375,9 @@ module BOAST
332
375
  [[:SET1, "dup"]].each { |cl, ins|
333
376
  INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_n_#{type}".to_sym
334
377
  }
378
+ [[:SET_LANE, "set"]].each { |cl, ins|
379
+ INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_lane_#{type}".to_sym
380
+ }
335
381
  }
336
382
  }
337
383
  INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
@@ -344,8 +390,8 @@ module BOAST
344
390
  ivtype = vector_type_name( :int, int_size, vector_size, sign )
345
391
  ftype = type_name_ARM( :float, float_size )
346
392
  itype = type_name_ARM( :int, int_size, sign )
347
- INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
348
- INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
393
+ INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
394
+ INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
349
395
  }
350
396
  }
351
397
  svsize = 64
@@ -357,8 +403,8 @@ module BOAST
357
403
  btype = type_name_ARM( :int, bsize, sign )
358
404
  svtype = vector_type_name( :int, ssize, svsize, sign )
359
405
  bvtype = vector_type_name( :int, bsize, bvsize, sign )
360
- INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovl_#{stype}".to_sym
361
- INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovn_#{btype}".to_sym
406
+ INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovl_#{stype}".to_sym
407
+ INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovn_#{btype}".to_sym
362
408
  }
363
409
  }
364
410
 
@@ -152,7 +152,7 @@ EOF
152
152
  end
153
153
 
154
154
  def open
155
- output.puts to_s
155
+ output.puts to_s unless disable_openmp
156
156
  return self
157
157
  end
158
158
 
@@ -166,7 +166,7 @@ EOF
166
166
  end
167
167
 
168
168
  def close
169
- output.puts end_string(openmp_end_clauses_to_s)
169
+ output.puts end_string(openmp_end_clauses_to_s) unless disable_openmp
170
170
  return self
171
171
  end
172
172
 
@@ -316,7 +316,7 @@ module BOAST
316
316
 
317
317
  def initialize(source, return_type)
318
318
  @source = source
319
- @return_type = return_type
319
+ @return_type = return_type.to_var
320
320
  end
321
321
 
322
322
  def type
@@ -328,21 +328,29 @@ module BOAST
328
328
  if @source.kind_of?( Array ) then
329
329
  raise OperatorError, "Invalid array length!" unless @source.length == @return_type.type.vector_length
330
330
  return @return_type.copy("(#{@return_type.type.decl})( #{@source.join(", ")} )", DISCARD_OPTIONS) if lang == CL
331
-
331
+ return Set(@source.first, @return_type).to_var if @source.uniq.size == 1
332
332
  begin
333
333
  instruction = intrinsics(:SET, @return_type.type)
334
+ raise IntrinsicsError unless instruction
334
335
  return @return_type.copy("#{instruction}( #{@source.join(", ")} )", DISCARD_OPTIONS)
335
336
  rescue IntrinsicsError
336
337
  instruction = intrinsics(:SET_LANE, @return_type.type)
338
+ raise IntrinsicsError, "Missing instruction for SET_LANE on #{get_architecture_name}!" unless instruction
337
339
  s = Set(0, @return_type).to_s
338
340
  @source.each_with_index { |v,i|
339
- s = "#{instruction}(#{v}, #{s}, #{i})"
341
+ s = "#{instruction}( #{v}, #{s}, #{i} )"
340
342
  }
341
343
  return @return_type.copy(s, DISCARD_OPTIONS)
342
344
  end
343
345
  elsif @source.class != Variable or @source.type.vector_length == 1 then
344
346
  return @return_type.copy("(#{@return_type.type.decl})( #{@source} )", DISCARD_OPTIONS) if lang == CL
345
-
347
+ if (@source.is_a?(Numeric) and @source == 0) or (@source.class == Variable and @source.constant == 0) then
348
+ begin
349
+ instruction = intrinsics(:SETZERO, @return_type.type)
350
+ return @return_type.copy("#{instruction}( )", DISCARD_OPTIONS) if instruction
351
+ rescue IntrinsicsError
352
+ end
353
+ end
346
354
  instruction = intrinsics(:SET1, @return_type.type)
347
355
  return @return_type.copy("#{instruction}( #{@source} )", DISCARD_OPTIONS)
348
356
  elsif @return_type.type != @source.type
@@ -379,7 +387,7 @@ module BOAST
379
387
 
380
388
  def initialize(source, return_type)
381
389
  @source = source
382
- @return_type = return_type
390
+ @return_type = return_type.to_var
383
391
  end
384
392
 
385
393
  def type
@@ -445,12 +453,12 @@ module BOAST
445
453
  def initialize(source, mask, return_type)
446
454
  @source = source
447
455
  @mask = mask
448
- @return_type = return_type
456
+ @return_type = return_type.to_var
449
457
  end
450
458
 
451
459
  def get_mask
452
- raise OperatorError, "Mask size is wrong: #{@mask.length} for #{@return_type.type.vector_length}!" if @mask.length != @return_type.type.vector_length
453
- return Load(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => @return_type.type.size, :vector_length => @return_type.type.vector_length ) )
460
+ type = @return_type.type
461
+ return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
454
462
  end
455
463
 
456
464
  private :get_mask
@@ -461,8 +469,12 @@ module BOAST
461
469
 
462
470
  def to_var
463
471
  raise OperatorError, "Cannot load unknown type!" unless @return_type
472
+ type = @return_type.type
464
473
  raise LanguageError, "Unsupported language!" unless lang == C
465
- instruction = intrinsics(:MASKLOAD, @return_type.type)
474
+ raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
475
+ return Load( @source, @return_type ).to_var unless @mask.include?(0)
476
+ return Set( 0, @return_type ).to_var if @mask.uniq.size == 1 and @mask.uniq.first == 0
477
+ instruction = intrinsics(:MASKLOAD, type)
466
478
  s = ""
467
479
  src = "#{@source}"
468
480
  if src[0] != "*" then
@@ -470,8 +482,8 @@ module BOAST
470
482
  else
471
483
  src = src[1..-1]
472
484
  end
473
- p_type = @return_type.type.copy(:vector_length => 1)
474
- s += "#{instruction}((#{p_type.decl} * )#{src}, #{get_mask})"
485
+ p_type = type.copy(:vector_length => 1)
486
+ s += "#{instruction}( (#{p_type.decl} * ) #{src}, #{get_mask} )"
475
487
  return @return_type.copy( s, DISCARD_OPTIONS)
476
488
  end
477
489
 
@@ -501,11 +513,11 @@ module BOAST
501
513
  attr_reader :source
502
514
  attr_reader :store_type
503
515
 
504
- def initialize(dest, source, mask, store_type = nil)
516
+ def initialize(dest, source, store_type = nil)
505
517
  @dest = dest
506
518
  @source = source
507
519
  @store_type = store_type
508
- @store_type = source unless @store_type
520
+ @store_type = source.to_var unless @store_type
509
521
  end
510
522
 
511
523
  def to_s
@@ -516,17 +528,17 @@ module BOAST
516
528
  else
517
529
  dst = dst[1..-1]
518
530
  end
531
+ type = @store_type.type
532
+ return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
533
+ return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
519
534
 
520
- return "vstore#{@source.type.vector_length}(#{@source}, 0, #{dst})" if lang == CL
521
- return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and @source.type.total_size*8 == 64
522
-
523
- if @dest.alignment == @source.type.total_size then
524
- instruction = intrinsics(:STOREA, @source.type)
535
+ if @dest.alignment == type.total_size then
536
+ instruction = intrinsics(:STOREA, type)
525
537
  else
526
- instruction = intrinsics(:STORE, @source.type)
538
+ instruction = intrinsics(:STORE, type)
527
539
  end
528
- p_type = @source.type.copy(:vector_length => 1)
529
- p_type = @source.type if get_architecture == X86 and @source.type.kind_of?(Int)
540
+ p_type = type.copy(:vector_length => 1)
541
+ p_type = type if get_architecture == X86 and type.kind_of?(Int)
530
542
  return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
531
543
  end
532
544
  return Affectation.basic_usage(@dest, @source)
@@ -560,20 +572,24 @@ module BOAST
560
572
  @source = source
561
573
  @mask = mask
562
574
  @store_type = store_type
563
- @store_type = source unless @store_type
575
+ @store_type = source.to_var unless @store_type
564
576
  end
565
577
 
566
578
  def get_mask
567
- raise OperatorError, "Mask size is wrong: #{@mask.length} for #{@store_type.type.vector_length}!" if @mask.length != @store_type.type.vector_length
568
- return Load.to_s(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => @store_type.type.size, :vector_length => @store_type.type.vector_length ) )
579
+ type = @store_type.type
580
+ return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
569
581
  end
570
582
 
571
583
  private :get_mask
572
584
 
573
585
  def to_s
574
586
  raise OperatorError, "Cannot store unknown type!" unless @store_type
587
+ type = @store_type.type
575
588
  raise LanguageError, "Unsupported language!" unless lang == C
576
- instruction = intrinsics(:MASKSTORE, @store_type.type)
589
+ raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
590
+ return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
591
+ return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
592
+ instruction = intrinsics(:MASKSTORE, type)
577
593
  s = ""
578
594
  dst = "#{@dest}"
579
595
  if dst[0] != "*" then
@@ -581,8 +597,81 @@ module BOAST
581
597
  else
582
598
  dst = dst[1..-1]
583
599
  end
584
- p_type = @store_type.type.copy(:vector_length => 1)
585
- return s += "#{instruction}((#{p_type.decl} * )#{dst}, #{get_mask}, #{Operator.convert(@source, @store_type.type)})"
600
+ p_type = type.copy(:vector_length => 1)
601
+ return s += "#{instruction}( (#{p_type.decl} * ) #{dst}, #{get_mask}, #{Operator.convert(@source, type)} )"
602
+ end
603
+
604
+ def pr
605
+ ss = to_s
606
+ if ss then
607
+ s=""
608
+ s += indent
609
+ s += ss
610
+ s += ";" if [C, CL, CUDA].include?( lang )
611
+ output.puts s
612
+ end
613
+ return self
614
+ end
615
+
616
+ end
617
+
618
+ class FMA < Operator
619
+ extend Functor
620
+ include Intrinsics
621
+ include Arithmetic
622
+ include Inspectable
623
+ include PrivateStateAccessor
624
+
625
+ attr_reader :operand1
626
+ attr_reader :operand2
627
+ attr_reader :operand3
628
+ attr_reader :return_type
629
+
630
+ def initialize(a,b,c)
631
+ @operand1 = a
632
+ @operand2 = b
633
+ @operand3 = c
634
+ @return_type = nil
635
+ @return_type = @operand3.to_var unless @return_type
636
+ end
637
+
638
+ def convert_operand(op)
639
+ return "#{Operator.convert(op, @return_type.type)}"
640
+ end
641
+
642
+ private :convert_operand
643
+
644
+ def type
645
+ return @return_type.type
646
+ end
647
+
648
+ def to_var
649
+ instruction = nil
650
+ begin
651
+ instruction = intrinsics(:FMADD,@return_type.type)
652
+ rescue
653
+ end
654
+ return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
655
+ op1 = convert_operand(@operand1.to_var)
656
+ op2 = convert_operand(@operand2.to_var)
657
+ op3 = convert_operand(@operand3.to_var)
658
+ if [CL, CUDA].include?(lang)
659
+ ret_name = "fma( #{op1}, #{op2}, #{op3} )"
660
+ else
661
+ case architecture
662
+ when X86
663
+ ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
664
+ when ARM
665
+ ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
666
+ else
667
+ return (@operand3 + @operand1 * @operand2).to_var
668
+ end
669
+ end
670
+ return @return_type.copy( ret_name, DISCARD_OPTIONS)
671
+ end
672
+
673
+ def to_s
674
+ return to_var.to_s
586
675
  end
587
676
 
588
677
  def pr
@@ -596,7 +685,7 @@ module BOAST
596
685
 
597
686
  end
598
687
 
599
- class FMA < Operator
688
+ class FMS < Operator
600
689
  extend Functor
601
690
  include Intrinsics
602
691
  include Arithmetic
@@ -629,23 +718,24 @@ module BOAST
629
718
  def to_var
630
719
  instruction = nil
631
720
  begin
632
- instruction = intrinsics(:FMADD,@return_type.type)
721
+ instruction = intrinsics(:FNMADD,@return_type.type)
633
722
  rescue
634
723
  end
635
- return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
636
- op1 = convert_operand(@operand1)
637
- op2 = convert_operand(@operand2)
638
- op3 = convert_operand(@operand3)
724
+ return (@operand3 - @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
725
+ op1 = convert_operand(@operand1.to_var)
726
+ op2 = convert_operand(@operand2.to_var)
727
+ op3 = convert_operand(@operand3.to_var)
639
728
  if [CL, CUDA].include?(lang)
640
- ret_name = "fma(#{op1},#{op2},#{op3})"
729
+ op1 = convert_operand((-@operand1).to_var)
730
+ ret_name = "fma( #{op1}, #{op2}, #{op3} )"
641
731
  else
642
732
  case architecture
643
733
  when X86
644
- ret_name = "#{instruction}(#{op1},#{op2},#{op3})"
734
+ ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
645
735
  when ARM
646
- ret_name = "#{instruction}(#{op2},#{op3},#{op1})"
736
+ ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
647
737
  else
648
- return (@operand1 * @operand2 + @operand3).to_var
738
+ return (@operand3 - @operand1 * @operand2).to_var
649
739
  end
650
740
  end
651
741
  return @return_type.copy( ret_name, DISCARD_OPTIONS)
@@ -2,10 +2,16 @@ module BOAST
2
2
 
3
3
  module_function
4
4
 
5
- def register_funccall(name)
5
+ FUNCCALLS = {}
6
+
7
+ def register_funccall(name, options = {})
8
+ sym = name.to_sym
9
+ FUNCCALLS[sym] = {}
10
+ FUNCCALLS[sym][:parameters] = options[:parameters]
11
+ FUNCCALLS[sym][:returns] = options[:returns]
6
12
  s =<<EOF
7
13
  def self.#{name}(*args)
8
- return FuncCall("#{name}", *args)
14
+ return FuncCall(#{sym.inspect}, *args#{options[:returns] ? ", returns: FUNCCALLS[#{sym.inspect}][:returns]" : ""})
9
15
  end
10
16
  EOF
11
17
  eval s
@@ -252,6 +252,11 @@ module BOAST
252
252
  return Variable::new(name, @type.class, h)
253
253
  end
254
254
 
255
+ def set_align(align)
256
+ @alignment = align
257
+ return self
258
+ end
259
+
255
260
  def self.from_type(name, type, options={})
256
261
  hash = type.to_hash
257
262
  options.each { |k,v|
@@ -396,7 +396,7 @@ EOF
396
396
 
397
397
  load_module
398
398
 
399
- cleanup(kernel_files)
399
+ cleanup(kernel_files) unless keep_temp
400
400
 
401
401
  eval "self.extend(#{module_name})"
402
402
 
@@ -51,7 +51,7 @@ module BOAST
51
51
  cflags += " -DHAVE_NARRAY_H" if narray_path
52
52
  cflags += " -I/usr/local/k1tools/include" if @architecture == MPPA
53
53
  objext = RbConfig::CONFIG["OBJEXT"]
54
- if options[:openmp] and @lang == C then
54
+ if options[:openmp] and @lang == C and not disable_openmp then
55
55
  openmp_cflags = get_openmp_flags(c_compiler)
56
56
  raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
57
57
  cflags += " #{openmp_cflags}"
@@ -79,7 +79,7 @@ module BOAST
79
79
  cxx_compiler = options[:CXX]
80
80
  cxxflags = options[:CXXFLAGS]
81
81
  cxxflags += " -fPIC #{includes}"
82
- if options[:openmp] and @lang == C then
82
+ if options[:openmp] and @lang == C and not disable_openmp then
83
83
  openmp_cxxflags = get_openmp_flags(cxx_compiler)
84
84
  raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
85
85
  cxxflags += " #{openmp_cxxflags}"
@@ -97,7 +97,7 @@ module BOAST
97
97
  fcflags += " -march=#{get_model}"
98
98
  fcflags += " -fPIC"
99
99
  fcflags += " -fno-second-underscore" if f_compiler == 'g95'
100
- if options[:openmp] and @lang == FORTRAN then
100
+ if options[:openmp] and @lang == FORTRAN and not disable_openmp then
101
101
  openmp_fcflags = get_openmp_flags(f_compiler)
102
102
  raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
103
103
  fcflags += " #{openmp_fcflags}"
@@ -152,7 +152,7 @@ module BOAST
152
152
  c_compiler = "cc" if not c_compiler
153
153
  linker = options[:LD]
154
154
  linker = c_compiler if not linker
155
- if options[:openmp] then
155
+ if options[:openmp] and not disable_openmp then
156
156
  openmp_ldflags = get_openmp_flags(linker)
157
157
  raise "unknown openmp flags for: #{linker}" if not openmp_ldflags
158
158
  ldflags += " #{openmp_ldflags}"
@@ -35,16 +35,19 @@ module BOAST
35
35
  private_boolean_state_accessor :verbose
36
36
  private_boolean_state_accessor :debug_source
37
37
  private_boolean_state_accessor :ffi
38
+ private_boolean_state_accessor :keep_temp
38
39
  private_state_accessor :fortran_line_length
39
40
  end
40
41
 
41
42
  boolean_state_accessor :verbose
42
43
  boolean_state_accessor :debug_source
43
44
  boolean_state_accessor :ffi
45
+ boolean_state_accessor :keep_temp
44
46
  state_accessor :fortran_line_length
45
- default_state_getter :ffi, false
46
47
  default_state_getter :verbose, false
47
48
  default_state_getter :debug_source, false
49
+ default_state_getter :ffi, false
50
+ default_state_getter :keep_temp, false
48
51
  default_state_getter :fortran_line_length, 72
49
52
 
50
53
  module_function
@@ -26,8 +26,8 @@ module BOAST
26
26
  puts "#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}"
27
27
  end
28
28
  result = `#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}`
29
- File::unlink(library_object)
30
- File::unlink(library_source)
29
+ File::unlink(library_object) unless keep_temp
30
+ File::unlink(library_source) unless keep_temp
31
31
  return result
32
32
  end
33
33
  end
@@ -38,23 +38,23 @@ module BOAST
38
38
 
39
39
  def init_opencl_types
40
40
  @@opencl_real_types = {
41
- 2 => OpenCL::Half,
42
- 4 => OpenCL::Float,
43
- 8 => OpenCL::Double
41
+ 2 => OpenCL::Half1,
42
+ 4 => OpenCL::Float1,
43
+ 8 => OpenCL::Double1
44
44
  }
45
45
 
46
46
  @@opencl_int_types = {
47
47
  true => {
48
- 1 => OpenCL::Char,
49
- 2 => OpenCL::Short,
50
- 4 => OpenCL::Int,
51
- 8 => OpenCL::Long
48
+ 1 => OpenCL::Char1,
49
+ 2 => OpenCL::Short1,
50
+ 4 => OpenCL::Int1,
51
+ 8 => OpenCL::Long1
52
52
  },
53
53
  false => {
54
- 1 => OpenCL::UChar,
55
- 2 => OpenCL::UShort,
56
- 4 => OpenCL::UInt,
57
- 8 => OpenCL::ULong
54
+ 1 => OpenCL::UChar1,
55
+ 2 => OpenCL::UShort1,
56
+ 4 => OpenCL::UInt1,
57
+ 8 => OpenCL::ULong1
58
58
  }
59
59
  }
60
60
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: BOAST
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brice Videau
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-08 00:00:00.000000000 Z
11
+ date: 2016-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray
@@ -56,20 +56,20 @@ dependencies:
56
56
  requirements:
57
57
  - - "~>"
58
58
  - !ruby/object:Gem::Version
59
- version: '1.0'
59
+ version: '1.2'
60
60
  - - ">="
61
61
  - !ruby/object:Gem::Version
62
- version: 1.0.0
62
+ version: 1.2.1
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '1.0'
69
+ version: '1.2'
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
- version: 1.0.0
72
+ version: 1.2.1
73
73
  - !ruby/object:Gem::Dependency
74
74
  name: systemu
75
75
  requirement: !ruby/object:Gem::Requirement