BOAST 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BOAST.gemspec +2 -2
- data/lib/BOAST/Language/Algorithm.rb +15 -2
- data/lib/BOAST/Language/Architectures.rb +27 -0
- data/lib/BOAST/Language/Index.rb +5 -0
- data/lib/BOAST/Language/Intrinsics.rb +66 -20
- data/lib/BOAST/Language/OpenMP.rb +2 -2
- data/lib/BOAST/Language/Operators.rb +127 -37
- data/lib/BOAST/Language/Parens.rb +8 -2
- data/lib/BOAST/Language/Variable.rb +5 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +1 -1
- data/lib/BOAST/Runtime/Compilers.rb +4 -4
- data/lib/BOAST/Runtime/Config.rb +4 -1
- data/lib/BOAST/Runtime/MAQAO.rb +2 -2
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +11 -11
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0bb86d632c7e15c62f42007db65ef307a86ed1eb
|
4
|
+
data.tar.gz: 8b4323362889540f6e731bad43e6765b24f26d2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4dd1ffe821624820fec06684910ad87f0dd451d81b596fafceda7b41651e0d5810dfe4aa902de69d609387577e4ace87b1ede8dea7ebcc65cc4d11d13cd3dda
|
7
|
+
data.tar.gz: 65d6bc915e2299d38e1bd2f4c52057f42e5c6ada5fd5d26416dacd20a4169f364f3eaec28c9e755a4d3b40c3903cb8f2092c23d55dcf400b5b7dbeac5a3f1787
|
data/BOAST.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'BOAST'
|
3
|
-
s.version = "1.2.
|
3
|
+
s.version = "1.2.1"
|
4
4
|
s.author = "Brice Videau"
|
5
5
|
s.email = "brice.videau@imag.fr"
|
6
6
|
s.homepage = "https://github.com/Nanosim-LIG/boast"
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
|
|
12
12
|
s.required_ruby_version = '>= 1.9.3'
|
13
13
|
s.add_dependency 'narray', '~> 0.6.0', '>=0.6.0.8'
|
14
14
|
s.add_dependency 'narray_ffi', '~> 1.2', '>=1.2.0'
|
15
|
-
s.add_dependency 'opencl_ruby_ffi', '~> 1.
|
15
|
+
s.add_dependency 'opencl_ruby_ffi', '~> 1.2', '>=1.2.1'
|
16
16
|
s.add_dependency 'systemu', '~> 2', '>=2.2.0'
|
17
17
|
s.add_dependency 'os', '~> 0.9', '>=0.9.6'
|
18
18
|
s.add_dependency 'PAPI', '~> 0', '>=0.101'
|
@@ -30,6 +30,7 @@ module BOAST
|
|
30
30
|
private_boolean_state_accessor :decl_module
|
31
31
|
private_boolean_state_accessor :annotate
|
32
32
|
private_boolean_state_accessor :optimizer_log
|
33
|
+
private_boolean_state_accessor :disable_openmp
|
33
34
|
|
34
35
|
private
|
35
36
|
def push_env(*args)
|
@@ -84,6 +85,7 @@ module BOAST
|
|
84
85
|
boolean_state_accessor :decl_module
|
85
86
|
boolean_state_accessor :annotate
|
86
87
|
boolean_state_accessor :optimizer_log
|
88
|
+
boolean_state_accessor :disable_openmp
|
87
89
|
|
88
90
|
default_state_getter :address_size, OS.bits/8
|
89
91
|
default_state_getter :lang, FORTRAN, '"const_get(#{envs})"', :BOAST_LANG
|
@@ -104,6 +106,7 @@ module BOAST
|
|
104
106
|
default_state_getter :annotate_level, 0
|
105
107
|
default_state_getter :optimizer_log, false
|
106
108
|
default_state_getter :optimizer_log_file, nil
|
109
|
+
default_state_getter :disable_openmp, false
|
107
110
|
|
108
111
|
alias use_vla_old? use_vla?
|
109
112
|
class << self
|
@@ -163,17 +166,27 @@ module BOAST
|
|
163
166
|
|
164
167
|
@@env = Hash::new{|h, k| h[k] = []}
|
165
168
|
|
166
|
-
def push_env(vars = {})
|
167
|
-
|
169
|
+
def push_env(vars = {}, &block)
|
170
|
+
keys = []
|
171
|
+
vars.each { |key, value|
|
168
172
|
var = nil
|
169
173
|
begin
|
170
174
|
var = BOAST::class_variable_get("@@"+key.to_s)
|
171
175
|
rescue
|
176
|
+
BOAST::pop_env(*keys)
|
172
177
|
raise "Unknown module variable #{key}!"
|
173
178
|
end
|
174
179
|
@@env[key].push(var)
|
175
180
|
BOAST::class_variable_set("@@"+key.to_s, value)
|
181
|
+
keys.push(key)
|
176
182
|
}
|
183
|
+
if block then
|
184
|
+
begin
|
185
|
+
block.call
|
186
|
+
ensure
|
187
|
+
BOAST::pop_env(*vars.keys)
|
188
|
+
end
|
189
|
+
end
|
177
190
|
end
|
178
191
|
|
179
192
|
def pop_env(*vars)
|
@@ -91,6 +91,33 @@ X86architectures ={"pentium2"=>["MMX"],
|
|
91
91
|
"RDSEED",
|
92
92
|
"ADCX",
|
93
93
|
"PREFETCHW"],
|
94
|
+
"knl"=>
|
95
|
+
["MOVBE",
|
96
|
+
"MMX",
|
97
|
+
"SSE",
|
98
|
+
"SSE2",
|
99
|
+
"SSE3",
|
100
|
+
"SSSE3",
|
101
|
+
"SSE4.1",
|
102
|
+
"SSE4.2",
|
103
|
+
"POPCNT",
|
104
|
+
"AVX",
|
105
|
+
"AVX2",
|
106
|
+
"AES",
|
107
|
+
"PCLMUL",
|
108
|
+
"FSGSBASE",
|
109
|
+
"RDRND",
|
110
|
+
"FMA",
|
111
|
+
"BMI",
|
112
|
+
"BMI2",
|
113
|
+
"F16C",
|
114
|
+
"RDSEED",
|
115
|
+
"ADCX",
|
116
|
+
"PREFETCHW",
|
117
|
+
"AVX512F",
|
118
|
+
"AVX512PF",
|
119
|
+
"AVX512ER",
|
120
|
+
"AVX512CD"],
|
94
121
|
"bonnell"=>["MOVBE", "MMX", "SSE", "SSE2", "SSE3", "SSSE3"],
|
95
122
|
"silvermont"=>
|
96
123
|
["MOVBE",
|
data/lib/BOAST/Language/Index.rb
CHANGED
@@ -14,6 +14,11 @@ module BOAST
|
|
14
14
|
return !!@alignment
|
15
15
|
end
|
16
16
|
|
17
|
+
def set_align(align)
|
18
|
+
@alignment = align
|
19
|
+
return self
|
20
|
+
end
|
21
|
+
|
17
22
|
def to_var
|
18
23
|
var = @source.copy("#{self}", :const => nil, :constant => nil, :dim => nil, :dimension => nil, :direction => nil, :dir => nil, :align => alignment)
|
19
24
|
return var
|
@@ -39,16 +39,36 @@ module BOAST
|
|
39
39
|
INTRINSICS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
|
40
40
|
CONVERSIONS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
|
41
41
|
|
42
|
+
def check_coverage
|
43
|
+
ins = []
|
44
|
+
INTRINSICS[X86].each { |i,v|
|
45
|
+
if i == :CVT then
|
46
|
+
v.each { |type1, h|
|
47
|
+
h.each { |type2, instr|
|
48
|
+
ins.push instr.to_s
|
49
|
+
}
|
50
|
+
}
|
51
|
+
else
|
52
|
+
v.each { |type, instr|
|
53
|
+
ins.push instr.to_s
|
54
|
+
}
|
55
|
+
end
|
56
|
+
}
|
57
|
+
return ins - INSTRUCTIONS.keys
|
58
|
+
end
|
59
|
+
|
60
|
+
module_function :check_coverage
|
61
|
+
|
42
62
|
def intrinsics_by_vector_name(intr_symbol, type, type2=nil)
|
43
63
|
if type2 then
|
44
64
|
instruction = INTRINSICS[get_architecture][intr_symbol][type][type2]
|
45
65
|
else
|
46
66
|
instruction = INTRINSICS[get_architecture][intr_symbol][type]
|
47
67
|
end
|
68
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
48
69
|
return instruction if get_architecture == ARM
|
49
|
-
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
50
70
|
supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
|
51
|
-
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
|
71
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
|
52
72
|
return instruction
|
53
73
|
end
|
54
74
|
|
@@ -213,7 +233,7 @@ module BOAST
|
|
213
233
|
|
214
234
|
module_function :type_name_X86
|
215
235
|
|
216
|
-
[64, 128, 256].each { |vector_size|
|
236
|
+
[64, 128, 256, 512].each { |vector_size|
|
217
237
|
vs = ( vector_size < 256 ? "" : "#{vector_size}" )
|
218
238
|
sizes = [8, 16, 32]
|
219
239
|
sizes.push( 64 ) if vector_size > 64
|
@@ -222,12 +242,17 @@ module BOAST
|
|
222
242
|
vtype = vector_type_name( :int, size, vector_size, sign )
|
223
243
|
type = type_name_X86( :int, size, vector_size )
|
224
244
|
instructions = [[:ADD, "add"], [:SUB, "sub"]]
|
225
|
-
instructions.push( [:SET, "setr"]
|
226
|
-
instructions.push( [:
|
245
|
+
instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
|
246
|
+
instructions.push( [:SET1, "set1"] )
|
247
|
+
instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
|
248
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
|
249
|
+
instructions.push( [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
|
250
|
+
[:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
|
251
|
+
[:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"], ) if vector_size >= 128 and size >= 32
|
227
252
|
instructions.each { |cl, ins|
|
228
253
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
229
254
|
}
|
230
|
-
if size == 64 and vector_size
|
255
|
+
if size == 64 and vector_size == 256 then
|
231
256
|
INTRINSICS[X86][:SET1][vtype] = "_mm#{vs}_set1_#{type}x".to_sym
|
232
257
|
INTRINSICS[X86][:SET][vtype] = "_mm#{vs}_setr_#{type}x".to_sym
|
233
258
|
end
|
@@ -236,8 +261,10 @@ module BOAST
|
|
236
261
|
[8, 16, 32, 64].each { |size|
|
237
262
|
[:signed, :unsigned].each { |sign|
|
238
263
|
vtype = vector_type_name( :int, size, vector_size, sign )
|
239
|
-
[[:LOAD, "loadu"],
|
240
|
-
|
264
|
+
instructions = [[:LOAD, "loadu"], [:LOADA, "load"],
|
265
|
+
[:STORE, "storeu"], [:STOREA, "store"],
|
266
|
+
[:SETZERO, "setzero"] ]
|
267
|
+
instructions.each { |cl, ins|
|
241
268
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_si#{vector_size}".to_sym
|
242
269
|
}
|
243
270
|
}
|
@@ -245,12 +272,19 @@ module BOAST
|
|
245
272
|
sizes = []
|
246
273
|
sizes.push( 32, 64 ) if vector_size > 64
|
247
274
|
sizes.each { |size|
|
248
|
-
[[:ADD, "add"],
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
275
|
+
instructions = [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
|
276
|
+
[:FMADD, "fmadd"], [:FMSUB, "fmsub"],
|
277
|
+
[:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
|
278
|
+
[:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
|
279
|
+
[:LOAD, "loadu"], [:LOADA, "load"],
|
280
|
+
[:STORE, "storeu"], [:STOREA, "store"],
|
281
|
+
[:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
|
282
|
+
[:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
|
283
|
+
[:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
|
284
|
+
[:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"] ]
|
285
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
|
286
|
+
instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
|
287
|
+
instructions.each { |cl, ins|
|
254
288
|
vtype = vector_type_name( :float, size, vector_size)
|
255
289
|
type = type_name_X86( :float, size, vector_size )
|
256
290
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
@@ -258,7 +292,7 @@ module BOAST
|
|
258
292
|
}
|
259
293
|
}
|
260
294
|
INTRINSICS[X86][:CVT] = Hash::new { |h,k| h[k] = {} }
|
261
|
-
[128, 256].each { |bvsize|
|
295
|
+
[128, 256, 512].each { |bvsize|
|
262
296
|
[16, 32, 64].each { |bsize|
|
263
297
|
ssize = bsize/2
|
264
298
|
while ssize >= 8
|
@@ -296,6 +330,15 @@ module BOAST
|
|
296
330
|
INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
|
297
331
|
}
|
298
332
|
}
|
333
|
+
[64,32].each { |bsize|
|
334
|
+
ftype = type_name_X86( :float, bsize, bvsize )
|
335
|
+
itype = type_name_X86( :int, bsize, bvsize, :signed )
|
336
|
+
fvtype = vector_type_name( :float, bsize, bvsize )
|
337
|
+
ivtype = vector_type_name( :int, bsize, bvsize, :signed )
|
338
|
+
vs = ( bvsize < 256 ? "" : "#{bvsize}" )
|
339
|
+
INTRINSICS[X86][:CVT][fvtype][ivtype] = "_mm#{vs}_cvt#{itype}_#{ftype}".to_sym
|
340
|
+
INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
|
341
|
+
}
|
299
342
|
}
|
300
343
|
|
301
344
|
|
@@ -324,7 +367,7 @@ module BOAST
|
|
324
367
|
vtype = vector_type_name( :float, size, vector_size )
|
325
368
|
type = type_name_ARM( :float, size )
|
326
369
|
[[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
|
327
|
-
[:FMADD, "mla"], [:
|
370
|
+
[:FMADD, "mla"], [:FNMADD, "mls"],
|
328
371
|
[:LOAD, "ldl"], [:LOADA, "ldl"],
|
329
372
|
[:STORE, "stl"], [:STOREA, "stl"]].each { |cl, ins|
|
330
373
|
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_#{type}".to_sym
|
@@ -332,6 +375,9 @@ module BOAST
|
|
332
375
|
[[:SET1, "dup"]].each { |cl, ins|
|
333
376
|
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_n_#{type}".to_sym
|
334
377
|
}
|
378
|
+
[[:SET_LANE, "set"]].each { |cl, ins|
|
379
|
+
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_lane_#{type}".to_sym
|
380
|
+
}
|
335
381
|
}
|
336
382
|
}
|
337
383
|
INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
|
@@ -344,8 +390,8 @@ module BOAST
|
|
344
390
|
ivtype = vector_type_name( :int, int_size, vector_size, sign )
|
345
391
|
ftype = type_name_ARM( :float, float_size )
|
346
392
|
itype = type_name_ARM( :int, int_size, sign )
|
347
|
-
INTRINSICS[ARM][:CVT][
|
348
|
-
INTRINSICS[ARM][:CVT][
|
393
|
+
INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
|
394
|
+
INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
|
349
395
|
}
|
350
396
|
}
|
351
397
|
svsize = 64
|
@@ -357,8 +403,8 @@ module BOAST
|
|
357
403
|
btype = type_name_ARM( :int, bsize, sign )
|
358
404
|
svtype = vector_type_name( :int, ssize, svsize, sign )
|
359
405
|
bvtype = vector_type_name( :int, bsize, bvsize, sign )
|
360
|
-
INTRINSICS[ARM][:CVT][
|
361
|
-
INTRINSICS[ARM][:CVT][
|
406
|
+
INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovl_#{stype}".to_sym
|
407
|
+
INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovn_#{btype}".to_sym
|
362
408
|
}
|
363
409
|
}
|
364
410
|
|
@@ -152,7 +152,7 @@ EOF
|
|
152
152
|
end
|
153
153
|
|
154
154
|
def open
|
155
|
-
output.puts to_s
|
155
|
+
output.puts to_s unless disable_openmp
|
156
156
|
return self
|
157
157
|
end
|
158
158
|
|
@@ -166,7 +166,7 @@ EOF
|
|
166
166
|
end
|
167
167
|
|
168
168
|
def close
|
169
|
-
output.puts end_string(openmp_end_clauses_to_s)
|
169
|
+
output.puts end_string(openmp_end_clauses_to_s) unless disable_openmp
|
170
170
|
return self
|
171
171
|
end
|
172
172
|
|
@@ -316,7 +316,7 @@ module BOAST
|
|
316
316
|
|
317
317
|
def initialize(source, return_type)
|
318
318
|
@source = source
|
319
|
-
@return_type = return_type
|
319
|
+
@return_type = return_type.to_var
|
320
320
|
end
|
321
321
|
|
322
322
|
def type
|
@@ -328,21 +328,29 @@ module BOAST
|
|
328
328
|
if @source.kind_of?( Array ) then
|
329
329
|
raise OperatorError, "Invalid array length!" unless @source.length == @return_type.type.vector_length
|
330
330
|
return @return_type.copy("(#{@return_type.type.decl})( #{@source.join(", ")} )", DISCARD_OPTIONS) if lang == CL
|
331
|
-
|
331
|
+
return Set(@source.first, @return_type).to_var if @source.uniq.size == 1
|
332
332
|
begin
|
333
333
|
instruction = intrinsics(:SET, @return_type.type)
|
334
|
+
raise IntrinsicsError unless instruction
|
334
335
|
return @return_type.copy("#{instruction}( #{@source.join(", ")} )", DISCARD_OPTIONS)
|
335
336
|
rescue IntrinsicsError
|
336
337
|
instruction = intrinsics(:SET_LANE, @return_type.type)
|
338
|
+
raise IntrinsicsError, "Missing instruction for SET_LANE on #{get_architecture_name}!" unless instruction
|
337
339
|
s = Set(0, @return_type).to_s
|
338
340
|
@source.each_with_index { |v,i|
|
339
|
-
s = "#{instruction}(#{v}, #{s}, #{i})"
|
341
|
+
s = "#{instruction}( #{v}, #{s}, #{i} )"
|
340
342
|
}
|
341
343
|
return @return_type.copy(s, DISCARD_OPTIONS)
|
342
344
|
end
|
343
345
|
elsif @source.class != Variable or @source.type.vector_length == 1 then
|
344
346
|
return @return_type.copy("(#{@return_type.type.decl})( #{@source} )", DISCARD_OPTIONS) if lang == CL
|
345
|
-
|
347
|
+
if (@source.is_a?(Numeric) and @source == 0) or (@source.class == Variable and @source.constant == 0) then
|
348
|
+
begin
|
349
|
+
instruction = intrinsics(:SETZERO, @return_type.type)
|
350
|
+
return @return_type.copy("#{instruction}( )", DISCARD_OPTIONS) if instruction
|
351
|
+
rescue IntrinsicsError
|
352
|
+
end
|
353
|
+
end
|
346
354
|
instruction = intrinsics(:SET1, @return_type.type)
|
347
355
|
return @return_type.copy("#{instruction}( #{@source} )", DISCARD_OPTIONS)
|
348
356
|
elsif @return_type.type != @source.type
|
@@ -379,7 +387,7 @@ module BOAST
|
|
379
387
|
|
380
388
|
def initialize(source, return_type)
|
381
389
|
@source = source
|
382
|
-
@return_type = return_type
|
390
|
+
@return_type = return_type.to_var
|
383
391
|
end
|
384
392
|
|
385
393
|
def type
|
@@ -445,12 +453,12 @@ module BOAST
|
|
445
453
|
def initialize(source, mask, return_type)
|
446
454
|
@source = source
|
447
455
|
@mask = mask
|
448
|
-
@return_type = return_type
|
456
|
+
@return_type = return_type.to_var
|
449
457
|
end
|
450
458
|
|
451
459
|
def get_mask
|
452
|
-
|
453
|
-
return
|
460
|
+
type = @return_type.type
|
461
|
+
return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
|
454
462
|
end
|
455
463
|
|
456
464
|
private :get_mask
|
@@ -461,8 +469,12 @@ module BOAST
|
|
461
469
|
|
462
470
|
def to_var
|
463
471
|
raise OperatorError, "Cannot load unknown type!" unless @return_type
|
472
|
+
type = @return_type.type
|
464
473
|
raise LanguageError, "Unsupported language!" unless lang == C
|
465
|
-
|
474
|
+
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
475
|
+
return Load( @source, @return_type ).to_var unless @mask.include?(0)
|
476
|
+
return Set( 0, @return_type ).to_var if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
477
|
+
instruction = intrinsics(:MASKLOAD, type)
|
466
478
|
s = ""
|
467
479
|
src = "#{@source}"
|
468
480
|
if src[0] != "*" then
|
@@ -470,8 +482,8 @@ module BOAST
|
|
470
482
|
else
|
471
483
|
src = src[1..-1]
|
472
484
|
end
|
473
|
-
p_type =
|
474
|
-
s += "#{instruction}((#{p_type.decl} * )#{src}, #{get_mask})"
|
485
|
+
p_type = type.copy(:vector_length => 1)
|
486
|
+
s += "#{instruction}( (#{p_type.decl} * ) #{src}, #{get_mask} )"
|
475
487
|
return @return_type.copy( s, DISCARD_OPTIONS)
|
476
488
|
end
|
477
489
|
|
@@ -501,11 +513,11 @@ module BOAST
|
|
501
513
|
attr_reader :source
|
502
514
|
attr_reader :store_type
|
503
515
|
|
504
|
-
def initialize(dest, source,
|
516
|
+
def initialize(dest, source, store_type = nil)
|
505
517
|
@dest = dest
|
506
518
|
@source = source
|
507
519
|
@store_type = store_type
|
508
|
-
@store_type = source unless @store_type
|
520
|
+
@store_type = source.to_var unless @store_type
|
509
521
|
end
|
510
522
|
|
511
523
|
def to_s
|
@@ -516,17 +528,17 @@ module BOAST
|
|
516
528
|
else
|
517
529
|
dst = dst[1..-1]
|
518
530
|
end
|
531
|
+
type = @store_type.type
|
532
|
+
return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
|
533
|
+
return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
|
519
534
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
if @dest.alignment == @source.type.total_size then
|
524
|
-
instruction = intrinsics(:STOREA, @source.type)
|
535
|
+
if @dest.alignment == type.total_size then
|
536
|
+
instruction = intrinsics(:STOREA, type)
|
525
537
|
else
|
526
|
-
instruction = intrinsics(:STORE,
|
538
|
+
instruction = intrinsics(:STORE, type)
|
527
539
|
end
|
528
|
-
p_type =
|
529
|
-
p_type =
|
540
|
+
p_type = type.copy(:vector_length => 1)
|
541
|
+
p_type = type if get_architecture == X86 and type.kind_of?(Int)
|
530
542
|
return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
|
531
543
|
end
|
532
544
|
return Affectation.basic_usage(@dest, @source)
|
@@ -560,20 +572,24 @@ module BOAST
|
|
560
572
|
@source = source
|
561
573
|
@mask = mask
|
562
574
|
@store_type = store_type
|
563
|
-
@store_type = source unless @store_type
|
575
|
+
@store_type = source.to_var unless @store_type
|
564
576
|
end
|
565
577
|
|
566
578
|
def get_mask
|
567
|
-
|
568
|
-
return
|
579
|
+
type = @store_type.type
|
580
|
+
return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
|
569
581
|
end
|
570
582
|
|
571
583
|
private :get_mask
|
572
584
|
|
573
585
|
def to_s
|
574
586
|
raise OperatorError, "Cannot store unknown type!" unless @store_type
|
587
|
+
type = @store_type.type
|
575
588
|
raise LanguageError, "Unsupported language!" unless lang == C
|
576
|
-
|
589
|
+
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
590
|
+
return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
|
591
|
+
return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
592
|
+
instruction = intrinsics(:MASKSTORE, type)
|
577
593
|
s = ""
|
578
594
|
dst = "#{@dest}"
|
579
595
|
if dst[0] != "*" then
|
@@ -581,8 +597,81 @@ module BOAST
|
|
581
597
|
else
|
582
598
|
dst = dst[1..-1]
|
583
599
|
end
|
584
|
-
p_type =
|
585
|
-
return s += "#{instruction}((#{p_type.decl} * )#{dst}, #{get_mask}, #{Operator.convert(@source,
|
600
|
+
p_type = type.copy(:vector_length => 1)
|
601
|
+
return s += "#{instruction}( (#{p_type.decl} * ) #{dst}, #{get_mask}, #{Operator.convert(@source, type)} )"
|
602
|
+
end
|
603
|
+
|
604
|
+
def pr
|
605
|
+
ss = to_s
|
606
|
+
if ss then
|
607
|
+
s=""
|
608
|
+
s += indent
|
609
|
+
s += ss
|
610
|
+
s += ";" if [C, CL, CUDA].include?( lang )
|
611
|
+
output.puts s
|
612
|
+
end
|
613
|
+
return self
|
614
|
+
end
|
615
|
+
|
616
|
+
end
|
617
|
+
|
618
|
+
class FMA < Operator
|
619
|
+
extend Functor
|
620
|
+
include Intrinsics
|
621
|
+
include Arithmetic
|
622
|
+
include Inspectable
|
623
|
+
include PrivateStateAccessor
|
624
|
+
|
625
|
+
attr_reader :operand1
|
626
|
+
attr_reader :operand2
|
627
|
+
attr_reader :operand3
|
628
|
+
attr_reader :return_type
|
629
|
+
|
630
|
+
def initialize(a,b,c)
|
631
|
+
@operand1 = a
|
632
|
+
@operand2 = b
|
633
|
+
@operand3 = c
|
634
|
+
@return_type = nil
|
635
|
+
@return_type = @operand3.to_var unless @return_type
|
636
|
+
end
|
637
|
+
|
638
|
+
def convert_operand(op)
|
639
|
+
return "#{Operator.convert(op, @return_type.type)}"
|
640
|
+
end
|
641
|
+
|
642
|
+
private :convert_operand
|
643
|
+
|
644
|
+
def type
|
645
|
+
return @return_type.type
|
646
|
+
end
|
647
|
+
|
648
|
+
def to_var
|
649
|
+
instruction = nil
|
650
|
+
begin
|
651
|
+
instruction = intrinsics(:FMADD,@return_type.type)
|
652
|
+
rescue
|
653
|
+
end
|
654
|
+
return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
|
655
|
+
op1 = convert_operand(@operand1.to_var)
|
656
|
+
op2 = convert_operand(@operand2.to_var)
|
657
|
+
op3 = convert_operand(@operand3.to_var)
|
658
|
+
if [CL, CUDA].include?(lang)
|
659
|
+
ret_name = "fma( #{op1}, #{op2}, #{op3} )"
|
660
|
+
else
|
661
|
+
case architecture
|
662
|
+
when X86
|
663
|
+
ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
|
664
|
+
when ARM
|
665
|
+
ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
|
666
|
+
else
|
667
|
+
return (@operand3 + @operand1 * @operand2).to_var
|
668
|
+
end
|
669
|
+
end
|
670
|
+
return @return_type.copy( ret_name, DISCARD_OPTIONS)
|
671
|
+
end
|
672
|
+
|
673
|
+
def to_s
|
674
|
+
return to_var.to_s
|
586
675
|
end
|
587
676
|
|
588
677
|
def pr
|
@@ -596,7 +685,7 @@ module BOAST
|
|
596
685
|
|
597
686
|
end
|
598
687
|
|
599
|
-
class
|
688
|
+
class FMS < Operator
|
600
689
|
extend Functor
|
601
690
|
include Intrinsics
|
602
691
|
include Arithmetic
|
@@ -629,23 +718,24 @@ module BOAST
|
|
629
718
|
def to_var
|
630
719
|
instruction = nil
|
631
720
|
begin
|
632
|
-
instruction = intrinsics(:
|
721
|
+
instruction = intrinsics(:FNMADD,@return_type.type)
|
633
722
|
rescue
|
634
723
|
end
|
635
|
-
return (@operand3
|
636
|
-
op1 = convert_operand(@operand1)
|
637
|
-
op2 = convert_operand(@operand2)
|
638
|
-
op3 = convert_operand(@operand3)
|
724
|
+
return (@operand3 - @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
|
725
|
+
op1 = convert_operand(@operand1.to_var)
|
726
|
+
op2 = convert_operand(@operand2.to_var)
|
727
|
+
op3 = convert_operand(@operand3.to_var)
|
639
728
|
if [CL, CUDA].include?(lang)
|
640
|
-
|
729
|
+
op1 = convert_operand((-@operand1).to_var)
|
730
|
+
ret_name = "fma( #{op1}, #{op2}, #{op3} )"
|
641
731
|
else
|
642
732
|
case architecture
|
643
733
|
when X86
|
644
|
-
ret_name = "#{instruction}(#{op1}
|
734
|
+
ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
|
645
735
|
when ARM
|
646
|
-
ret_name = "#{instruction}(#{
|
736
|
+
ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
|
647
737
|
else
|
648
|
-
return (@operand1 * @operand2
|
738
|
+
return (@operand3 - @operand1 * @operand2).to_var
|
649
739
|
end
|
650
740
|
end
|
651
741
|
return @return_type.copy( ret_name, DISCARD_OPTIONS)
|
@@ -2,10 +2,16 @@ module BOAST
|
|
2
2
|
|
3
3
|
module_function
|
4
4
|
|
5
|
-
|
5
|
+
FUNCCALLS = {}
|
6
|
+
|
7
|
+
def register_funccall(name, options = {})
|
8
|
+
sym = name.to_sym
|
9
|
+
FUNCCALLS[sym] = {}
|
10
|
+
FUNCCALLS[sym][:parameters] = options[:parameters]
|
11
|
+
FUNCCALLS[sym][:returns] = options[:returns]
|
6
12
|
s =<<EOF
|
7
13
|
def self.#{name}(*args)
|
8
|
-
return FuncCall(
|
14
|
+
return FuncCall(#{sym.inspect}, *args#{options[:returns] ? ", returns: FUNCCALLS[#{sym.inspect}][:returns]" : ""})
|
9
15
|
end
|
10
16
|
EOF
|
11
17
|
eval s
|
@@ -51,7 +51,7 @@ module BOAST
|
|
51
51
|
cflags += " -DHAVE_NARRAY_H" if narray_path
|
52
52
|
cflags += " -I/usr/local/k1tools/include" if @architecture == MPPA
|
53
53
|
objext = RbConfig::CONFIG["OBJEXT"]
|
54
|
-
if options[:openmp] and @lang == C then
|
54
|
+
if options[:openmp] and @lang == C and not disable_openmp then
|
55
55
|
openmp_cflags = get_openmp_flags(c_compiler)
|
56
56
|
raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
|
57
57
|
cflags += " #{openmp_cflags}"
|
@@ -79,7 +79,7 @@ module BOAST
|
|
79
79
|
cxx_compiler = options[:CXX]
|
80
80
|
cxxflags = options[:CXXFLAGS]
|
81
81
|
cxxflags += " -fPIC #{includes}"
|
82
|
-
if options[:openmp] and @lang == C then
|
82
|
+
if options[:openmp] and @lang == C and not disable_openmp then
|
83
83
|
openmp_cxxflags = get_openmp_flags(cxx_compiler)
|
84
84
|
raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
|
85
85
|
cxxflags += " #{openmp_cxxflags}"
|
@@ -97,7 +97,7 @@ module BOAST
|
|
97
97
|
fcflags += " -march=#{get_model}"
|
98
98
|
fcflags += " -fPIC"
|
99
99
|
fcflags += " -fno-second-underscore" if f_compiler == 'g95'
|
100
|
-
if options[:openmp] and @lang == FORTRAN then
|
100
|
+
if options[:openmp] and @lang == FORTRAN and not disable_openmp then
|
101
101
|
openmp_fcflags = get_openmp_flags(f_compiler)
|
102
102
|
raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
|
103
103
|
fcflags += " #{openmp_fcflags}"
|
@@ -152,7 +152,7 @@ module BOAST
|
|
152
152
|
c_compiler = "cc" if not c_compiler
|
153
153
|
linker = options[:LD]
|
154
154
|
linker = c_compiler if not linker
|
155
|
-
if options[:openmp] then
|
155
|
+
if options[:openmp] and not disable_openmp then
|
156
156
|
openmp_ldflags = get_openmp_flags(linker)
|
157
157
|
raise "unknown openmp flags for: #{linker}" if not openmp_ldflags
|
158
158
|
ldflags += " #{openmp_ldflags}"
|
data/lib/BOAST/Runtime/Config.rb
CHANGED
@@ -35,16 +35,19 @@ module BOAST
|
|
35
35
|
private_boolean_state_accessor :verbose
|
36
36
|
private_boolean_state_accessor :debug_source
|
37
37
|
private_boolean_state_accessor :ffi
|
38
|
+
private_boolean_state_accessor :keep_temp
|
38
39
|
private_state_accessor :fortran_line_length
|
39
40
|
end
|
40
41
|
|
41
42
|
boolean_state_accessor :verbose
|
42
43
|
boolean_state_accessor :debug_source
|
43
44
|
boolean_state_accessor :ffi
|
45
|
+
boolean_state_accessor :keep_temp
|
44
46
|
state_accessor :fortran_line_length
|
45
|
-
default_state_getter :ffi, false
|
46
47
|
default_state_getter :verbose, false
|
47
48
|
default_state_getter :debug_source, false
|
49
|
+
default_state_getter :ffi, false
|
50
|
+
default_state_getter :keep_temp, false
|
48
51
|
default_state_getter :fortran_line_length, 72
|
49
52
|
|
50
53
|
module_function
|
data/lib/BOAST/Runtime/MAQAO.rb
CHANGED
@@ -26,8 +26,8 @@ module BOAST
|
|
26
26
|
puts "#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}"
|
27
27
|
end
|
28
28
|
result = `#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}`
|
29
|
-
File::unlink(library_object)
|
30
|
-
File::unlink(library_source)
|
29
|
+
File::unlink(library_object) unless keep_temp
|
30
|
+
File::unlink(library_source) unless keep_temp
|
31
31
|
return result
|
32
32
|
end
|
33
33
|
end
|
@@ -38,23 +38,23 @@ module BOAST
|
|
38
38
|
|
39
39
|
def init_opencl_types
|
40
40
|
@@opencl_real_types = {
|
41
|
-
2 => OpenCL::
|
42
|
-
4 => OpenCL::
|
43
|
-
8 => OpenCL::
|
41
|
+
2 => OpenCL::Half1,
|
42
|
+
4 => OpenCL::Float1,
|
43
|
+
8 => OpenCL::Double1
|
44
44
|
}
|
45
45
|
|
46
46
|
@@opencl_int_types = {
|
47
47
|
true => {
|
48
|
-
1 => OpenCL::
|
49
|
-
2 => OpenCL::
|
50
|
-
4 => OpenCL::
|
51
|
-
8 => OpenCL::
|
48
|
+
1 => OpenCL::Char1,
|
49
|
+
2 => OpenCL::Short1,
|
50
|
+
4 => OpenCL::Int1,
|
51
|
+
8 => OpenCL::Long1
|
52
52
|
},
|
53
53
|
false => {
|
54
|
-
1 => OpenCL::
|
55
|
-
2 => OpenCL::
|
56
|
-
4 => OpenCL::
|
57
|
-
8 => OpenCL::
|
54
|
+
1 => OpenCL::UChar1,
|
55
|
+
2 => OpenCL::UShort1,
|
56
|
+
4 => OpenCL::UInt1,
|
57
|
+
8 => OpenCL::ULong1
|
58
58
|
}
|
59
59
|
}
|
60
60
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: BOAST
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brice Videau
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|
@@ -56,20 +56,20 @@ dependencies:
|
|
56
56
|
requirements:
|
57
57
|
- - "~>"
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
version: '1.
|
59
|
+
version: '1.2'
|
60
60
|
- - ">="
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: 1.
|
62
|
+
version: 1.2.1
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '1.
|
69
|
+
version: '1.2'
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: 1.
|
72
|
+
version: 1.2.1
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
74
|
name: systemu
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|