BOAST 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BOAST.gemspec +2 -2
- data/lib/BOAST/Language/Algorithm.rb +15 -2
- data/lib/BOAST/Language/Architectures.rb +27 -0
- data/lib/BOAST/Language/Index.rb +5 -0
- data/lib/BOAST/Language/Intrinsics.rb +66 -20
- data/lib/BOAST/Language/OpenMP.rb +2 -2
- data/lib/BOAST/Language/Operators.rb +127 -37
- data/lib/BOAST/Language/Parens.rb +8 -2
- data/lib/BOAST/Language/Variable.rb +5 -0
- data/lib/BOAST/Runtime/CompiledRuntime.rb +1 -1
- data/lib/BOAST/Runtime/Compilers.rb +4 -4
- data/lib/BOAST/Runtime/Config.rb +4 -1
- data/lib/BOAST/Runtime/MAQAO.rb +2 -2
- data/lib/BOAST/Runtime/OpenCLRuntime.rb +11 -11
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0bb86d632c7e15c62f42007db65ef307a86ed1eb
|
4
|
+
data.tar.gz: 8b4323362889540f6e731bad43e6765b24f26d2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4dd1ffe821624820fec06684910ad87f0dd451d81b596fafceda7b41651e0d5810dfe4aa902de69d609387577e4ace87b1ede8dea7ebcc65cc4d11d13cd3dda
|
7
|
+
data.tar.gz: 65d6bc915e2299d38e1bd2f4c52057f42e5c6ada5fd5d26416dacd20a4169f364f3eaec28c9e755a4d3b40c3903cb8f2092c23d55dcf400b5b7dbeac5a3f1787
|
data/BOAST.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'BOAST'
|
3
|
-
s.version = "1.2.0"
|
3
|
+
s.version = "1.2.1"
|
4
4
|
s.author = "Brice Videau"
|
5
5
|
s.email = "brice.videau@imag.fr"
|
6
6
|
s.homepage = "https://github.com/Nanosim-LIG/boast"
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
|
|
12
12
|
s.required_ruby_version = '>= 1.9.3'
|
13
13
|
s.add_dependency 'narray', '~> 0.6.0', '>=0.6.0.8'
|
14
14
|
s.add_dependency 'narray_ffi', '~> 1.2', '>=1.2.0'
|
15
|
-
s.add_dependency 'opencl_ruby_ffi', '~> 1.
|
15
|
+
s.add_dependency 'opencl_ruby_ffi', '~> 1.2', '>=1.2.1'
|
16
16
|
s.add_dependency 'systemu', '~> 2', '>=2.2.0'
|
17
17
|
s.add_dependency 'os', '~> 0.9', '>=0.9.6'
|
18
18
|
s.add_dependency 'PAPI', '~> 0', '>=0.101'
|
@@ -30,6 +30,7 @@ module BOAST
|
|
30
30
|
private_boolean_state_accessor :decl_module
|
31
31
|
private_boolean_state_accessor :annotate
|
32
32
|
private_boolean_state_accessor :optimizer_log
|
33
|
+
private_boolean_state_accessor :disable_openmp
|
33
34
|
|
34
35
|
private
|
35
36
|
def push_env(*args)
|
@@ -84,6 +85,7 @@ module BOAST
|
|
84
85
|
boolean_state_accessor :decl_module
|
85
86
|
boolean_state_accessor :annotate
|
86
87
|
boolean_state_accessor :optimizer_log
|
88
|
+
boolean_state_accessor :disable_openmp
|
87
89
|
|
88
90
|
default_state_getter :address_size, OS.bits/8
|
89
91
|
default_state_getter :lang, FORTRAN, '"const_get(#{envs})"', :BOAST_LANG
|
@@ -104,6 +106,7 @@ module BOAST
|
|
104
106
|
default_state_getter :annotate_level, 0
|
105
107
|
default_state_getter :optimizer_log, false
|
106
108
|
default_state_getter :optimizer_log_file, nil
|
109
|
+
default_state_getter :disable_openmp, false
|
107
110
|
|
108
111
|
alias use_vla_old? use_vla?
|
109
112
|
class << self
|
@@ -163,17 +166,27 @@ module BOAST
|
|
163
166
|
|
164
167
|
@@env = Hash::new{|h, k| h[k] = []}
|
165
168
|
|
166
|
-
def push_env(vars = {})
|
167
|
-
|
169
|
+
def push_env(vars = {}, &block)
|
170
|
+
keys = []
|
171
|
+
vars.each { |key, value|
|
168
172
|
var = nil
|
169
173
|
begin
|
170
174
|
var = BOAST::class_variable_get("@@"+key.to_s)
|
171
175
|
rescue
|
176
|
+
BOAST::pop_env(*keys)
|
172
177
|
raise "Unknown module variable #{key}!"
|
173
178
|
end
|
174
179
|
@@env[key].push(var)
|
175
180
|
BOAST::class_variable_set("@@"+key.to_s, value)
|
181
|
+
keys.push(key)
|
176
182
|
}
|
183
|
+
if block then
|
184
|
+
begin
|
185
|
+
block.call
|
186
|
+
ensure
|
187
|
+
BOAST::pop_env(*vars.keys)
|
188
|
+
end
|
189
|
+
end
|
177
190
|
end
|
178
191
|
|
179
192
|
def pop_env(*vars)
|
@@ -91,6 +91,33 @@ X86architectures ={"pentium2"=>["MMX"],
|
|
91
91
|
"RDSEED",
|
92
92
|
"ADCX",
|
93
93
|
"PREFETCHW"],
|
94
|
+
"knl"=>
|
95
|
+
["MOVBE",
|
96
|
+
"MMX",
|
97
|
+
"SSE",
|
98
|
+
"SSE2",
|
99
|
+
"SSE3",
|
100
|
+
"SSSE3",
|
101
|
+
"SSE4.1",
|
102
|
+
"SSE4.2",
|
103
|
+
"POPCNT",
|
104
|
+
"AVX",
|
105
|
+
"AVX2",
|
106
|
+
"AES",
|
107
|
+
"PCLMUL",
|
108
|
+
"FSGSBASE",
|
109
|
+
"RDRND",
|
110
|
+
"FMA",
|
111
|
+
"BMI",
|
112
|
+
"BMI2",
|
113
|
+
"F16C",
|
114
|
+
"RDSEED",
|
115
|
+
"ADCX",
|
116
|
+
"PREFETCHW",
|
117
|
+
"AVX512F",
|
118
|
+
"AVX512PF",
|
119
|
+
"AVX512ER",
|
120
|
+
"AVX512CD"],
|
94
121
|
"bonnell"=>["MOVBE", "MMX", "SSE", "SSE2", "SSE3", "SSSE3"],
|
95
122
|
"silvermont"=>
|
96
123
|
["MOVBE",
|
data/lib/BOAST/Language/Index.rb
CHANGED
@@ -14,6 +14,11 @@ module BOAST
|
|
14
14
|
return !!@alignment
|
15
15
|
end
|
16
16
|
|
17
|
+
def set_align(align)
|
18
|
+
@alignment = align
|
19
|
+
return self
|
20
|
+
end
|
21
|
+
|
17
22
|
def to_var
|
18
23
|
var = @source.copy("#{self}", :const => nil, :constant => nil, :dim => nil, :dimension => nil, :direction => nil, :dir => nil, :align => alignment)
|
19
24
|
return var
|
@@ -39,16 +39,36 @@ module BOAST
|
|
39
39
|
INTRINSICS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
|
40
40
|
CONVERSIONS = Hash::new { |h, k| h[k] = Hash::new { |h2, k2| h2[k2] = {} } }
|
41
41
|
|
42
|
+
def check_coverage
|
43
|
+
ins = []
|
44
|
+
INTRINSICS[X86].each { |i,v|
|
45
|
+
if i == :CVT then
|
46
|
+
v.each { |type1, h|
|
47
|
+
h.each { |type2, instr|
|
48
|
+
ins.push instr.to_s
|
49
|
+
}
|
50
|
+
}
|
51
|
+
else
|
52
|
+
v.each { |type, instr|
|
53
|
+
ins.push instr.to_s
|
54
|
+
}
|
55
|
+
end
|
56
|
+
}
|
57
|
+
return ins - INSTRUCTIONS.keys
|
58
|
+
end
|
59
|
+
|
60
|
+
module_function :check_coverage
|
61
|
+
|
42
62
|
def intrinsics_by_vector_name(intr_symbol, type, type2=nil)
|
43
63
|
if type2 then
|
44
64
|
instruction = INTRINSICS[get_architecture][intr_symbol][type][type2]
|
45
65
|
else
|
46
66
|
instruction = INTRINSICS[get_architecture][intr_symbol][type]
|
47
67
|
end
|
68
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
48
69
|
return instruction if get_architecture == ARM
|
49
|
-
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
50
70
|
supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
|
51
|
-
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? "and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
|
71
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{INSTRUCTIONS[instruction.to_s].join(" or ")})" unless supported
|
52
72
|
return instruction
|
53
73
|
end
|
54
74
|
|
@@ -213,7 +233,7 @@ module BOAST
|
|
213
233
|
|
214
234
|
module_function :type_name_X86
|
215
235
|
|
216
|
-
[64, 128, 256].each { |vector_size|
|
236
|
+
[64, 128, 256, 512].each { |vector_size|
|
217
237
|
vs = ( vector_size < 256 ? "" : "#{vector_size}" )
|
218
238
|
sizes = [8, 16, 32]
|
219
239
|
sizes.push( 64 ) if vector_size > 64
|
@@ -222,12 +242,17 @@ module BOAST
|
|
222
242
|
vtype = vector_type_name( :int, size, vector_size, sign )
|
223
243
|
type = type_name_X86( :int, size, vector_size )
|
224
244
|
instructions = [[:ADD, "add"], [:SUB, "sub"]]
|
225
|
-
instructions.push( [:SET, "setr"]
|
226
|
-
instructions.push( [:
|
245
|
+
instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
|
246
|
+
instructions.push( [:SET1, "set1"] )
|
247
|
+
instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
|
248
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
|
249
|
+
instructions.push( [:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
|
250
|
+
[:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
|
251
|
+
[:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"], ) if vector_size >= 128 and size >= 32
|
227
252
|
instructions.each { |cl, ins|
|
228
253
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
229
254
|
}
|
230
|
-
if size == 64 and vector_size
|
255
|
+
if size == 64 and vector_size == 256 then
|
231
256
|
INTRINSICS[X86][:SET1][vtype] = "_mm#{vs}_set1_#{type}x".to_sym
|
232
257
|
INTRINSICS[X86][:SET][vtype] = "_mm#{vs}_setr_#{type}x".to_sym
|
233
258
|
end
|
@@ -236,8 +261,10 @@ module BOAST
|
|
236
261
|
[8, 16, 32, 64].each { |size|
|
237
262
|
[:signed, :unsigned].each { |sign|
|
238
263
|
vtype = vector_type_name( :int, size, vector_size, sign )
|
239
|
-
[[:LOAD, "loadu"],
|
240
|
-
|
264
|
+
instructions = [[:LOAD, "loadu"], [:LOADA, "load"],
|
265
|
+
[:STORE, "storeu"], [:STOREA, "store"],
|
266
|
+
[:SETZERO, "setzero"] ]
|
267
|
+
instructions.each { |cl, ins|
|
241
268
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_si#{vector_size}".to_sym
|
242
269
|
}
|
243
270
|
}
|
@@ -245,12 +272,19 @@ module BOAST
|
|
245
272
|
sizes = []
|
246
273
|
sizes.push( 32, 64 ) if vector_size > 64
|
247
274
|
sizes.each { |size|
|
248
|
-
[[:ADD, "add"],
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
275
|
+
instructions = [[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"], [:DIV, "div"], [:POW, "pow"],
|
276
|
+
[:FMADD, "fmadd"], [:FMSUB, "fmsub"],
|
277
|
+
[:FNMADD, "fnmadd"], [:FNMSUB, "fnmsub"],
|
278
|
+
[:FMADDSUB, "fmaddsub"], [:FMSUBADD, "fmsubadd"],
|
279
|
+
[:LOAD, "loadu"], [:LOADA, "load"],
|
280
|
+
[:STORE, "storeu"], [:STOREA, "store"],
|
281
|
+
[:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
|
282
|
+
[:MASK_LOAD, "mask_load"], [:MASK_STORE, "mask_store"],
|
283
|
+
[:MASK_LOADU, "mask_loadu"], [:MASK_STOREU, "mask_storeu"],
|
284
|
+
[:MASKZ_LOAD, "maskz_load"], [:MASKZ_LOADU, "maskz_loadu"] ]
|
285
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
|
286
|
+
instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
|
287
|
+
instructions.each { |cl, ins|
|
254
288
|
vtype = vector_type_name( :float, size, vector_size)
|
255
289
|
type = type_name_X86( :float, size, vector_size )
|
256
290
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
@@ -258,7 +292,7 @@ module BOAST
|
|
258
292
|
}
|
259
293
|
}
|
260
294
|
INTRINSICS[X86][:CVT] = Hash::new { |h,k| h[k] = {} }
|
261
|
-
[128, 256].each { |bvsize|
|
295
|
+
[128, 256, 512].each { |bvsize|
|
262
296
|
[16, 32, 64].each { |bsize|
|
263
297
|
ssize = bsize/2
|
264
298
|
while ssize >= 8
|
@@ -296,6 +330,15 @@ module BOAST
|
|
296
330
|
INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
|
297
331
|
}
|
298
332
|
}
|
333
|
+
[64,32].each { |bsize|
|
334
|
+
ftype = type_name_X86( :float, bsize, bvsize )
|
335
|
+
itype = type_name_X86( :int, bsize, bvsize, :signed )
|
336
|
+
fvtype = vector_type_name( :float, bsize, bvsize )
|
337
|
+
ivtype = vector_type_name( :int, bsize, bvsize, :signed )
|
338
|
+
vs = ( bvsize < 256 ? "" : "#{bvsize}" )
|
339
|
+
INTRINSICS[X86][:CVT][fvtype][ivtype] = "_mm#{vs}_cvt#{itype}_#{ftype}".to_sym
|
340
|
+
INTRINSICS[X86][:CVT][ivtype][fvtype] = "_mm#{vs}_cvt#{ftype}_#{itype}".to_sym
|
341
|
+
}
|
299
342
|
}
|
300
343
|
|
301
344
|
|
@@ -324,7 +367,7 @@ module BOAST
|
|
324
367
|
vtype = vector_type_name( :float, size, vector_size )
|
325
368
|
type = type_name_ARM( :float, size )
|
326
369
|
[[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
|
327
|
-
[:FMADD, "mla"], [:
|
370
|
+
[:FMADD, "mla"], [:FNMADD, "mls"],
|
328
371
|
[:LOAD, "ldl"], [:LOADA, "ldl"],
|
329
372
|
[:STORE, "stl"], [:STOREA, "stl"]].each { |cl, ins|
|
330
373
|
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_#{type}".to_sym
|
@@ -332,6 +375,9 @@ module BOAST
|
|
332
375
|
[[:SET1, "dup"]].each { |cl, ins|
|
333
376
|
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_n_#{type}".to_sym
|
334
377
|
}
|
378
|
+
[[:SET_LANE, "set"]].each { |cl, ins|
|
379
|
+
INTRINSICS[ARM][cl][vtype] = "v#{ins}#{q}_lane_#{type}".to_sym
|
380
|
+
}
|
335
381
|
}
|
336
382
|
}
|
337
383
|
INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
|
@@ -344,8 +390,8 @@ module BOAST
|
|
344
390
|
ivtype = vector_type_name( :int, int_size, vector_size, sign )
|
345
391
|
ftype = type_name_ARM( :float, float_size )
|
346
392
|
itype = type_name_ARM( :int, int_size, sign )
|
347
|
-
INTRINSICS[ARM][:CVT][
|
348
|
-
INTRINSICS[ARM][:CVT][
|
393
|
+
INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
|
394
|
+
INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
|
349
395
|
}
|
350
396
|
}
|
351
397
|
svsize = 64
|
@@ -357,8 +403,8 @@ module BOAST
|
|
357
403
|
btype = type_name_ARM( :int, bsize, sign )
|
358
404
|
svtype = vector_type_name( :int, ssize, svsize, sign )
|
359
405
|
bvtype = vector_type_name( :int, bsize, bvsize, sign )
|
360
|
-
INTRINSICS[ARM][:CVT][
|
361
|
-
INTRINSICS[ARM][:CVT][
|
406
|
+
INTRINSICS[ARM][:CVT][bvtype][svtype] = "vmovl_#{stype}".to_sym
|
407
|
+
INTRINSICS[ARM][:CVT][svtype][bvtype] = "vmovn_#{btype}".to_sym
|
362
408
|
}
|
363
409
|
}
|
364
410
|
|
@@ -152,7 +152,7 @@ EOF
|
|
152
152
|
end
|
153
153
|
|
154
154
|
def open
|
155
|
-
output.puts to_s
|
155
|
+
output.puts to_s unless disable_openmp
|
156
156
|
return self
|
157
157
|
end
|
158
158
|
|
@@ -166,7 +166,7 @@ EOF
|
|
166
166
|
end
|
167
167
|
|
168
168
|
def close
|
169
|
-
output.puts end_string(openmp_end_clauses_to_s)
|
169
|
+
output.puts end_string(openmp_end_clauses_to_s) unless disable_openmp
|
170
170
|
return self
|
171
171
|
end
|
172
172
|
|
@@ -316,7 +316,7 @@ module BOAST
|
|
316
316
|
|
317
317
|
def initialize(source, return_type)
|
318
318
|
@source = source
|
319
|
-
@return_type = return_type
|
319
|
+
@return_type = return_type.to_var
|
320
320
|
end
|
321
321
|
|
322
322
|
def type
|
@@ -328,21 +328,29 @@ module BOAST
|
|
328
328
|
if @source.kind_of?( Array ) then
|
329
329
|
raise OperatorError, "Invalid array length!" unless @source.length == @return_type.type.vector_length
|
330
330
|
return @return_type.copy("(#{@return_type.type.decl})( #{@source.join(", ")} )", DISCARD_OPTIONS) if lang == CL
|
331
|
-
|
331
|
+
return Set(@source.first, @return_type).to_var if @source.uniq.size == 1
|
332
332
|
begin
|
333
333
|
instruction = intrinsics(:SET, @return_type.type)
|
334
|
+
raise IntrinsicsError unless instruction
|
334
335
|
return @return_type.copy("#{instruction}( #{@source.join(", ")} )", DISCARD_OPTIONS)
|
335
336
|
rescue IntrinsicsError
|
336
337
|
instruction = intrinsics(:SET_LANE, @return_type.type)
|
338
|
+
raise IntrinsicsError, "Missing instruction for SET_LANE on #{get_architecture_name}!" unless instruction
|
337
339
|
s = Set(0, @return_type).to_s
|
338
340
|
@source.each_with_index { |v,i|
|
339
|
-
s = "#{instruction}(#{v}, #{s}, #{i})"
|
341
|
+
s = "#{instruction}( #{v}, #{s}, #{i} )"
|
340
342
|
}
|
341
343
|
return @return_type.copy(s, DISCARD_OPTIONS)
|
342
344
|
end
|
343
345
|
elsif @source.class != Variable or @source.type.vector_length == 1 then
|
344
346
|
return @return_type.copy("(#{@return_type.type.decl})( #{@source} )", DISCARD_OPTIONS) if lang == CL
|
345
|
-
|
347
|
+
if (@source.is_a?(Numeric) and @source == 0) or (@source.class == Variable and @source.constant == 0) then
|
348
|
+
begin
|
349
|
+
instruction = intrinsics(:SETZERO, @return_type.type)
|
350
|
+
return @return_type.copy("#{instruction}( )", DISCARD_OPTIONS) if instruction
|
351
|
+
rescue IntrinsicsError
|
352
|
+
end
|
353
|
+
end
|
346
354
|
instruction = intrinsics(:SET1, @return_type.type)
|
347
355
|
return @return_type.copy("#{instruction}( #{@source} )", DISCARD_OPTIONS)
|
348
356
|
elsif @return_type.type != @source.type
|
@@ -379,7 +387,7 @@ module BOAST
|
|
379
387
|
|
380
388
|
def initialize(source, return_type)
|
381
389
|
@source = source
|
382
|
-
@return_type = return_type
|
390
|
+
@return_type = return_type.to_var
|
383
391
|
end
|
384
392
|
|
385
393
|
def type
|
@@ -445,12 +453,12 @@ module BOAST
|
|
445
453
|
def initialize(source, mask, return_type)
|
446
454
|
@source = source
|
447
455
|
@mask = mask
|
448
|
-
@return_type = return_type
|
456
|
+
@return_type = return_type.to_var
|
449
457
|
end
|
450
458
|
|
451
459
|
def get_mask
|
452
|
-
|
453
|
-
return
|
460
|
+
type = @return_type.type
|
461
|
+
return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
|
454
462
|
end
|
455
463
|
|
456
464
|
private :get_mask
|
@@ -461,8 +469,12 @@ module BOAST
|
|
461
469
|
|
462
470
|
def to_var
|
463
471
|
raise OperatorError, "Cannot load unknown type!" unless @return_type
|
472
|
+
type = @return_type.type
|
464
473
|
raise LanguageError, "Unsupported language!" unless lang == C
|
465
|
-
|
474
|
+
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
475
|
+
return Load( @source, @return_type ).to_var unless @mask.include?(0)
|
476
|
+
return Set( 0, @return_type ).to_var if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
477
|
+
instruction = intrinsics(:MASKLOAD, type)
|
466
478
|
s = ""
|
467
479
|
src = "#{@source}"
|
468
480
|
if src[0] != "*" then
|
@@ -470,8 +482,8 @@ module BOAST
|
|
470
482
|
else
|
471
483
|
src = src[1..-1]
|
472
484
|
end
|
473
|
-
p_type =
|
474
|
-
s += "#{instruction}((#{p_type.decl} * )#{src}, #{get_mask})"
|
485
|
+
p_type = type.copy(:vector_length => 1)
|
486
|
+
s += "#{instruction}( (#{p_type.decl} * ) #{src}, #{get_mask} )"
|
475
487
|
return @return_type.copy( s, DISCARD_OPTIONS)
|
476
488
|
end
|
477
489
|
|
@@ -501,11 +513,11 @@ module BOAST
|
|
501
513
|
attr_reader :source
|
502
514
|
attr_reader :store_type
|
503
515
|
|
504
|
-
def initialize(dest, source,
|
516
|
+
def initialize(dest, source, store_type = nil)
|
505
517
|
@dest = dest
|
506
518
|
@source = source
|
507
519
|
@store_type = store_type
|
508
|
-
@store_type = source unless @store_type
|
520
|
+
@store_type = source.to_var unless @store_type
|
509
521
|
end
|
510
522
|
|
511
523
|
def to_s
|
@@ -516,17 +528,17 @@ module BOAST
|
|
516
528
|
else
|
517
529
|
dst = dst[1..-1]
|
518
530
|
end
|
531
|
+
type = @store_type.type
|
532
|
+
return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
|
533
|
+
return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
|
519
534
|
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
if @dest.alignment == @source.type.total_size then
|
524
|
-
instruction = intrinsics(:STOREA, @source.type)
|
535
|
+
if @dest.alignment == type.total_size then
|
536
|
+
instruction = intrinsics(:STOREA, type)
|
525
537
|
else
|
526
|
-
instruction = intrinsics(:STORE,
|
538
|
+
instruction = intrinsics(:STORE, type)
|
527
539
|
end
|
528
|
-
p_type =
|
529
|
-
p_type =
|
540
|
+
p_type = type.copy(:vector_length => 1)
|
541
|
+
p_type = type if get_architecture == X86 and type.kind_of?(Int)
|
530
542
|
return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
|
531
543
|
end
|
532
544
|
return Affectation.basic_usage(@dest, @source)
|
@@ -560,20 +572,24 @@ module BOAST
|
|
560
572
|
@source = source
|
561
573
|
@mask = mask
|
562
574
|
@store_type = store_type
|
563
|
-
@store_type = source unless @store_type
|
575
|
+
@store_type = source.to_var unless @store_type
|
564
576
|
end
|
565
577
|
|
566
578
|
def get_mask
|
567
|
-
|
568
|
-
return
|
579
|
+
type = @store_type.type
|
580
|
+
return Set(@mask.collect { |m| ( m and m != 0 ) ? -1 : 0 }, Int("mask", :size => type.size, :vector_length => type.vector_length ) )
|
569
581
|
end
|
570
582
|
|
571
583
|
private :get_mask
|
572
584
|
|
573
585
|
def to_s
|
574
586
|
raise OperatorError, "Cannot store unknown type!" unless @store_type
|
587
|
+
type = @store_type.type
|
575
588
|
raise LanguageError, "Unsupported language!" unless lang == C
|
576
|
-
|
589
|
+
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
590
|
+
return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
|
591
|
+
return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
592
|
+
instruction = intrinsics(:MASKSTORE, type)
|
577
593
|
s = ""
|
578
594
|
dst = "#{@dest}"
|
579
595
|
if dst[0] != "*" then
|
@@ -581,8 +597,81 @@ module BOAST
|
|
581
597
|
else
|
582
598
|
dst = dst[1..-1]
|
583
599
|
end
|
584
|
-
p_type =
|
585
|
-
return s += "#{instruction}((#{p_type.decl} * )#{dst}, #{get_mask}, #{Operator.convert(@source,
|
600
|
+
p_type = type.copy(:vector_length => 1)
|
601
|
+
return s += "#{instruction}( (#{p_type.decl} * ) #{dst}, #{get_mask}, #{Operator.convert(@source, type)} )"
|
602
|
+
end
|
603
|
+
|
604
|
+
def pr
|
605
|
+
ss = to_s
|
606
|
+
if ss then
|
607
|
+
s=""
|
608
|
+
s += indent
|
609
|
+
s += ss
|
610
|
+
s += ";" if [C, CL, CUDA].include?( lang )
|
611
|
+
output.puts s
|
612
|
+
end
|
613
|
+
return self
|
614
|
+
end
|
615
|
+
|
616
|
+
end
|
617
|
+
|
618
|
+
class FMA < Operator
|
619
|
+
extend Functor
|
620
|
+
include Intrinsics
|
621
|
+
include Arithmetic
|
622
|
+
include Inspectable
|
623
|
+
include PrivateStateAccessor
|
624
|
+
|
625
|
+
attr_reader :operand1
|
626
|
+
attr_reader :operand2
|
627
|
+
attr_reader :operand3
|
628
|
+
attr_reader :return_type
|
629
|
+
|
630
|
+
def initialize(a,b,c)
|
631
|
+
@operand1 = a
|
632
|
+
@operand2 = b
|
633
|
+
@operand3 = c
|
634
|
+
@return_type = nil
|
635
|
+
@return_type = @operand3.to_var unless @return_type
|
636
|
+
end
|
637
|
+
|
638
|
+
def convert_operand(op)
|
639
|
+
return "#{Operator.convert(op, @return_type.type)}"
|
640
|
+
end
|
641
|
+
|
642
|
+
private :convert_operand
|
643
|
+
|
644
|
+
def type
|
645
|
+
return @return_type.type
|
646
|
+
end
|
647
|
+
|
648
|
+
def to_var
|
649
|
+
instruction = nil
|
650
|
+
begin
|
651
|
+
instruction = intrinsics(:FMADD,@return_type.type)
|
652
|
+
rescue
|
653
|
+
end
|
654
|
+
return (@operand3 + @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
|
655
|
+
op1 = convert_operand(@operand1.to_var)
|
656
|
+
op2 = convert_operand(@operand2.to_var)
|
657
|
+
op3 = convert_operand(@operand3.to_var)
|
658
|
+
if [CL, CUDA].include?(lang)
|
659
|
+
ret_name = "fma( #{op1}, #{op2}, #{op3} )"
|
660
|
+
else
|
661
|
+
case architecture
|
662
|
+
when X86
|
663
|
+
ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
|
664
|
+
when ARM
|
665
|
+
ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
|
666
|
+
else
|
667
|
+
return (@operand3 + @operand1 * @operand2).to_var
|
668
|
+
end
|
669
|
+
end
|
670
|
+
return @return_type.copy( ret_name, DISCARD_OPTIONS)
|
671
|
+
end
|
672
|
+
|
673
|
+
def to_s
|
674
|
+
return to_var.to_s
|
586
675
|
end
|
587
676
|
|
588
677
|
def pr
|
@@ -596,7 +685,7 @@ module BOAST
|
|
596
685
|
|
597
686
|
end
|
598
687
|
|
599
|
-
class
|
688
|
+
class FMS < Operator
|
600
689
|
extend Functor
|
601
690
|
include Intrinsics
|
602
691
|
include Arithmetic
|
@@ -629,23 +718,24 @@ module BOAST
|
|
629
718
|
def to_var
|
630
719
|
instruction = nil
|
631
720
|
begin
|
632
|
-
instruction = intrinsics(:
|
721
|
+
instruction = intrinsics(:FNMADD,@return_type.type)
|
633
722
|
rescue
|
634
723
|
end
|
635
|
-
return (@operand3
|
636
|
-
op1 = convert_operand(@operand1)
|
637
|
-
op2 = convert_operand(@operand2)
|
638
|
-
op3 = convert_operand(@operand3)
|
724
|
+
return (@operand3 - @operand1 * @operand2).to_var unless lang != FORTRAN and @return_type and ( instruction or ( [CL, CUDA].include?(lang) ) )
|
725
|
+
op1 = convert_operand(@operand1.to_var)
|
726
|
+
op2 = convert_operand(@operand2.to_var)
|
727
|
+
op3 = convert_operand(@operand3.to_var)
|
639
728
|
if [CL, CUDA].include?(lang)
|
640
|
-
|
729
|
+
op1 = convert_operand((-@operand1).to_var)
|
730
|
+
ret_name = "fma( #{op1}, #{op2}, #{op3} )"
|
641
731
|
else
|
642
732
|
case architecture
|
643
733
|
when X86
|
644
|
-
ret_name = "#{instruction}(#{op1}
|
734
|
+
ret_name = "#{instruction}( #{op1}, #{op2}, #{op3} )"
|
645
735
|
when ARM
|
646
|
-
ret_name = "#{instruction}(#{
|
736
|
+
ret_name = "#{instruction}( #{op3}, #{op1}, #{op2} )"
|
647
737
|
else
|
648
|
-
return (@operand1 * @operand2
|
738
|
+
return (@operand3 - @operand1 * @operand2).to_var
|
649
739
|
end
|
650
740
|
end
|
651
741
|
return @return_type.copy( ret_name, DISCARD_OPTIONS)
|
@@ -2,10 +2,16 @@ module BOAST
|
|
2
2
|
|
3
3
|
module_function
|
4
4
|
|
5
|
-
|
5
|
+
FUNCCALLS = {}
|
6
|
+
|
7
|
+
def register_funccall(name, options = {})
|
8
|
+
sym = name.to_sym
|
9
|
+
FUNCCALLS[sym] = {}
|
10
|
+
FUNCCALLS[sym][:parameters] = options[:parameters]
|
11
|
+
FUNCCALLS[sym][:returns] = options[:returns]
|
6
12
|
s =<<EOF
|
7
13
|
def self.#{name}(*args)
|
8
|
-
return FuncCall(
|
14
|
+
return FuncCall(#{sym.inspect}, *args#{options[:returns] ? ", returns: FUNCCALLS[#{sym.inspect}][:returns]" : ""})
|
9
15
|
end
|
10
16
|
EOF
|
11
17
|
eval s
|
@@ -51,7 +51,7 @@ module BOAST
|
|
51
51
|
cflags += " -DHAVE_NARRAY_H" if narray_path
|
52
52
|
cflags += " -I/usr/local/k1tools/include" if @architecture == MPPA
|
53
53
|
objext = RbConfig::CONFIG["OBJEXT"]
|
54
|
-
if options[:openmp] and @lang == C then
|
54
|
+
if options[:openmp] and @lang == C and not disable_openmp then
|
55
55
|
openmp_cflags = get_openmp_flags(c_compiler)
|
56
56
|
raise "unkwown openmp flags for: #{c_compiler}" if not openmp_cflags
|
57
57
|
cflags += " #{openmp_cflags}"
|
@@ -79,7 +79,7 @@ module BOAST
|
|
79
79
|
cxx_compiler = options[:CXX]
|
80
80
|
cxxflags = options[:CXXFLAGS]
|
81
81
|
cxxflags += " -fPIC #{includes}"
|
82
|
-
if options[:openmp] and @lang == C then
|
82
|
+
if options[:openmp] and @lang == C and not disable_openmp then
|
83
83
|
openmp_cxxflags = get_openmp_flags(cxx_compiler)
|
84
84
|
raise "unkwown openmp flags for: #{cxx_compiler}" if not openmp_cxxflags
|
85
85
|
cxxflags += " #{openmp_cxxflags}"
|
@@ -97,7 +97,7 @@ module BOAST
|
|
97
97
|
fcflags += " -march=#{get_model}"
|
98
98
|
fcflags += " -fPIC"
|
99
99
|
fcflags += " -fno-second-underscore" if f_compiler == 'g95'
|
100
|
-
if options[:openmp] and @lang == FORTRAN then
|
100
|
+
if options[:openmp] and @lang == FORTRAN and not disable_openmp then
|
101
101
|
openmp_fcflags = get_openmp_flags(f_compiler)
|
102
102
|
raise "unkwown openmp flags for: #{f_compiler}" if not openmp_fcflags
|
103
103
|
fcflags += " #{openmp_fcflags}"
|
@@ -152,7 +152,7 @@ module BOAST
|
|
152
152
|
c_compiler = "cc" if not c_compiler
|
153
153
|
linker = options[:LD]
|
154
154
|
linker = c_compiler if not linker
|
155
|
-
if options[:openmp] then
|
155
|
+
if options[:openmp] and not disable_openmp then
|
156
156
|
openmp_ldflags = get_openmp_flags(linker)
|
157
157
|
raise "unknown openmp flags for: #{linker}" if not openmp_ldflags
|
158
158
|
ldflags += " #{openmp_ldflags}"
|
data/lib/BOAST/Runtime/Config.rb
CHANGED
@@ -35,16 +35,19 @@ module BOAST
|
|
35
35
|
private_boolean_state_accessor :verbose
|
36
36
|
private_boolean_state_accessor :debug_source
|
37
37
|
private_boolean_state_accessor :ffi
|
38
|
+
private_boolean_state_accessor :keep_temp
|
38
39
|
private_state_accessor :fortran_line_length
|
39
40
|
end
|
40
41
|
|
41
42
|
boolean_state_accessor :verbose
|
42
43
|
boolean_state_accessor :debug_source
|
43
44
|
boolean_state_accessor :ffi
|
45
|
+
boolean_state_accessor :keep_temp
|
44
46
|
state_accessor :fortran_line_length
|
45
|
-
default_state_getter :ffi, false
|
46
47
|
default_state_getter :verbose, false
|
47
48
|
default_state_getter :debug_source, false
|
49
|
+
default_state_getter :ffi, false
|
50
|
+
default_state_getter :keep_temp, false
|
48
51
|
default_state_getter :fortran_line_length, 72
|
49
52
|
|
50
53
|
module_function
|
data/lib/BOAST/Runtime/MAQAO.rb
CHANGED
@@ -26,8 +26,8 @@ module BOAST
|
|
26
26
|
puts "#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}"
|
27
27
|
end
|
28
28
|
result = `#{compiler_options[:MAQAO]} cqa #{maqao_model ? "--uarch=#{maqao_model} " : ""}#{f1.path} --fct=#{@procedure.name} #{compiler_options[:MAQAO_FLAGS]}`
|
29
|
-
File::unlink(library_object)
|
30
|
-
File::unlink(library_source)
|
29
|
+
File::unlink(library_object) unless keep_temp
|
30
|
+
File::unlink(library_source) unless keep_temp
|
31
31
|
return result
|
32
32
|
end
|
33
33
|
end
|
@@ -38,23 +38,23 @@ module BOAST
|
|
38
38
|
|
39
39
|
def init_opencl_types
|
40
40
|
@@opencl_real_types = {
|
41
|
-
2 => OpenCL::
|
42
|
-
4 => OpenCL::
|
43
|
-
8 => OpenCL::
|
41
|
+
2 => OpenCL::Half1,
|
42
|
+
4 => OpenCL::Float1,
|
43
|
+
8 => OpenCL::Double1
|
44
44
|
}
|
45
45
|
|
46
46
|
@@opencl_int_types = {
|
47
47
|
true => {
|
48
|
-
1 => OpenCL::
|
49
|
-
2 => OpenCL::
|
50
|
-
4 => OpenCL::
|
51
|
-
8 => OpenCL::
|
48
|
+
1 => OpenCL::Char1,
|
49
|
+
2 => OpenCL::Short1,
|
50
|
+
4 => OpenCL::Int1,
|
51
|
+
8 => OpenCL::Long1
|
52
52
|
},
|
53
53
|
false => {
|
54
|
-
1 => OpenCL::
|
55
|
-
2 => OpenCL::
|
56
|
-
4 => OpenCL::
|
57
|
-
8 => OpenCL::
|
54
|
+
1 => OpenCL::UChar1,
|
55
|
+
2 => OpenCL::UShort1,
|
56
|
+
4 => OpenCL::UInt1,
|
57
|
+
8 => OpenCL::ULong1
|
58
58
|
}
|
59
59
|
}
|
60
60
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: BOAST
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brice Videau
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|
@@ -56,20 +56,20 @@ dependencies:
|
|
56
56
|
requirements:
|
57
57
|
- - "~>"
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
version: '1.
|
59
|
+
version: '1.2'
|
60
60
|
- - ">="
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: 1.
|
62
|
+
version: 1.2.1
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '1.
|
69
|
+
version: '1.2'
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
|
-
version: 1.
|
72
|
+
version: 1.2.1
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
74
|
name: systemu
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|