BOAST 1.3.4 → 1.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BOAST.gemspec +1 -1
- data/lib/BOAST/Language/Algorithm.rb +3 -3
- data/lib/BOAST/Language/Architectures.rb +58 -0
- data/lib/BOAST/Language/CPUID_by_name.rb +1871 -1871
- data/lib/BOAST/Language/Config.rb +10 -0
- data/lib/BOAST/Language/Intrinsics.rb +87 -44
- data/lib/BOAST/Language/Operators.rb +92 -10
- data/lib/BOAST/Language/Variable.rb +11 -4
- data/lib/BOAST/Optimization/Optimization.rb +58 -7
- data/lib/BOAST/Runtime/CRuntime.rb +1 -1
- data/lib/BOAST/Runtime/CompiledRuntime.rb +1 -0
- data/lib/BOAST/Runtime/EnergyProbe.rb +18 -8
- data/lib/BOAST/Runtime/FFIRuntime.rb +12 -6
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +7 -2
- data/lib/BOAST/Runtime/MPPARuntime.rb +1 -1
- metadata +2 -2
@@ -135,6 +135,16 @@ module BOAST
|
|
135
135
|
return use_vla_old?
|
136
136
|
end
|
137
137
|
|
138
|
+
# Stores +val+ as the current model, then asks Intrinsics to rebuild its
# conversion tables (generate_conversions reads the model back through
# get_model).
# @param val the model to select
# @return whatever Intrinsics::generate_conversions returns
def set_model(val)
  @@model = val
  Intrinsics.generate_conversions
end
|
142
|
+
|
143
|
+
# Assignment form of the model setter: records +val+ as the current model and
# has Intrinsics regenerate its conversion tables afterwards.
# @param val the model to select
def model=(val)
  @@model = val
  Intrinsics.generate_conversions
end
|
147
|
+
|
138
148
|
# @private
|
139
149
|
def get_default_architecture
|
140
150
|
architecture = const_get(ENV["ARCHITECTURE"]) if ENV["ARCHITECTURE"]
|
@@ -66,8 +66,27 @@ module BOAST
|
|
66
66
|
end
|
67
67
|
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
68
68
|
return instruction if get_architecture == ARM
|
69
|
-
supported =
|
70
|
-
|
69
|
+
supported = false
|
70
|
+
INSTRUCTIONS[instruction.to_s].each { |cpuid|
|
71
|
+
if cpuid.kind_of?( Array ) then
|
72
|
+
supported = true if (cpuid - MODELS[get_model.to_s]).empty?
|
73
|
+
else
|
74
|
+
supported = true if MODELS[get_model.to_s].include?( cpuid )
|
75
|
+
end
|
76
|
+
}
|
77
|
+
# supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
|
78
|
+
if not supported then
|
79
|
+
required = ""
|
80
|
+
INSTRUCTIONS[instruction.to_s].each { |cpuid|
|
81
|
+
required += " or " if required != ""
|
82
|
+
if cpuid.kind_of?( Array ) then
|
83
|
+
required += "( #{cpuid.join(" and ")} )"
|
84
|
+
else
|
85
|
+
required += "#{cpuid}"
|
86
|
+
end
|
87
|
+
}
|
88
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{required})"
|
89
|
+
end
|
71
90
|
return instruction
|
72
91
|
end
|
73
92
|
|
@@ -81,14 +100,14 @@ module BOAST
|
|
81
100
|
|
82
101
|
# Looks up the conversion path registered in CONVERSIONS between two vector
# types on the current architecture.
# @param type_dest destination type
# @param type_orig origin type
# @return the stored conversion path
# @raise [IntrinsicsError] when no path is registered; on X86 the message
#   also names the current model
def get_conversion_path(type_dest, type_orig)
  dest_name = get_vector_name(type_dest)
  orig_name = get_vector_name(type_orig)
  conversion_path = CONVERSIONS[get_architecture][dest_name][orig_name]
  return conversion_path if conversion_path
  model_note = get_architecture == X86 ? "(#{get_model})" : ""
  raise IntrinsicsError, "Unavailable conversion from #{orig_name} to #{dest_name} on #{get_architecture_name}#{model_note}!"
end
|
87
106
|
|
88
107
|
module_function :get_conversion_path
|
89
108
|
|
90
109
|
def get_vector_decl_X86( data_type )
|
91
|
-
raise IntrinsicsError, "Unsupported vector size on X86: #{data_type.total_size*8}!" unless [64,128,256].include?( data_type.total_size*8 )
|
110
|
+
raise IntrinsicsError, "Unsupported vector size on X86: #{data_type.total_size*8}!" unless [64,128,256,512].include?( data_type.total_size*8 )
|
92
111
|
s = "__m#{data_type.total_size*8}"
|
93
112
|
case data_type
|
94
113
|
when Int
|
@@ -112,7 +131,7 @@ module BOAST
|
|
112
131
|
raise IntrinsicsError, "Unsupported data size for int vector on ARM: #{data_type.size*8}!" unless [1,2,4,8].include?( data_type.size )
|
113
132
|
return get_vector_name( data_type ).to_s
|
114
133
|
when Real
|
115
|
-
raise IntrinsicsError, "Unsupported data size for real vector on ARM: #{data_type.size*8}!"
|
134
|
+
raise IntrinsicsError, "Unsupported data size for real vector on ARM: #{data_type.size*8}!" unless [4,8].include?( data_type.size )
|
116
135
|
return get_vector_name( data_type ).to_s
|
117
136
|
else
|
118
137
|
raise IntrinsicsError, "Unsupported data type #{data_type} for vector on ARM!"
|
@@ -244,10 +263,11 @@ module BOAST
|
|
244
263
|
instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
|
245
264
|
instructions.push( [:SET1, "set1"] )
|
246
265
|
instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
|
247
|
-
instructions.push( [:MASKLOAD,
|
248
|
-
instructions.push( [:MASK_LOAD,
|
249
|
-
[:
|
250
|
-
[:MASKZ_LOAD,
|
266
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
|
267
|
+
instructions.push( [:MASK_LOAD, "mask_loadu"], [:MASK_STORE, "mask_storeu"],
|
268
|
+
[:MASK_LOADA, "mask_load"], [:MASK_STOREA, "mask_store"],
|
269
|
+
[:MASKZ_LOAD, "maskz_loadu"],
|
270
|
+
[:MASKZ_LOADA, "maskz_load"], ) if vector_size >= 128 and size >= 32
|
251
271
|
instructions.each { |cl, ins|
|
252
272
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
253
273
|
}
|
@@ -278,9 +298,10 @@ module BOAST
|
|
278
298
|
[:LOAD, "loadu"], [:LOADA, "load"],
|
279
299
|
[:STORE, "storeu"], [:STOREA, "store"],
|
280
300
|
[:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
|
281
|
-
[:MASK_LOAD,
|
282
|
-
[:
|
283
|
-
[:MASKZ_LOAD,
|
301
|
+
[:MASK_LOAD, "mask_loadu"], [:MASK_STORE, "mask_storeu"],
|
302
|
+
[:MASK_LOADA, "mask_load"], [:MASK_STOREA, "mask_store"],
|
303
|
+
[:MASKZ_LOAD, "maskz_loadu"],
|
304
|
+
[:MASKZ_LOADA, "maskz_load"]]
|
284
305
|
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
|
285
306
|
instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
|
286
307
|
instructions.each { |cl, ins|
|
@@ -362,7 +383,7 @@ module BOAST
|
|
362
383
|
}
|
363
384
|
}
|
364
385
|
}
|
365
|
-
[32].each { |size|
|
386
|
+
[32, 64].each { |size|
|
366
387
|
vtype = vector_type_name( :float, size, vector_size )
|
367
388
|
type = type_name_ARM( :float, size )
|
368
389
|
[[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
|
@@ -381,18 +402,24 @@ module BOAST
|
|
381
402
|
}
|
382
403
|
INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
|
383
404
|
[64, 128].each { |vector_size|
|
384
|
-
int_size
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
405
|
+
[[32, 32],[64, 64]].each { |int_size, float_size|
|
406
|
+
q = (vector_size == 128 ? "q" : "")
|
407
|
+
[:signed, :unsigned].each { |sign|
|
408
|
+
fvtype = vector_type_name( :float, float_size, vector_size )
|
409
|
+
ivtype = vector_type_name( :int, int_size, vector_size, sign )
|
410
|
+
ftype = type_name_ARM( :float, float_size )
|
411
|
+
itype = type_name_ARM( :int, int_size, sign )
|
412
|
+
INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
|
413
|
+
INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
|
414
|
+
}
|
394
415
|
}
|
395
416
|
}
|
417
|
+
sfvtype = vector_type_name( :float, 32, 64 )
|
418
|
+
sdvtype = vector_type_name( :float, 64, 128 )
|
419
|
+
sftype = type_name_ARM( :float, 32 )
|
420
|
+
sdtype = type_name_ARM( :float, 64 )
|
421
|
+
INTRINSICS[ARM][:CVT][sfvtype][sdvtype] = "vcvt_#{sftype}_#{sdtype}".to_sym
|
422
|
+
INTRINSICS[ARM][:CVT][sdvtype][sfvtype] = "vcvt_#{sdtype}_#{sftype}".to_sym
|
396
423
|
svsize = 64
|
397
424
|
bvsize = 128
|
398
425
|
[16, 32, 64].each { |bsize|
|
@@ -407,30 +434,46 @@ module BOAST
|
|
407
434
|
}
|
408
435
|
}
|
409
436
|
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
437
|
+
# Fills CONVERSIONS for the X86 and ARM architectures.
#
# For each architecture a directed graph is built with one edge per entry of
# INTRINSICS[arch][:CVT] (origin type -> destination type). On X86 an edge is
# only added when the intrinsic is supported by the current model, i.e. at
# least one entry of INSTRUCTIONS[intrinsic] is satisfied by
# MODELS[get_model]: a plain entry must be listed for the model, an Array
# entry must be entirely listed. Shortest paths (every edge weighing 1) are
# then stored as CONVERSIONS[arch][dest][source], and each type found in
# INTRINSICS[arch] gets the trivial path [type] to itself.
#
# NOTE(review): entries written by a previous call are not removed here, so
# paths computed for an earlier model may remain after the model changes;
# confirm whether CONVERSIONS should be reset first.
def generate_conversions
  [X86, ARM].each { |arch|
    cvt_dgraph = RGL::DirectedAdjacencyGraph::new
    INTRINSICS[arch][:CVT].each { |dest, origs|
      origs.each { |orig, intrinsic|
        supported = true
        if arch == X86 then
          supported = INSTRUCTIONS[intrinsic.to_s].any? { |cpuid|
            if cpuid.kind_of?( Array ) then
              # all the listed features are required together
              (cpuid - MODELS[get_model.to_s]).empty?
            else
              MODELS[get_model.to_s].include?( cpuid )
            end
          }
        end
        cvt_dgraph.add_edge(orig, dest) if supported
      }
    }
    # The weight map is the same for every source vertex: build it once.
    weights = {}
    cvt_dgraph.edges.each { |e| weights[e.to_a] = 1 }
    cvt_dgraph.vertices.each { |source|
      paths = cvt_dgraph.dijkstra_shortest_paths( weights, source )
      paths.each { |dest, path|
        CONVERSIONS[arch][dest][source] = path if path
      }
    }
    types = []
    INTRINSICS[arch].each { |_intrinsic, instructions|
      types += instructions.keys
    }
    # uniq returns a new array; use its result instead of discarding it
    types.uniq.each { |type|
      CONVERSIONS[arch][type][type] = [type]
    }
  }
end
|
474
|
+
module_function :generate_conversions
|
475
|
+
|
476
|
+
generate_conversions
|
434
477
|
|
435
478
|
end
|
436
479
|
|
@@ -192,7 +192,7 @@ module BOAST
|
|
192
192
|
if arg1.class == Variable and arg1.type.vector_length > 1 then
|
193
193
|
return "#{arg1} = #{Load(arg2, arg1)}"
|
194
194
|
elsif arg2.class == Variable and arg2.type.vector_length > 1 then
|
195
|
-
return "#{Store(arg1, arg2, return_type)}"
|
195
|
+
return "#{Store(arg1, arg2, :store_type => return_type)}"
|
196
196
|
end
|
197
197
|
return basic_usage(arg1, arg2)
|
198
198
|
end
|
@@ -304,6 +304,61 @@ module BOAST
|
|
304
304
|
|
305
305
|
end
|
306
306
|
|
307
|
+
# Mask used by masked vector loads and stores.
#
# A mask can be built from another Mask, from an Array of values (an element
# different from 0 enables the lane of the same index) or from an Int
# Variable holding the mask.
class Mask
  extend Functor

  # Variable holding the mask value
  attr_reader :value
  # number of lanes covered by the mask (may be nil when built from a Variable)
  attr_reader :length
  # number of enabled lanes (nil when built from a Variable)
  attr_reader :pos_values

  # @return [Boolean] true when the mask is known to enable no lane
  def empty?
    return false unless @pos_values
    return @pos_values == 0
  end

  # @return [Boolean] true when the mask is known to enable every lane
  def full?
    return false unless @pos_values and @length
    return @pos_values == @length
  end

  # @param values [Mask, Array, Variable] mask description
  # @param options [Hash]
  # @option options [Integer] :length expected number of lanes
  # @raise [OperatorError] when the values do not match the expected length
  #   or are of an unsupported kind
  def initialize( values, options = {} )
    length = options[:length]
    if values.kind_of?(Mask) then
      raise OperatorError, "Wrong number of mask values (#{values.length} for #{length})!" if length and values.length and values.length != length
      @value = values.value
      @length = length ? length : values.length
      @pos_values = values.pos_values
    elsif values.kind_of?(Array) then
      raise OperatorError, "Wrong number of mask values (#{values.length} for #{length})!" if length and values.length != length
      # Lane 0 is the least significant bit, hence the reverse. The digits
      # are binary: convert them before emitting a hexadecimal literal
      # (prefixing them with "0x" directly would be read as hexadecimal).
      bits = values.collect { |v| v != 0 ? 1 : 0 }.reverse.join
      s = "0x" + bits.to_i(2).to_s(16)
      @value = Int( s, :signed => false, :size => values.length / 8 + ( values.length % 8 > 0 ? 1 : 0 ), :constant => s )
      @length = values.length
      @pos_values = values.reject { |e| e == 0 }.length
    elsif values.kind_of?(Variable) and values.type.kind_of?(Int) then
      raise OperatorError, "Wrong mask size (#{values.type.size} for #{length / 8 + ( length % 8 > 0 ? 1 : 0 )})!" if length and values.type.size != length / 8 + ( length % 8 > 0 ? 1 : 0 )
      @value = values
      @length = length if length
    else
      raise OperatorError, "Illegal values for mask (#{values.class}), expecting Array of Int!"
    end
  end

  # @return [String] string form of the underlying value
  def to_s
    return @value.to_s
  end

  # @return the underlying value
  def to_var
    return @value
  end
end
|
361
|
+
|
307
362
|
# @!parse module Functors; functorize Set; end
|
308
363
|
class Set < Operator
|
309
364
|
extend Functor
|
@@ -386,10 +441,14 @@ module BOAST
|
|
386
441
|
|
387
442
|
attr_reader :source
|
388
443
|
attr_reader :return_type
|
444
|
+
attr_reader :options
|
389
445
|
|
390
|
-
def initialize(source, return_type)
|
446
|
+
# @param source what is loaded
# @param return_type [#to_var] converted with to_var and kept as the return type
# @param options [Hash] kept as is and readable through #options
# @option options :mask mask to apply to the load
# @option options :zero zeroing flag used together with the mask
def initialize(source, return_type, options = {})
  @source = source
  @return_type = return_type.to_var
  @options = options
  @mask, @zero = options[:mask], options[:zero]
end
|
394
453
|
|
395
454
|
def type
|
@@ -412,10 +471,25 @@ module BOAST
|
|
412
471
|
end
|
413
472
|
return @return_type.copy("vload#{@return_type.type.vector_length}(0, #{a2})", DISCARD_OPTIONS) if lang == CL
|
414
473
|
return @return_type.copy("_m_from_int64( *((int64_t * ) #{a2} ) )", DISCARD_OPTIONS) if get_architecture == X86 and @return_type.type.total_size*8 == 64
|
474
|
+
sym = ""
|
475
|
+
mask = nil
|
476
|
+
mask = Mask(@mask, :length => @return_type.type.vector_length) if @mask
|
477
|
+
if mask and not mask.full? then
|
478
|
+
return Set(0, @return_type) if @zero and mask.empty?
|
479
|
+
return @return_type if mask.empty?
|
480
|
+
sym += "MASK"
|
481
|
+
sym += "Z" if @zero
|
482
|
+
sym += "_"
|
483
|
+
end
|
415
484
|
if @source.alignment and @return_type.type.total_size and ( @source.alignment % @return_type.type.total_size ) == 0 then
|
416
|
-
|
485
|
+
sym += "LOADA"
|
417
486
|
else
|
418
|
-
|
487
|
+
sym += "LOAD"
|
488
|
+
end
|
489
|
+
instruction = intrinsics( sym.to_sym, @return_type.type)
|
490
|
+
if mask and not mask.full? then
|
491
|
+
return @return_type.copy("#{instruction}( (#{mask.value.type.decl})#{mask}, #{a2} )", DISCARD_OPTIONS) if @zero
|
492
|
+
return @return_type.copy("#{instruction}( #{@return_type}, (#{mask.value.type.decl})#{mask}, #{a2} )", DISCARD_OPTIONS)
|
419
493
|
end
|
420
494
|
return @return_type.copy("#{instruction}( #{a2} )", DISCARD_OPTIONS)
|
421
495
|
else
|
@@ -516,12 +590,14 @@ module BOAST
|
|
516
590
|
attr_reader :dest
|
517
591
|
attr_reader :source
|
518
592
|
attr_reader :store_type
|
593
|
+
attr_reader :options
|
519
594
|
|
520
|
-
def initialize(dest, source,
|
595
|
+
# @param dest where the data is stored
# @param source [#to_var] what is stored
# @param options [Hash] kept as is and readable through #options
# @option options :store_type type used for the store; defaults to source.to_var
# @option options :mask mask to apply to the store
def initialize(dest, source, options = {})
  # Backward compatibility: the third argument used to be the store type
  # itself. Anything that is not a Hash (including nil) is treated as such
  # instead of being indexed like a Hash.
  options = { :store_type => options } unless options.kind_of?(Hash)
  @dest = dest
  @source = source
  # back the options reader, as the load operator does
  @options = options
  @store_type = options[:store_type] || source.to_var
  @mask = options[:mask]
end
|
526
602
|
|
527
603
|
def to_s
|
@@ -535,14 +611,20 @@ module BOAST
|
|
535
611
|
type = @store_type.type
|
536
612
|
return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
|
537
613
|
return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
|
538
|
-
|
614
|
+
sym = ""
|
615
|
+
mask = nil
|
616
|
+
mask = Mask(@mask, :length => @store_type.type.vector_length) if @mask
|
617
|
+
return "" if mask and mask.empty?
|
618
|
+
sym += "MASK_" if mask and not mask.full?
|
539
619
|
if @dest.alignment and type.total_size and ( @dest.alignment % type.total_size ) == 0 then
|
540
|
-
|
620
|
+
sym += "STOREA"
|
541
621
|
else
|
542
|
-
|
622
|
+
sym += "STORE"
|
543
623
|
end
|
624
|
+
instruction = intrinsics(sym.to_sym, type)
|
544
625
|
p_type = type.copy(:vector_length => 1)
|
545
626
|
p_type = type if get_architecture == X86 and type.kind_of?(Int)
|
627
|
+
return "#{instruction}( (#{p_type.decl} * ) #{dst}, (#{mask.value.type.decl})#{mask}, #{@source} )" if mask and not mask.full?
|
546
628
|
return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
|
547
629
|
end
|
548
630
|
return Affectation.basic_usage(@dest, @source)
|
@@ -592,7 +674,7 @@ module BOAST
|
|
592
674
|
type = @store_type.type
|
593
675
|
raise LanguageError, "Unsupported language!" unless lang == C
|
594
676
|
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
595
|
-
return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
|
677
|
+
return Store( @dest, @source, :store_type => @store_type ).to_s unless @mask.include?(0)
|
596
678
|
return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
597
679
|
instruction = intrinsics(:MASKSTORE, type)
|
598
680
|
s = ""
|
@@ -179,6 +179,7 @@ module BOAST
|
|
179
179
|
attr_reader :restrict
|
180
180
|
attr_reader :deferred_shape
|
181
181
|
attr_reader :optional
|
182
|
+
attr_accessor :reference
|
182
183
|
attr_accessor :alignment
|
183
184
|
attr_accessor :replace_constant
|
184
185
|
attr_accessor :force_replace_constant
|
@@ -231,6 +232,10 @@ module BOAST
|
|
231
232
|
!!@deferred_shape
|
232
233
|
end
|
233
234
|
|
235
|
+
# @return [Boolean] true when the reference attribute is set to a truthy value
def reference?
  @reference ? true : false
end
|
238
|
+
|
234
239
|
# Creates a new {Variable}
|
235
240
|
# @param [#to_s] name
|
236
241
|
# @param [DataType] type
|
@@ -239,6 +244,7 @@ module BOAST
|
|
239
244
|
# @option properties [Array<Dimension>] :dimension (or *:dim*) variable is an array rather than a scalar. Dimensions are given in Fortran order (contiguous first).
|
240
245
|
# @option properties [Object] :constant (or *:const*) states that the variable is a constant and give its value. For Variable with the *:dimension* property set must be a {ConstArray}
|
241
246
|
# @option properties [Boolean] :restrict specifies that the compiler can assume no aliasing to this array.
|
247
|
+
# @option properties [Boolean] :reference specifies that this variable is passed by reference.
|
242
248
|
# @option properties [Symbol] :allocate specify that the variable is to be allocated and where. Can only be *:heap* or *:stack* for now.
|
243
249
|
# @option properties [Boolean] :local indicates that the variable is to be allocated on the __local space of OpenCL devices or __shared__ space of CUDA devices. In C or FORTRAN this has the same effect as *:allocate* => *:stack*.
|
244
250
|
# @option properties [Boolean] :texture for OpenCL and CUDA. In OpenCL also specifies that a sampler has to be generated to access the array variable.
|
@@ -258,6 +264,7 @@ module BOAST
|
|
258
264
|
@alignment = properties[:align]
|
259
265
|
@deferred_shape = properties[:deferred_shape]
|
260
266
|
@optional = properties[:optional]
|
267
|
+
@reference = properties[:reference]
|
261
268
|
@force_replace_constant = false
|
262
269
|
if not properties[:replace_constant].nil? then
|
263
270
|
@replace_constant = properties[:replace_constant]
|
@@ -308,7 +315,7 @@ module BOAST
|
|
308
315
|
s = @constant.to_s + @type.suffix
|
309
316
|
return s
|
310
317
|
end
|
311
|
-
if @scalar_output and [C, CL, CUDA].include?( lang ) and not decl_module? then
|
318
|
+
if @scalar_output or @reference and [C, CL, CUDA].include?( lang ) and not decl_module? then
|
312
319
|
return "(*#{name})"
|
313
320
|
end
|
314
321
|
return @name
|
@@ -348,7 +355,7 @@ module BOAST
|
|
348
355
|
if dimension? then
|
349
356
|
s += " *" unless (use_vla? and lang != FORTRAN)
|
350
357
|
end
|
351
|
-
if not dimension? and ( lang == FORTRAN or @direction == :out or @direction == :inout ) then
|
358
|
+
if not dimension? and ( lang == FORTRAN or @direction == :out or @direction == :inout or @reference ) then
|
352
359
|
s += " *"
|
353
360
|
end
|
354
361
|
s += " #{@name}"
|
@@ -365,7 +372,7 @@ module BOAST
|
|
365
372
|
# FFI type used to pass this variable.
# @param alloc truthy value disables the FORTRAN and by-pointer scalar cases
# @param lang target language
# @return :pointer for FORTRAN (unless alloc), for arrays, and for scalars
#   that are :out, :inout or by reference (unless alloc); the FFI declaration
#   of the type otherwise
def decl_ffi(alloc, lang)
  unless alloc
    return :pointer if lang == FORTRAN
  end
  return :pointer if dimension?
  passed_by_pointer = (@direction == :out || @direction == :inout || @reference)
  return :pointer if passed_by_pointer && !alloc
  @type.decl_ffi
end
|
371
378
|
|
@@ -426,7 +433,7 @@ module BOAST
|
|
426
433
|
end
|
427
434
|
|
428
435
|
# @param device forwarded to __pointer_array?
# @return [Boolean] true for a scalar that is :out, :inout or by reference,
#   or when __pointer_array? answers truthy
def __pointer?(device = false)
  scalar_by_pointer = !dimension? && (@direction == :out || @direction == :inout || @reference)
  return true if scalar_by_pointer
  __pointer_array?(device) ? true : false
end
|
431
438
|
|
432
439
|
def __restrict?
|
@@ -20,26 +20,64 @@ module BOAST
|
|
20
20
|
|
21
21
|
# Search space handed to the optimizers: a list of parameters plus optional
# feasibility rules and checkers.
#
# Rules and checkers are Strings of Ruby code that are evaluated with eval:
# they must come from a trusted source.
class OptimizationSpace
  attr_reader :parameters
  # rules, once formatted (see #format_rules); nil when none were given
  attr_reader :rules
  # checkers: code evaluated before the rules are applied; nil when none were given
  attr_reader :checkers
  # Name of the variable through which a rule accesses the point being tested.
  HASH_NAME = "options"

  # @param parameters either a list of parameters, or a single Hash whose
  #   :rules and :checkers keys hold the rules and checkers (a single value
  #   or an Array) and whose other entries are turned into
  #   OptimizationParameter instances
  def initialize( *parameters )
    if parameters.length == 1 and parameters[0].is_a?(Hash) then
      @parameters = []
      parameters[0].each { |key, value|
        if key == :rules then
          @rules = [value].flatten
          format_rules
        elsif key == :checkers then
          @checkers = [value].flatten
        else
          @parameters.push( OptimizationParameter::new(key, value) )
        end
      }
    else
      @parameters = parameters
    end
  end

  # Add to the parameters of the rules the name of the hash variable:
  # ":x" becomes "options[:x]". Symbols already written as "options[:x]" are
  # left alone, so formatting is idempotent.
  #
  # Each rule is rewritten in a single pass into a new String: the Strings
  # given by the caller are not modified (they may be frozen), and a symbol
  # that is the prefix of another one (:x and :x_size) is not mangled.
  # Symbols preceded by another colon (constant paths such as Math::PI) are
  # not rewritten.
  def format_rules
    regxp = /(?<!#{HASH_NAME}\[)(?<!:):\w+\b(?!\])/
    @rules = @rules.collect { |r|
      r.gsub(regxp) { |m| "#{HASH_NAME}[#{m}]" }
    }
  end

  # Remove all points that do not meet ALL the rules.
  # Checkers are evaluated first so that rules can use what they define.
  # Nothing is done when no rules were given.
  # @param points [Array<Hash>] candidate points, modified in place
  # @return the result of Array#reject! when rules exist, nil otherwise
  def remove_unfeasible(points = [])
    return unless @rules
    @checkers.each { |checker| eval checker } if @checkers
    # The block variable must be named after HASH_NAME for the rules to see it.
    eval "points.reject! { |#{HASH_NAME}| not @rules.all? { |r| eval r } }"
  end

  # @return [Hash] parameter names mapped to their values, plus the :rules
  #   and :checkers entries when present
  def to_h
    h = {}
    @parameters.each { |p|
      h[p.name] = p.values
    }
    h[:rules] = @rules if @rules
    h[:checkers] = @checkers if @checkers
    return h
  end
end
|
44
82
|
|
45
83
|
class Optimizer
|
@@ -47,6 +85,7 @@ module BOAST
|
|
47
85
|
attr_reader :experiments
|
48
86
|
attr_reader :search_space
|
49
87
|
attr_reader :log
|
88
|
+
attr_reader :history
|
50
89
|
|
51
90
|
def initialize(search_space, options = {} )
|
52
91
|
@search_space = search_space
|
@@ -107,7 +146,7 @@ EOF
|
|
107
146
|
end
|
108
147
|
|
109
148
|
# @return [String] string form of the array returned by to_a
def to_s
  to_a.to_s
end
|
112
151
|
|
113
152
|
end
|
@@ -118,13 +157,15 @@ EOF
|
|
118
157
|
# Runs the genetic search.
# @param options [Hash] settings handed to Darwinning::Population; they
#   override the defaults below (:organism is always set to the organism of
#   this optimizer)
# @yield block stored on the organism
# @return the best member of the evolved population, as an array
def optimize(options={}, &block)
  defaults = {
    :population_size => 20,
    :fitness_goal => 0,
    :generations_limit => 100,
    :search_space => @search_space
  }
  opts = defaults.merge(options)
  opts[:organism] = @organism
  @organism.block = block
  @organism.experiments = 0
  population = Darwinning::Population.new(opts)
  population.evolve!
  @history = population.history
  @experiments = @organism.experiments
  population.best_member.to_a
end
|
@@ -136,14 +177,18 @@ EOF
|
|
136
177
|
# @param search_space forwarded to the parent constructor
# @param options [Hash] forwarded to the parent constructor
# @option options :randomize shuffle the points before exploring them
# @option options :checkpoint start index of the slice of points to explore
# @option options :checkpoint_size number of points in that slice
# @option options :seed seed used when shuffling
def initialize(search_space, options = {} )
  super
  @randomize, @checkpoint, @checkpoint_size, @seed =
    options.values_at(:randomize, :checkpoint, :checkpoint_size, :seed)
end
|
140
184
|
|
141
185
|
def points
|
142
186
|
params2 = @search_space.parameters.dup
|
143
187
|
param = params2.shift
|
188
|
+
|
144
189
|
pts = param.values.collect { |val| {param.name => val} }
|
145
190
|
if params2.size == 0 then
|
146
|
-
|
191
|
+
pts4 = pts
|
147
192
|
else
|
148
193
|
optim2 = BruteForceOptimizer::new(OptimizationSpace::new(*params2))
|
149
194
|
pts3=[]
|
@@ -152,8 +197,10 @@ EOF
|
|
152
197
|
pts3.push(p1.dup.update(p2))
|
153
198
|
}
|
154
199
|
}
|
155
|
-
|
200
|
+
pts4 = pts3
|
156
201
|
end
|
202
|
+
@search_space.remove_unfeasible pts4
|
203
|
+
return pts4
|
157
204
|
end
|
158
205
|
|
159
206
|
def each(&block)
|
@@ -169,7 +216,11 @@ EOF
|
|
169
216
|
@log = {}
|
170
217
|
best = [nil, Float::INFINITY]
|
171
218
|
pts = points
|
172
|
-
|
219
|
+
|
220
|
+
(@seed ? pts.shuffle!(random: Random.new(@seed)) : pts.shuffle!) if @randomize
|
221
|
+
|
222
|
+
pts = pts.slice(@checkpoint,@checkpoint_size) if @checkpoint_size
|
223
|
+
|
173
224
|
pts.each { |config|
|
174
225
|
@experiments += 1
|
175
226
|
metric = block.call(config)
|
@@ -26,7 +26,7 @@ module BOAST
|
|
26
26
|
@procedure.parameters.each { |param|
|
27
27
|
if param.dimension then
|
28
28
|
params.push( param.name )
|
29
|
-
elsif param.direction == :out or param.direction == :inout then
|
29
|
+
elsif param.direction == :out or param.direction == :inout or param.reference? then
|
30
30
|
params.push( "&"+param.name )
|
31
31
|
else
|
32
32
|
params.push( param.name )
|
@@ -220,6 +220,7 @@ EOF
|
|
220
220
|
param_copy = param.copy
|
221
221
|
param_copy.constant = nil
|
222
222
|
param_copy.direction = nil
|
223
|
+
param_copy.reference = nil
|
223
224
|
param_copy.decl
|
224
225
|
}
|
225
226
|
get_output.puts " #{@procedure.properties[:return].type.decl} _boast_ret;" if @procedure.properties[:return]
|
@@ -123,7 +123,7 @@ EOF
|
|
123
123
|
}
|
124
124
|
EOF
|
125
125
|
end
|
126
|
-
def is_available
|
126
|
+
# @return [Boolean] true when the powercap entry intel-rapl:0:0 is present
#   under /sys/class/powercap
def is_available?
  !Dir.glob( '/sys/class/powercap/intel-rapl:0:0' ).empty?
end
|
129
129
|
end
|
@@ -177,8 +177,13 @@ EOF
|
|
177
177
|
}
|
178
178
|
EOF
|
179
179
|
end
|
180
|
-
def is_available
|
181
|
-
|
180
|
+
# Tells whether libredfst.so can be found.
#
# The directories listed in the LIBRARY_PATH environment variable are
# searched first; when nothing is found there, the output of `ldconfig -p`
# is scanned. A system without an ldconfig command is reported as not
# providing the library instead of raising, since this check runs when the
# probes are selected at load time.
# @return [Boolean]
def is_available?
  dirs = ENV['LIBRARY_PATH'] ? ENV['LIBRARY_PATH'].split(':') : []
  return true if dirs.any? { |dir| Dir.glob(dir + '/libredfst.so') != [] }
  begin
    listing = `ldconfig -p`
  rescue SystemCallError
    # ldconfig is missing or cannot be executed
    return false
  end
  return listing.split("\n").any? { |line| line.match(/libredfst\.so/) }
end
|
183
188
|
end
|
184
189
|
|
@@ -215,15 +220,20 @@ EOF
|
|
215
220
|
}
|
216
221
|
EOF
|
217
222
|
end
|
218
|
-
def is_available
|
219
|
-
|
223
|
+
# Tells whether libeml.so can be found.
#
# The directories listed in the LIBRARY_PATH environment variable are
# searched first; when nothing is found there, the output of `ldconfig -p`
# is scanned. A system without an ldconfig command is reported as not
# providing the library instead of raising, since this check runs when the
# probes are selected at load time.
# @return [Boolean]
def is_available?
  dirs = ENV['LIBRARY_PATH'] ? ENV['LIBRARY_PATH'].split(':') : []
  return true if dirs.any? { |dir| Dir.glob(dir + '/libeml.so') != [] }
  begin
    listing = `ldconfig -p`
  rescue SystemCallError
    # ldconfig is missing or cannot be executed
    return false
  end
  return listing.split("\n").any? { |line| line.match(/libeml\.so/) }
end
|
221
231
|
end
|
222
|
-
if PowercapProbe.is_available
|
232
|
+
if PowercapProbe.is_available?
|
223
233
|
EnergyProbe = PowercapProbe
|
224
|
-
elsif RedfstProbe.is_available
|
234
|
+
elsif RedfstProbe.is_available?
|
225
235
|
EnergyProbe = RedfstProbe
|
226
|
-
elsif EmlProbe.is_available
|
236
|
+
elsif EmlProbe.is_available?
|
227
237
|
EnergyProbe = EmlProbe
|
228
238
|
else
|
229
239
|
EnergyProbe = nil
|