BOAST 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BOAST.gemspec +1 -1
- data/lib/BOAST/Language/Algorithm.rb +3 -3
- data/lib/BOAST/Language/Architectures.rb +58 -0
- data/lib/BOAST/Language/CPUID_by_name.rb +1871 -1871
- data/lib/BOAST/Language/Config.rb +10 -0
- data/lib/BOAST/Language/Intrinsics.rb +87 -44
- data/lib/BOAST/Language/Operators.rb +92 -10
- data/lib/BOAST/Language/Variable.rb +11 -4
- data/lib/BOAST/Optimization/Optimization.rb +58 -7
- data/lib/BOAST/Runtime/CRuntime.rb +1 -1
- data/lib/BOAST/Runtime/CompiledRuntime.rb +1 -0
- data/lib/BOAST/Runtime/EnergyProbe.rb +18 -8
- data/lib/BOAST/Runtime/FFIRuntime.rb +12 -6
- data/lib/BOAST/Runtime/FORTRANRuntime.rb +7 -2
- data/lib/BOAST/Runtime/MPPARuntime.rb +1 -1
- metadata +2 -2
@@ -135,6 +135,16 @@ module BOAST
|
|
135
135
|
return use_vla_old?
|
136
136
|
end
|
137
137
|
|
138
|
+
def set_model(val)
|
139
|
+
@@model=val
|
140
|
+
Intrinsics::generate_conversions
|
141
|
+
end
|
142
|
+
|
143
|
+
def model=(val)
|
144
|
+
@@model=val
|
145
|
+
Intrinsics::generate_conversions
|
146
|
+
end
|
147
|
+
|
138
148
|
# @private
|
139
149
|
def get_default_architecture
|
140
150
|
architecture = const_get(ENV["ARCHITECTURE"]) if ENV["ARCHITECTURE"]
|
@@ -66,8 +66,27 @@ module BOAST
|
|
66
66
|
end
|
67
67
|
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_architecture_name}!" unless instruction
|
68
68
|
return instruction if get_architecture == ARM
|
69
|
-
supported =
|
70
|
-
|
69
|
+
supported = false
|
70
|
+
INSTRUCTIONS[instruction.to_s].each { |cpuid|
|
71
|
+
if cpuid.kind_of?( Array ) then
|
72
|
+
supported = true if (cpuid - MODELS[get_model.to_s]).empty?
|
73
|
+
else
|
74
|
+
supported = true if MODELS[get_model.to_s].include?( cpuid )
|
75
|
+
end
|
76
|
+
}
|
77
|
+
# supported = (INSTRUCTIONS[instruction.to_s] & MODELS[get_model.to_s]).size > 0
|
78
|
+
if not supported then
|
79
|
+
required = ""
|
80
|
+
INSTRUCTIONS[instruction.to_s].each { |cpuid|
|
81
|
+
required += " or " if required != ""
|
82
|
+
if cpuid.kind_of?( Array ) then
|
83
|
+
required += "( #{cpuid.join(" and ")} )"
|
84
|
+
else
|
85
|
+
required += "#{cpuid}"
|
86
|
+
end
|
87
|
+
}
|
88
|
+
raise IntrinsicsError, "Unsupported operation #{intr_symbol} for #{type}#{type2 ? " and #{type2}" : ""} on #{get_model}! (requires #{required})"
|
89
|
+
end
|
71
90
|
return instruction
|
72
91
|
end
|
73
92
|
|
@@ -81,14 +100,14 @@ module BOAST
|
|
81
100
|
|
82
101
|
def get_conversion_path(type_dest, type_orig)
|
83
102
|
conversion_path = CONVERSIONS[get_architecture][get_vector_name(type_dest)][get_vector_name(type_orig)]
|
84
|
-
raise IntrinsicsError, "Unavailable conversion from #{get_vector_name(type_orig)} to #{get_vector_name(type_dest)} on #{get_architecture_name}!" unless conversion_path
|
103
|
+
raise IntrinsicsError, "Unavailable conversion from #{get_vector_name(type_orig)} to #{get_vector_name(type_dest)} on #{get_architecture_name}#{get_architecture==X86 ? "(#{get_model})" : "" }!" unless conversion_path
|
85
104
|
return conversion_path
|
86
105
|
end
|
87
106
|
|
88
107
|
module_function :get_conversion_path
|
89
108
|
|
90
109
|
def get_vector_decl_X86( data_type )
|
91
|
-
raise IntrinsicsError, "Unsupported vector size on X86: #{data_type.total_size*8}!" unless [64,128,256].include?( data_type.total_size*8 )
|
110
|
+
raise IntrinsicsError, "Unsupported vector size on X86: #{data_type.total_size*8}!" unless [64,128,256,512].include?( data_type.total_size*8 )
|
92
111
|
s = "__m#{data_type.total_size*8}"
|
93
112
|
case data_type
|
94
113
|
when Int
|
@@ -112,7 +131,7 @@ module BOAST
|
|
112
131
|
raise IntrinsicsError, "Unsupported data size for int vector on ARM: #{data_type.size*8}!" unless [1,2,4,8].include?( data_type.size )
|
113
132
|
return get_vector_name( data_type ).to_s
|
114
133
|
when Real
|
115
|
-
raise IntrinsicsError, "Unsupported data size for real vector on ARM: #{data_type.size*8}!"
|
134
|
+
raise IntrinsicsError, "Unsupported data size for real vector on ARM: #{data_type.size*8}!" unless [4,8].include?( data_type.size )
|
116
135
|
return get_vector_name( data_type ).to_s
|
117
136
|
else
|
118
137
|
raise IntrinsicsError, "Unsupported data type #{data_type} for vector on ARM!"
|
@@ -244,10 +263,11 @@ module BOAST
|
|
244
263
|
instructions.push( [:SET, "setr"] ) unless size < 32 and vector_size == 512
|
245
264
|
instructions.push( [:SET1, "set1"] )
|
246
265
|
instructions.push( [:MUL, "mullo"] ) if vector_size > 64 and size >= 16
|
247
|
-
instructions.push( [:MASKLOAD,
|
248
|
-
instructions.push( [:MASK_LOAD,
|
249
|
-
[:
|
250
|
-
[:MASKZ_LOAD,
|
266
|
+
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size <= 256 and vector_size >= 128 and size >= 32
|
267
|
+
instructions.push( [:MASK_LOAD, "mask_loadu"], [:MASK_STORE, "mask_storeu"],
|
268
|
+
[:MASK_LOADA, "mask_load"], [:MASK_STOREA, "mask_store"],
|
269
|
+
[:MASKZ_LOAD, "maskz_loadu"],
|
270
|
+
[:MASKZ_LOADA, "maskz_load"], ) if vector_size >= 128 and size >= 32
|
251
271
|
instructions.each { |cl, ins|
|
252
272
|
INTRINSICS[X86][cl][vtype] = "_mm#{vs}_#{ins}_#{type}".to_sym
|
253
273
|
}
|
@@ -278,9 +298,10 @@ module BOAST
|
|
278
298
|
[:LOAD, "loadu"], [:LOADA, "load"],
|
279
299
|
[:STORE, "storeu"], [:STOREA, "store"],
|
280
300
|
[:SET, "setr"], [:SET1, "set1"], [:SETZERO, "setzero"],
|
281
|
-
[:MASK_LOAD,
|
282
|
-
[:
|
283
|
-
[:MASKZ_LOAD,
|
301
|
+
[:MASK_LOAD, "mask_loadu"], [:MASK_STORE, "mask_storeu"],
|
302
|
+
[:MASK_LOADA, "mask_load"], [:MASK_STOREA, "mask_store"],
|
303
|
+
[:MASKZ_LOAD, "maskz_loadu"],
|
304
|
+
[:MASKZ_LOADA, "maskz_load"]]
|
284
305
|
instructions.push( [:MASKLOAD, "maskload"], [:MASKSTORE, "maskstore"] ) if vector_size < 512
|
285
306
|
instructions.push( [:ADDSUB, "addsub"] ) if vector_size < 512
|
286
307
|
instructions.each { |cl, ins|
|
@@ -362,7 +383,7 @@ module BOAST
|
|
362
383
|
}
|
363
384
|
}
|
364
385
|
}
|
365
|
-
[32].each { |size|
|
386
|
+
[32, 64].each { |size|
|
366
387
|
vtype = vector_type_name( :float, size, vector_size )
|
367
388
|
type = type_name_ARM( :float, size )
|
368
389
|
[[:ADD, "add"], [:SUB, "sub"], [:MUL, "mul"],
|
@@ -381,18 +402,24 @@ module BOAST
|
|
381
402
|
}
|
382
403
|
INTRINSICS[ARM][:CVT] = Hash::new { |h,k| h[k] = {} }
|
383
404
|
[64, 128].each { |vector_size|
|
384
|
-
int_size
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
405
|
+
[[32, 32],[64, 64]].each { |int_size, float_size|
|
406
|
+
q = (vector_size == 128 ? "q" : "")
|
407
|
+
[:signed, :unsigned].each { |sign|
|
408
|
+
fvtype = vector_type_name( :float, float_size, vector_size )
|
409
|
+
ivtype = vector_type_name( :int, int_size, vector_size, sign )
|
410
|
+
ftype = type_name_ARM( :float, float_size )
|
411
|
+
itype = type_name_ARM( :int, int_size, sign )
|
412
|
+
INTRINSICS[ARM][:CVT][ivtype][fvtype] = "vcvt#{q}_#{itype}_#{ftype}".to_sym
|
413
|
+
INTRINSICS[ARM][:CVT][fvtype][ivtype] = "vcvt#{q}_#{ftype}_#{itype}".to_sym
|
414
|
+
}
|
394
415
|
}
|
395
416
|
}
|
417
|
+
sfvtype = vector_type_name( :float, 32, 64 )
|
418
|
+
sdvtype = vector_type_name( :float, 64, 128 )
|
419
|
+
sftype = type_name_ARM( :float, 32 )
|
420
|
+
sdtype = type_name_ARM( :float, 64 )
|
421
|
+
INTRINSICS[ARM][:CVT][sfvtype][sdvtype] = "vcvt_#{sftype}_#{sdtype}".to_sym
|
422
|
+
INTRINSICS[ARM][:CVT][sdvtype][sfvtype] = "vcvt_#{sdtype}_#{sftype}".to_sym
|
396
423
|
svsize = 64
|
397
424
|
bvsize = 128
|
398
425
|
[16, 32, 64].each { |bsize|
|
@@ -407,30 +434,46 @@ module BOAST
|
|
407
434
|
}
|
408
435
|
}
|
409
436
|
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
437
|
+
def generate_conversions
|
438
|
+
[X86, ARM].each { |arch|
|
439
|
+
cvt_dgraph = RGL::DirectedAdjacencyGraph::new
|
440
|
+
INTRINSICS[arch][:CVT].each { |dest, origs|
|
441
|
+
origs.each { |orig, intrinsic|
|
442
|
+
supported = true
|
443
|
+
if arch == X86
|
444
|
+
supported = false
|
445
|
+
INSTRUCTIONS[intrinsic.to_s].each { |cpuid|
|
446
|
+
if cpuid.kind_of?( Array ) then
|
447
|
+
supported = true if (cpuid - MODELS[get_model.to_s]).empty?
|
448
|
+
else
|
449
|
+
supported = true if MODELS[get_model.to_s].include?( cpuid )
|
450
|
+
end
|
451
|
+
}
|
452
|
+
end
|
453
|
+
cvt_dgraph.add_edge(orig, dest) if supported
|
454
|
+
}
|
415
455
|
}
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
456
|
+
cvt_dgraph.vertices.each { |source|
|
457
|
+
hash = {}
|
458
|
+
cvt_dgraph.edges.each { |e| hash[e.to_a] = 1 }
|
459
|
+
paths = cvt_dgraph.dijkstra_shortest_paths( hash, source )
|
460
|
+
paths.each { |dest, path|
|
461
|
+
CONVERSIONS[arch][dest][source] = path if path
|
462
|
+
}
|
463
|
+
}
|
464
|
+
types = []
|
465
|
+
INTRINSICS[arch].each { |intrinsic, instructions|
|
466
|
+
types += instructions.keys
|
467
|
+
}
|
468
|
+
types.uniq
|
469
|
+
types.each { |type|
|
470
|
+
CONVERSIONS[arch][type][type] = [type]
|
423
471
|
}
|
424
472
|
}
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
types.uniq
|
430
|
-
types.each { |type|
|
431
|
-
CONVERSIONS[arch][type][type] = [type]
|
432
|
-
}
|
433
|
-
}
|
473
|
+
end
|
474
|
+
module_function :generate_conversions
|
475
|
+
|
476
|
+
generate_conversions
|
434
477
|
|
435
478
|
end
|
436
479
|
|
@@ -192,7 +192,7 @@ module BOAST
|
|
192
192
|
if arg1.class == Variable and arg1.type.vector_length > 1 then
|
193
193
|
return "#{arg1} = #{Load(arg2, arg1)}"
|
194
194
|
elsif arg2.class == Variable and arg2.type.vector_length > 1 then
|
195
|
-
return "#{Store(arg1, arg2, return_type)}"
|
195
|
+
return "#{Store(arg1, arg2, :store_type => return_type)}"
|
196
196
|
end
|
197
197
|
return basic_usage(arg1, arg2)
|
198
198
|
end
|
@@ -304,6 +304,61 @@ module BOAST
|
|
304
304
|
|
305
305
|
end
|
306
306
|
|
307
|
+
class Mask
|
308
|
+
extend Functor
|
309
|
+
|
310
|
+
attr_reader :value
|
311
|
+
attr_reader :length
|
312
|
+
attr_reader :pos_values
|
313
|
+
|
314
|
+
def empty?
|
315
|
+
if @pos_values then
|
316
|
+
return @pos_values == 0
|
317
|
+
else
|
318
|
+
return false
|
319
|
+
end
|
320
|
+
end
|
321
|
+
|
322
|
+
def full?
|
323
|
+
if @pos_values and @length
|
324
|
+
return @pos_values == @length
|
325
|
+
else
|
326
|
+
return false
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
def initialize( values, options = {} )
|
331
|
+
length = options[:length]
|
332
|
+
if values.kind_of?(Mask) then
|
333
|
+
raise OperatorError, "Wrong number of mask values (#{values.length} for #{length})!" if length and values.length and values.length != length
|
334
|
+
@value = values.value
|
335
|
+
@length = length ? length : values.length
|
336
|
+
@pos_values = values.pos_values
|
337
|
+
elsif values.kind_of?(Array) then
|
338
|
+
raise OperatorError, "Wrong number of mask values (#{values.length} for #{length})!" if length and values.length != length
|
339
|
+
s = "0x"
|
340
|
+
s += values.collect { |v| v != 0 ? 1 : 0 }.reverse.join
|
341
|
+
@value = Int( s, :signed => false, :size => values.length / 8 + ( values.length % 8 > 0 ? 1 : 0 ), :constant => s )
|
342
|
+
@length = values.length
|
343
|
+
@pos_values = values.reject { |e| e == 0 }.length
|
344
|
+
elsif values.kind_of?(Variable) and values.type.kind_of?(Int) then
|
345
|
+
raise OperatorError, "Wrong mask size (#{values.type.size} for #{length / 8 + ( length % 8 > 0 ? 1 : 0 )})!" if length and values.type.size != length / 8 + ( length % 8 > 0 ? 1 : 0 )
|
346
|
+
@value = values
|
347
|
+
@length = length if length
|
348
|
+
else
|
349
|
+
raise OperatorError, "Illegal valuess for mask (#{values.class}), expecting Array of Int!"
|
350
|
+
end
|
351
|
+
end
|
352
|
+
|
353
|
+
def to_s
|
354
|
+
return @value.to_s
|
355
|
+
end
|
356
|
+
|
357
|
+
def to_var
|
358
|
+
return @value
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
307
362
|
# @!parse module Functors; functorize Set; end
|
308
363
|
class Set < Operator
|
309
364
|
extend Functor
|
@@ -386,10 +441,14 @@ module BOAST
|
|
386
441
|
|
387
442
|
attr_reader :source
|
388
443
|
attr_reader :return_type
|
444
|
+
attr_reader :options
|
389
445
|
|
390
|
-
def initialize(source, return_type)
|
446
|
+
def initialize(source, return_type, options = {})
|
391
447
|
@source = source
|
392
448
|
@return_type = return_type.to_var
|
449
|
+
@options = options
|
450
|
+
@mask = options[:mask]
|
451
|
+
@zero = options[:zero]
|
393
452
|
end
|
394
453
|
|
395
454
|
def type
|
@@ -412,10 +471,25 @@ module BOAST
|
|
412
471
|
end
|
413
472
|
return @return_type.copy("vload#{@return_type.type.vector_length}(0, #{a2})", DISCARD_OPTIONS) if lang == CL
|
414
473
|
return @return_type.copy("_m_from_int64( *((int64_t * ) #{a2} ) )", DISCARD_OPTIONS) if get_architecture == X86 and @return_type.type.total_size*8 == 64
|
474
|
+
sym = ""
|
475
|
+
mask = nil
|
476
|
+
mask = Mask(@mask, :length => @return_type.type.vector_length) if @mask
|
477
|
+
if mask and not mask.full? then
|
478
|
+
return Set(0, @return_type) if @zero and mask.empty?
|
479
|
+
return @return_type if mask.empty?
|
480
|
+
sym += "MASK"
|
481
|
+
sym += "Z" if @zero
|
482
|
+
sym += "_"
|
483
|
+
end
|
415
484
|
if @source.alignment and @return_type.type.total_size and ( @source.alignment % @return_type.type.total_size ) == 0 then
|
416
|
-
|
485
|
+
sym += "LOADA"
|
417
486
|
else
|
418
|
-
|
487
|
+
sym += "LOAD"
|
488
|
+
end
|
489
|
+
instruction = intrinsics( sym.to_sym, @return_type.type)
|
490
|
+
if mask and not mask.full? then
|
491
|
+
return @return_type.copy("#{instruction}( (#{mask.value.type.decl})#{mask}, #{a2} )", DISCARD_OPTIONS) if @zero
|
492
|
+
return @return_type.copy("#{instruction}( #{@return_type}, (#{mask.value.type.decl})#{mask}, #{a2} )", DISCARD_OPTIONS)
|
419
493
|
end
|
420
494
|
return @return_type.copy("#{instruction}( #{a2} )", DISCARD_OPTIONS)
|
421
495
|
else
|
@@ -516,12 +590,14 @@ module BOAST
|
|
516
590
|
attr_reader :dest
|
517
591
|
attr_reader :source
|
518
592
|
attr_reader :store_type
|
593
|
+
attr_reader :options
|
519
594
|
|
520
|
-
def initialize(dest, source,
|
595
|
+
def initialize(dest, source, options = {})
|
521
596
|
@dest = dest
|
522
597
|
@source = source
|
523
|
-
@store_type = store_type
|
598
|
+
@store_type = options[:store_type]
|
524
599
|
@store_type = source.to_var unless @store_type
|
600
|
+
@mask = options[:mask]
|
525
601
|
end
|
526
602
|
|
527
603
|
def to_s
|
@@ -535,14 +611,20 @@ module BOAST
|
|
535
611
|
type = @store_type.type
|
536
612
|
return "vstore#{type.vector_length}( #{@source}, 0, #{dst} )" if lang == CL
|
537
613
|
return "*((int64_t * ) #{dst}) = _m_to_int64( #{@source} )" if get_architecture == X86 and type.total_size*8 == 64
|
538
|
-
|
614
|
+
sym = ""
|
615
|
+
mask = nil
|
616
|
+
mask = Mask(@mask, :length => @store_type.type.vector_length) if @mask
|
617
|
+
return "" if mask and mask.empty?
|
618
|
+
sym += "MASK_" if mask and not mask.full?
|
539
619
|
if @dest.alignment and type.total_size and ( @dest.alignment % type.total_size ) == 0 then
|
540
|
-
|
620
|
+
sym += "STOREA"
|
541
621
|
else
|
542
|
-
|
622
|
+
sym += "STORE"
|
543
623
|
end
|
624
|
+
instruction = intrinsics(sym.to_sym, type)
|
544
625
|
p_type = type.copy(:vector_length => 1)
|
545
626
|
p_type = type if get_architecture == X86 and type.kind_of?(Int)
|
627
|
+
return "#{instruction}( (#{p_type.decl} * ) #{dst}, (#{mask.value.type.decl})#{mask}, #{@source} )" if mask and not mask.full?
|
546
628
|
return "#{instruction}( (#{p_type.decl} * ) #{dst}, #{@source} )"
|
547
629
|
end
|
548
630
|
return Affectation.basic_usage(@dest, @source)
|
@@ -592,7 +674,7 @@ module BOAST
|
|
592
674
|
type = @store_type.type
|
593
675
|
raise LanguageError, "Unsupported language!" unless lang == C
|
594
676
|
raise OperatorError, "Mask size is wrong: #{@mask.length} for #{type.vector_length}!" if @mask.length != type.vector_length
|
595
|
-
return Store( @dest, @source, @store_type ).to_s unless @mask.include?(0)
|
677
|
+
return Store( @dest, @source, :store_type => @store_type ).to_s unless @mask.include?(0)
|
596
678
|
return nil if @mask.uniq.size == 1 and @mask.uniq.first == 0
|
597
679
|
instruction = intrinsics(:MASKSTORE, type)
|
598
680
|
s = ""
|
@@ -179,6 +179,7 @@ module BOAST
|
|
179
179
|
attr_reader :restrict
|
180
180
|
attr_reader :deferred_shape
|
181
181
|
attr_reader :optional
|
182
|
+
attr_accessor :reference
|
182
183
|
attr_accessor :alignment
|
183
184
|
attr_accessor :replace_constant
|
184
185
|
attr_accessor :force_replace_constant
|
@@ -231,6 +232,10 @@ module BOAST
|
|
231
232
|
!!@deferred_shape
|
232
233
|
end
|
233
234
|
|
235
|
+
def reference?
|
236
|
+
!!@reference
|
237
|
+
end
|
238
|
+
|
234
239
|
# Creates a new {Variable}
|
235
240
|
# @param [#to_s] name
|
236
241
|
# @param [DataType] type
|
@@ -239,6 +244,7 @@ module BOAST
|
|
239
244
|
# @option properties [Array<Dimension>] :dimension (or *:dim*) variable is an array rather than a scalar. Dimensions are given in Fortran order (contiguous first).
|
240
245
|
# @option properties [Object] :constant (or *:const*) states that the variable is a constant and give its value. For Variable with the *:dimension* property set must be a {ConstArray}
|
241
246
|
# @option properties [Boolean] :restrict specifies that the compiler can assume no aliasing to this array.
|
247
|
+
# @option properties [Boolean] :reference specifies that this variable is passed by reference.
|
242
248
|
# @option properties [Symbol] :allocate specify that the variable is to be allocated and where. Can only be *:heap* or *:stack* for now.
|
243
249
|
# @option properties [Boolean] :local indicates that the variable is to be allocated on the __local space of OpenCL devices or __shared__ space of CUDA devices. In C or FORTRAN this has the same effect as *:allocate* => *:stack*.
|
244
250
|
# @option properties [Boolean] :texture for OpenCL and CUDA. In OpenCL also specifies that a sampler has to be generated to access the array variable.
|
@@ -258,6 +264,7 @@ module BOAST
|
|
258
264
|
@alignment = properties[:align]
|
259
265
|
@deferred_shape = properties[:deferred_shape]
|
260
266
|
@optional = properties[:optional]
|
267
|
+
@reference = properties[:reference]
|
261
268
|
@force_replace_constant = false
|
262
269
|
if not properties[:replace_constant].nil? then
|
263
270
|
@replace_constant = properties[:replace_constant]
|
@@ -308,7 +315,7 @@ module BOAST
|
|
308
315
|
s = @constant.to_s + @type.suffix
|
309
316
|
return s
|
310
317
|
end
|
311
|
-
if @scalar_output and [C, CL, CUDA].include?( lang ) and not decl_module? then
|
318
|
+
if @scalar_output or @reference and [C, CL, CUDA].include?( lang ) and not decl_module? then
|
312
319
|
return "(*#{name})"
|
313
320
|
end
|
314
321
|
return @name
|
@@ -348,7 +355,7 @@ module BOAST
|
|
348
355
|
if dimension? then
|
349
356
|
s += " *" unless (use_vla? and lang != FORTRAN)
|
350
357
|
end
|
351
|
-
if not dimension? and ( lang == FORTRAN or @direction == :out or @direction == :inout ) then
|
358
|
+
if not dimension? and ( lang == FORTRAN or @direction == :out or @direction == :inout or @reference ) then
|
352
359
|
s += " *"
|
353
360
|
end
|
354
361
|
s += " #{@name}"
|
@@ -365,7 +372,7 @@ module BOAST
|
|
365
372
|
def decl_ffi(alloc, lang)
|
366
373
|
return :pointer if lang == FORTRAN and not alloc
|
367
374
|
return :pointer if dimension?
|
368
|
-
return :pointer if @direction == :out or @direction == :inout and not alloc
|
375
|
+
return :pointer if @direction == :out or @direction == :inout or @reference and not alloc
|
369
376
|
return @type.decl_ffi
|
370
377
|
end
|
371
378
|
|
@@ -426,7 +433,7 @@ module BOAST
|
|
426
433
|
end
|
427
434
|
|
428
435
|
def __pointer?(device = false)
|
429
|
-
return !!( ( not dimension? and ( @direction == :out or @direction == :inout ) ) or __pointer_array?(device) )
|
436
|
+
return !!( ( not dimension? and ( @direction == :out or @direction == :inout or @reference ) ) or __pointer_array?(device) )
|
430
437
|
end
|
431
438
|
|
432
439
|
def __restrict?
|
@@ -20,26 +20,64 @@ module BOAST
|
|
20
20
|
|
21
21
|
class OptimizationSpace
|
22
22
|
attr_reader :parameters
|
23
|
+
attr_reader :rules
|
24
|
+
attr_reader :checkers
|
25
|
+
HASH_NAME = "options"
|
23
26
|
|
24
27
|
def initialize( *parameters )
|
25
28
|
if parameters.length == 1 and parameters[0].is_a?(Hash) then
|
26
29
|
@parameters = []
|
27
30
|
parameters[0].each { |key, value|
|
28
|
-
|
31
|
+
if key == :rules then
|
32
|
+
@rules = [value].flatten
|
33
|
+
format_rules
|
34
|
+
elsif key == :checkers then
|
35
|
+
@checkers = [value].flatten
|
36
|
+
else
|
37
|
+
@parameters.push( OptimizationParameter::new(key, value) )
|
38
|
+
end
|
29
39
|
}
|
30
40
|
else
|
31
41
|
@parameters = parameters
|
32
42
|
end
|
33
43
|
end
|
34
44
|
|
45
|
+
# Add to the parameters of the rules the name of the hash variable
|
46
|
+
def format_rules
|
47
|
+
regxp = /(?<!#{HASH_NAME}\[):\w+(?!\])/
|
48
|
+
@rules.each{|r|
|
49
|
+
matches = r.scan(regxp)
|
50
|
+
matches = matches.uniq
|
51
|
+
matches.each{ |m|
|
52
|
+
r.gsub!(/(?<!#{HASH_NAME}\[)#{m}(?!\])/, "#{HASH_NAME}[#{m}]")
|
53
|
+
}
|
54
|
+
}
|
55
|
+
end
|
56
|
+
|
57
|
+
# Remove all points that do not meet ALL the rules.
|
58
|
+
def remove_unfeasible (points = [])
|
59
|
+
if @rules then
|
60
|
+
if @checkers
|
61
|
+
@checkers.each { |checker| eval checker }
|
62
|
+
end
|
63
|
+
s = <<EOF
|
64
|
+
points.reject!{ |#{HASH_NAME}|
|
65
|
+
not @rules.all?{ |r| eval r }
|
66
|
+
}
|
67
|
+
EOF
|
68
|
+
eval s
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
35
72
|
def to_h
|
36
73
|
h = {}
|
37
74
|
@parameters.each { |p|
|
38
75
|
h[p.name] = p.values
|
39
76
|
}
|
77
|
+
h[:rules] = @rules if @rules
|
78
|
+
h[:checkers] = @checkers if @checkers
|
40
79
|
return h
|
41
80
|
end
|
42
|
-
|
43
81
|
end
|
44
82
|
|
45
83
|
class Optimizer
|
@@ -47,6 +85,7 @@ module BOAST
|
|
47
85
|
attr_reader :experiments
|
48
86
|
attr_reader :search_space
|
49
87
|
attr_reader :log
|
88
|
+
attr_reader :history
|
50
89
|
|
51
90
|
def initialize(search_space, options = {} )
|
52
91
|
@search_space = search_space
|
@@ -107,7 +146,7 @@ EOF
|
|
107
146
|
end
|
108
147
|
|
109
148
|
def to_s
|
110
|
-
return
|
149
|
+
return to_a.to_s
|
111
150
|
end
|
112
151
|
|
113
152
|
end
|
@@ -118,13 +157,15 @@ EOF
|
|
118
157
|
def optimize(options={}, &block)
|
119
158
|
opts = { :population_size => 20,
|
120
159
|
:fitness_goal => 0,
|
121
|
-
:generations_limit => 100
|
160
|
+
:generations_limit => 100,
|
161
|
+
:search_space => @search_space }
|
122
162
|
opts.update(options)
|
123
163
|
opts[:organism] = @organism
|
124
164
|
@organism.block = block
|
125
165
|
@organism.experiments = 0
|
126
166
|
population = Darwinning::Population.new(opts)
|
127
167
|
population.evolve!
|
168
|
+
@history = population.history
|
128
169
|
@experiments = @organism.experiments
|
129
170
|
return population.best_member.to_a
|
130
171
|
end
|
@@ -136,14 +177,18 @@ EOF
|
|
136
177
|
def initialize(search_space, options = {} )
|
137
178
|
super
|
138
179
|
@randomize = options[:randomize]
|
180
|
+
@checkpoint = options[:checkpoint]
|
181
|
+
@checkpoint_size = options[:checkpoint_size]
|
182
|
+
@seed = options[:seed]
|
139
183
|
end
|
140
184
|
|
141
185
|
def points
|
142
186
|
params2 = @search_space.parameters.dup
|
143
187
|
param = params2.shift
|
188
|
+
|
144
189
|
pts = param.values.collect { |val| {param.name => val} }
|
145
190
|
if params2.size == 0 then
|
146
|
-
|
191
|
+
pts4 = pts
|
147
192
|
else
|
148
193
|
optim2 = BruteForceOptimizer::new(OptimizationSpace::new(*params2))
|
149
194
|
pts3=[]
|
@@ -152,8 +197,10 @@ EOF
|
|
152
197
|
pts3.push(p1.dup.update(p2))
|
153
198
|
}
|
154
199
|
}
|
155
|
-
|
200
|
+
pts4 = pts3
|
156
201
|
end
|
202
|
+
@search_space.remove_unfeasible pts4
|
203
|
+
return pts4
|
157
204
|
end
|
158
205
|
|
159
206
|
def each(&block)
|
@@ -169,7 +216,11 @@ EOF
|
|
169
216
|
@log = {}
|
170
217
|
best = [nil, Float::INFINITY]
|
171
218
|
pts = points
|
172
|
-
|
219
|
+
|
220
|
+
(@seed ? pts.shuffle!(random: Random.new(@seed)) : pts.shuffle!) if @randomize
|
221
|
+
|
222
|
+
pts = pts.slice(@checkpoint,@checkpoint_size) if @checkpoint_size
|
223
|
+
|
173
224
|
pts.each { |config|
|
174
225
|
@experiments += 1
|
175
226
|
metric = block.call(config)
|
@@ -26,7 +26,7 @@ module BOAST
|
|
26
26
|
@procedure.parameters.each { |param|
|
27
27
|
if param.dimension then
|
28
28
|
params.push( param.name )
|
29
|
-
elsif param.direction == :out or param.direction == :inout then
|
29
|
+
elsif param.direction == :out or param.direction == :inout or param.reference? then
|
30
30
|
params.push( "&"+param.name )
|
31
31
|
else
|
32
32
|
params.push( param.name )
|
@@ -220,6 +220,7 @@ EOF
|
|
220
220
|
param_copy = param.copy
|
221
221
|
param_copy.constant = nil
|
222
222
|
param_copy.direction = nil
|
223
|
+
param_copy.reference = nil
|
223
224
|
param_copy.decl
|
224
225
|
}
|
225
226
|
get_output.puts " #{@procedure.properties[:return].type.decl} _boast_ret;" if @procedure.properties[:return]
|
@@ -123,7 +123,7 @@ EOF
|
|
123
123
|
}
|
124
124
|
EOF
|
125
125
|
end
|
126
|
-
def is_available
|
126
|
+
def is_available?
|
127
127
|
[] != Dir.glob( '/sys/class/powercap/intel-rapl:0:0' )
|
128
128
|
end
|
129
129
|
end
|
@@ -177,8 +177,13 @@ EOF
|
|
177
177
|
}
|
178
178
|
EOF
|
179
179
|
end
|
180
|
-
def is_available
|
181
|
-
|
180
|
+
def is_available?
|
181
|
+
path = []
|
182
|
+
if ENV['LIBRARY_PATH'] then
|
183
|
+
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libredfst.so')}
|
184
|
+
end
|
185
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libredfst\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
186
|
+
return path != []
|
182
187
|
end
|
183
188
|
end
|
184
189
|
|
@@ -215,15 +220,20 @@ EOF
|
|
215
220
|
}
|
216
221
|
EOF
|
217
222
|
end
|
218
|
-
def is_available
|
219
|
-
|
223
|
+
def is_available?
|
224
|
+
path = []
|
225
|
+
if ENV['LIBRARY_PATH'] then
|
226
|
+
path += ENV['LIBRARY_PATH'].split(':').inject([]){|mem, x| []!=mem ? mem : Dir.glob(x+'/libeml.so')}
|
227
|
+
end
|
228
|
+
path += `ldconfig -p`.gsub("\t","").split("\n").find_all { |e| e.match(/libeml\.so/) }.collect { |e| e.split(" => ")[1] } if path == []
|
229
|
+
return path != []
|
220
230
|
end
|
221
231
|
end
|
222
|
-
if PowercapProbe.is_available
|
232
|
+
if PowercapProbe.is_available?
|
223
233
|
EnergyProbe = PowercapProbe
|
224
|
-
elsif RedfstProbe.is_available
|
234
|
+
elsif RedfstProbe.is_available?
|
225
235
|
EnergyProbe = RedfstProbe
|
226
|
-
elsif EmlProbe.is_available
|
236
|
+
elsif EmlProbe.is_available?
|
227
237
|
EnergyProbe = EmlProbe
|
228
238
|
else
|
229
239
|
EnergyProbe = nil
|