bones-compiler 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
|
|
2
|
+
module Bones
|
|
3
|
+
# This class holds one algorithm, which includes a species,
|
|
4
|
+
# a name, and the source C-code.
|
|
5
|
+
#
|
|
6
|
+
# The algorithm class holds all sorts of information on var-
|
|
7
|
+
# iables. This information is only available after calling
|
|
8
|
+
# the 'populate' method, which populates lists of varia-
|
|
9
|
+
# bles of all sorts: a regular list, a specialized hash,
|
|
10
|
+
# and lists of input/output array variables.
|
|
11
|
+
class Algorithm < Common
|
|
12
|
+
# Read-only accessors for the algorithm's name, species, parsed code,
# variable lists, array list, identifier and enclosing function name.
attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name
# Readable and writable by callers: the search-and-replace hash and the
# thread-merge factor.
# NOTE(review): the generated 'hash' reader replaces Object#hash for
# instances of this class, so they should not be used as Hash keys.
attr_accessor :hash, :merge_factor

# Constant to set the name of the algorithm's accelerated version
# (frozen so the shared suffix cannot be mutated by accident).
ACCELERATED = '_accelerated'.freeze
# Constant to set the name of the algorithm's original version
# (frozen so the shared suffix cannot be mutated by accident).
ORIGINAL = '_original'.freeze
|
|
19
|
+
|
|
20
|
+
# This method initializes the class. It gives the new
|
|
21
|
+
# algorithm a name, species and source code. At initiali-
|
|
22
|
+
# zation, this method checks if the name starts with a
|
|
23
|
+
# digit. This is not allowed, so an underscore is added
|
|
24
|
+
# prior to the digit.
|
|
25
|
+
def initialize(name, filename, id, species, code)
  # A C identifier may not start with a digit, so such names get a
  # leading underscore before anything is derived from them.
  name = '_' + name if name =~ /^\d/

  # Identity of the algorithm and the names derived from it.
  @filename = filename
  @basename = name
  @id = id
  @name = (name + '_' + id).gsub(/\W/,'')
  @original_name = @name + ORIGINAL
  @accelerated_name = @name + ACCELERATED

  # Classification and the parsed, preprocessed source code.
  @species = species
  @code = C::Statement.parse(code).preprocess

  # Containers which are filled in later by the 'populate' methods.
  @hash = {}
  @lists = {}
  [:host_name, :host_definition, :argument_name, :argument_definition, :golden_name].each do |list_name|
    @lists[list_name] = []
  end
  @arrays = Variablelist.new()
  @constants = Variablelist.new()

  # Defaults: no thread merging and no enclosing function known yet.
  @merge_factor = 1
  @function_code = ''
  @function_name = ''
end
|
|
43
|
+
|
|
44
|
+
# This method sets the code and name for the function in
|
|
45
|
+
# which the algorithm is found. This is done based on the
|
|
46
|
+
# original code, which is given as input to this method.
|
|
47
|
+
# The method does not return any value, instead, it sets
|
|
48
|
+
# two instance variables (@function_code and @function_name).
|
|
49
|
+
def set_function(full_code)
  # Visit every function of the full code; each function that contains
  # the algorithm's code overwrites the stored function, so the last
  # matching function is the one that remains.
  full_code.get_functions.each do |candidate|
    next unless candidate.node_exists?(@code)
    @function_code = candidate
    @function_name = candidate.name
  end
end
|
|
57
|
+
|
|
58
|
+
# This method performs the code transformations according
|
|
59
|
+
# to the transformation settings as provided as an argument
|
|
60
|
+
# to the function. It calls the various code transformation
|
|
61
|
+
# functions as implemented for the CAST class. The resulting
|
|
62
|
+
# modified code is finally stored in the search-and-replace
|
|
63
|
+
# hash.
|
|
64
|
+
# This method assumes that the populate method has already
|
|
65
|
+
# been called, such that the hash contains the dimensions
|
|
66
|
+
# needed to create the global ID definitions.
|
|
67
|
+
def perform_transformations(transformation_settings)
  # 'transformation_settings' is a space-separated list of transformation
  # 'blocks'. Of each block, the first character selects the memory/thread
  # transformation and the second character (as an integer) selects the
  # reduction transformation. The result of block N (counting from 1) is
  # stored in the hash as :algorithm_codeN, the summed complexity of all
  # blocks as :complexity.
  complexity = 0

  # Save the original code (with flattened arrays) in the hash as well
  new_code = @code.clone
  @arrays.each { |array| new_code.transform_flatten(array) }
  @hash[:algorithm_code0] = new_code.to_s

  # Loop over the number of transformation 'blocks'
  transformation_settings.split(' ').each_with_index do |transformation,num_transformation|
    mode = transformation[0,1]
    reduction = transformation[1,1].to_i
    new_code = @code.clone
    extra_indent = ''

    # Replace existing loops in the code (always do this)
    species = @arrays.representative.species
    num_dimensions = species.dimensions.length
    species.dimensions.each_with_index do |_dimension,num_dimension|
      index = (species.reverse?) ? num_dimension : num_dimensions-num_dimension-1
      index_reverse = !(species.reverse?) ? num_dimension : num_dimensions-num_dimension-1

      # Calculate the loop start and end conditions (named so that they
      # do not shadow the 'from' and 'to' helper methods)
      loop_from = species.from_at(index)
      loop_to = species.to_at(index)

      # Process the existing code and update the hash. A dimension whose
      # start equals its end has no loop to remove.
      if loop_from != loop_to
        new_code, loop_variable_name = new_code.remove_loop(loop_from,loop_to)
        new_variable_name = GLOBAL_ID+'_'+index_reverse.to_s
        new_code.replace_variable(loop_variable_name,new_variable_name)
        # Drops the loop variable from the argument lists and prepends
        # its declaration to :algorithm_code0.
        update_hash(loop_variable_name)
      end
    end

    # Shuffle the indices of the first input(s) (conditionally do this)
    shuffle_arrays = []
    if mode == '2'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
    elsif mode == '3'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
      shuffle_arrays.push(@arrays.select(INPUT)[1])
    end
    new_code.transform_shuffle(shuffle_arrays)

    # Use the local on-chip memory (conditionally do this)
    if mode == '1'
      local_memory_arrays = [@arrays.select(INPUT)[0]]
      new_code.transform_use_local_memory(local_memory_arrays)
    end

    # Flatten the arrays to 1D (always do this)
    @arrays.each { |array| new_code.transform_flatten(array) }

    # Perform array substitution (conditionally do this)
    @arrays.outputs.each do |output|
      next unless output.species.element?
      # The flag tells the substitution whether the output is an input too
      new_code.transform_substitution(output,@arrays.inputs.include?(output) ? true : false)
      extra_indent = INDENT
    end

    # Perform transformations for reduction operations (conditionally do this)
    if reduction >= 1
      new_code = new_code.transform_reduction(@arrays.select(INPUT)[0],@arrays.select(OUTPUT)[0],reduction)
    end

    # Perform thread-merging (experimental)
    # TODO: Solve the problem related to constants (e.g chunk/example1.c)
    # NOTE(review): @merge_factor stays above 1 once set, so with several
    # transformation blocks :ids and :parallelism are rewritten again for
    # every block - confirm that this is intended.
    @merge_factor = 4 if @merge_factor == 1 && mode == '4'
    if @merge_factor > 1
      puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'

      # Update the hash
      @hash[:ids] = @hash[:ids].split(NL).map { |line|
        C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
          id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
        end
      }.join(NL+INDENT*2)
      @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s

      # Transform the code
      excludes = (@constants+@arrays).map { |c| c.name }
      new_code.transform_merge_threads(@merge_factor,excludes)
    end

    # Obtain the complexity in terms of operations for the resulting code
    complexity += new_code.get_complexity

    # Store the resulting code in the hash. The non-destructive 'gsub' is
    # required here: 'gsub!' returns nil when the code holds no newline,
    # which made the string concatenation fail for single-line results.
    resulting_code = new_code.strip_brackets.to_s
    code_key = ('algorithm_code'+(num_transformation+1).to_s).to_sym
    @hash[code_key] = (reduction >= 1) ? resulting_code : extra_indent+INDENT+resulting_code.gsub(NL,NL+INDENT)
  end

  @hash[:complexity] = complexity.to_s
end
|
|
170
|
+
|
|
171
|
+
# This method creates the search-and-replace hash based on
|
|
172
|
+
# information provided by the algorithm. It is called from
|
|
173
|
+
# the 'populate' method of this class.
|
|
174
|
+
#
|
|
175
|
+
# == List of possible hash keys:
|
|
176
|
+
#
|
|
177
|
+
# algorithm_id
|
|
178
|
+
# _name
|
|
179
|
+
# _basename
|
|
180
|
+
# _filename
|
|
181
|
+
# _code*
|
|
182
|
+
# (in*|out*)_type
|
|
183
|
+
# _name
|
|
184
|
+
# _devicename
|
|
185
|
+
# _devicepointer
|
|
186
|
+
# _dimensions
|
|
187
|
+
# _dimension*_to
|
|
188
|
+
# _from
|
|
189
|
+
# _sum
|
|
190
|
+
# _to
|
|
191
|
+
# _from
|
|
192
|
+
# _parameters
|
|
193
|
+
# _parameter*_to
|
|
194
|
+
# _from
|
|
195
|
+
# _sum
|
|
196
|
+
# _ids
|
|
197
|
+
# _localids
|
|
198
|
+
# _flatindex
|
|
199
|
+
# (in|out)_names
|
|
200
|
+
# _devicenames
|
|
201
|
+
# _devicedefinitions
|
|
202
|
+
# _devicedefinitionsopencl
|
|
203
|
+
# names
|
|
204
|
+
# devicenames
|
|
205
|
+
# devicedefinitions
|
|
206
|
+
# devicedefinitionsopencl
|
|
207
|
+
#
|
|
208
|
+
# parallelism
|
|
209
|
+
# factors
|
|
210
|
+
# ids
|
|
211
|
+
# verifyids
|
|
212
|
+
#
|
|
213
|
+
# argument_name
|
|
214
|
+
# argument_definition
|
|
215
|
+
# kernel_argument_list
|
|
216
|
+
#
|
|
217
|
+
def populate_hash
  # (Re)creates the search-and-replace hash: first the algorithm-wide
  # entries, then one nested hash per input/output array (stored under
  # '<direction><number>'), then the combined name/definition lists and
  # finally the parallelism, ids and factors of the representative array.
  @hash = {:algorithm_id        => @id,
           :algorithm_name      => @name,
           :algorithm_basename  => @basename,
           :algorithm_filename  => @filename,
           :argument_name       => @lists[:argument_name],
           :argument_definition => @lists[:argument_definition]}

  # Obtain the necessary data for the hash per array
  parallelisms = []
  DIRECTIONS.each do |direction|
    arrays = @arrays.select(direction)
    arrays.each_with_index do |array,num_array|
      hashid = "#{direction}#{num_array}".to_sym

      # Gather the name and type data
      minihash = {:type          => array.type_name,
                  :name          => array.name,
                  :devicepointer => array.device_pointer,
                  :devicename    => array.device_name,
                  :flatindex     => array.flatindex}

      # Gather the dimensions data
      dimensions = array.species.dimensions
      dimensions.each_with_index do |dimension,num_dimension|
        minihash["dimension#{num_dimension}".to_sym] = {:sum  => simplify(sum(dimension)),
                                                        :from => simplify(from(dimension)),
                                                        :to   => simplify(to(dimension))}
      end
      minihash[:dimensions] = simplify(dimensions.map { |d| sum(d) }.join('*'))
      minihash[:from] = dimensions.map { |d| from(d) }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')
      minihash[:to  ] = dimensions.map { |d| to(d)   }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')

      # Gather the parameter data
      if array.species.has_parameter?
        parameters = array.species.parameters
        parameters.each_with_index do |parameter,num_parameter|
          minihash["parameter#{num_parameter}".to_sym] = {:sum  => simplify(sum(parameter)),
                                                          :from => simplify(from(parameter)),
                                                          :to   => simplify(to(parameter))}
        end
        minihash[:parameters] = simplify(parameters.map { |p| sum(p) }.join('*'))
      end

      # Store the data into the hash
      @hash[hashid] = minihash

      # Gather information regarding the parallelism. Each entry holds the
      # parallelism expression, the hash key of the array and a priority.
      if array.species.chunk?
        dim_div = simplify(minihash[:dimensions]+'/'+minihash[:parameters])
        parallelisms.push([dim_div,hashid,0])
      elsif array.species.element? || array.species.neighbourhood?
        parallelisms.push([minihash[:dimensions],hashid,1])
      end

      # Populate the global ID definitions hash, create the proper indices (and store as '{in/out}*_ids' in the hash)
      ids, localids, verifyids, factors = [], [], [], ['']
      dimensions = array.species.dimensions.clone
      dimensions.each_with_index do |dimension,num_dimension|
        index = (array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1
        index_reverse = !(array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1

        # Generate the index expressions. The replacement values get their
        # own hash, so that the per-array 'minihash' is not overwritten.
        divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
        index_parts = {:dimensions => (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*'),
                       :modulo     => (index_reverse != dimensions.length-1) ? '%('+sum(dimension)+divider+')' : '',
                       :offset     => from(dimension)}
        expr_global = simplify(search_and_replace(index_parts,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
        expr_local  = simplify(search_and_replace(index_parts,"((#{LOCAL_ID}/(<dimensions>))<modulo>)+<offset>"))

        # Selectively push the ID definitions to the result array: the
        # verification ids are always created, the others only for
        # dimensions whose start differs from their end.
        range_from = array.species.from_at(index)
        range_to   = array.species.to_at(index)
        verifyids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
        if range_from != range_to
          ids     .push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
          localids.push("const int #{LOCAL_ID}_#{index_reverse} = "+expr_local+';')
          factors .push(array.factors[index_reverse])
        end
      end

      # Store the results in the hash
      @hash[hashid][:ids]       = ids.join(NL+INDENT*2)
      @hash[hashid][:localids]  = localids.join(NL+INDENT*2)
      @hash[hashid][:verifyids] = verifyids.join(NL+INDENT*2)
      @hash[hashid][:factors]   = factors.last
    end

    # Create lists of array names and definitions
    @hash["#{direction}_devicedefinitions".to_sym]       = arrays.map { |a| a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicedefinitionsopencl".to_sym] = arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicenames".to_sym]             = arrays.map { |a| a.device_name }.uniq.join(', ')
    @hash["#{direction}_names".to_sym]                   = arrays.map { |a| a.name }.uniq.join(', ')
  end
  @hash[:devicedefinitions]       = @arrays.map { |a| a.device_definition }.uniq.join(', ')
  @hash[:devicedefinitionsopencl] = @arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
  @hash[:devicenames]             = @arrays.map { |a| a.device_name }.uniq.join(', ')
  @hash[:names]                   = @arrays.map { |a| a.name }.uniq.join(', ')

  # Set the parallelism for the complete species, first sort them according to priorities and then find the maximum
  # TODO: Remove the 'reverse' statement and get the 'ids' part working correctly for chunks
  # TODO: How to find the maximum of symbolic expressions?
  parallelisms = parallelisms.reverse.sort_by { |p| p[2] }
  parallelism = parallelisms.reverse.max_by { |p| p[0].to_i }

  # Without any chunk, element or neighbourhood array there is nothing to
  # derive the parallelism from; report that instead of failing on nil.
  raise_error('No parallelism found, at least one chunk, element or neighbourhood array is required') if parallelism.nil?

  @hash[:parallelism] = parallelism[0]
  @hash[:ids] = @hash[parallelism[1]][:ids]
  @hash[:factors] = @hash[parallelism[1]][:factors]
  @arrays.set_representative(parallelism[1])
end
|
|
326
|
+
|
|
327
|
+
# Helper function to create the special code which is required
|
|
328
|
+
# for OpenCL function calls to be able to use kernel arguments.
|
|
329
|
+
def opencl_arguments(list,kernel_id)
  # 'list' is a string of variable names separated by ', ' and
  # 'kernel_id' the number of the kernel. Returns one 'clSetKernelArg'
  # statement per variable, each followed by a newline and an indent, or
  # an empty string if there are no variables (nil or empty list).
  return '' if list.nil? || list.empty?
  kernel_name = 'bones_kernel_'+@name+'_'+kernel_id.to_s
  statements = list.split(', ').each_with_index.map do |variable,id|
    argument = variable.strip
    'clSetKernelArg('+kernel_name+',bones_num_args+'+id.to_s+',sizeof('+argument+'),(void*)&'+argument+');'+NL+INDENT
  end
  statements.join
end
|
|
337
|
+
|
|
338
|
+
# This method updates the hash after loops are removed from
|
|
339
|
+
# the code. It takes as an argument a loop variable, which
|
|
340
|
+
# it removes from both the ':argument_name' and ':argument_
|
|
341
|
+
# definition' hash entries.
|
|
342
|
+
def update_hash(loop_variable)
  # 'loop_variable' is the name of a loop variable which was removed from
  # the code. It is dropped from the ':argument_name' and ':argument_
  # definition' entries, after which the OpenCL kernel argument lists are
  # regenerated and a declaration of the variable is prepended to the
  # original code in ':algorithm_code0'.

  # Match the variable as a whole word only; it is escaped so that it is
  # always interpreted literally inside the pattern.
  variable_pattern = /\b#{Regexp.escape(loop_variable.to_s)}\b/

  names = @hash[:argument_name].split(', ')
  names.delete(loop_variable.to_s)

  # Use 'reject' instead of deleting from the array while iterating over
  # it: the latter skips the element following each deleted one.
  definitions = @hash[:argument_definition].split(', ').reject { |definition| definition =~ variable_pattern }

  @hash[:argument_name] = names.join(', ')
  @hash[:argument_definition] = definitions.join(', ')

  # Now, generate the special code which is required for OpenCL function calls to be able to use kernel arguments.
  @hash[:kernel_argument_list] = opencl_arguments([@hash[:devicenames],@hash[:argument_name]].join(', ').remove_extras,0)
  @hash[:kernel_argument_list_in] = opencl_arguments(@hash[:in_devicenames],0)
  @hash[:kernel_argument_list_out] = opencl_arguments(@hash[:out_devicenames],0)
  @hash[:kernel_argument_list_constants] = opencl_arguments(@hash[:argument_name],0)

  # Add declarations for the loop variables for the original code in the hash
  @hash[:algorithm_code0] = INDENT+"int #{loop_variable};"+NL+@hash[:algorithm_code0]
end
|
|
359
|
+
|
|
360
|
+
# Method to create a list of variables for the current
|
|
361
|
+
# algorithm. These variables should hold two conditions:
|
|
362
|
+
# 1) they are not local to the algorithm's code, and 2),
|
|
363
|
+
# they are used in the algorithm's code.
|
|
364
|
+
#
|
|
365
|
+
# The method gets a list of undefined variables in the
|
|
366
|
+
# algorithm's code and subsequently searches the original
|
|
367
|
+
# code for the definition of this variable.
|
|
368
|
+
def populate_variables(original_code,defines)
  # Create a variable for every name which is used but not defined in the
  # algorithm's code. Variables with at least one dimension are stored as
  # arrays, all others as constants.
  @code.undefined_variables.each do |name|
    type = @function_code.variable_type(name)
    raise_error('Variable '+name+' not declared in original code') unless type
    size = original_code.size(name)
    direction = @code.direction(name)
    size.map! { |s| simplify(replace_defines(s,defines)) }
    variable = Variable.new(name,type,size,direction,@id,@species.shared?)
    if variable.dimensions > 0
      @arrays.push(variable)
    else
      @constants.push(variable)
    end
  end
  raise_error('No input nor output arrays detected, make sure they are properly defined') if @arrays.empty?

  # Couple the structures of the species to the arrays, per direction
  DIRECTIONS.each do |direction|
    structures = @species.structures(direction)
    candidates = @arrays.select(direction)
    next if candidates.empty?

    # There may not be more input/output arrays than input/output structures
    if structures.length < candidates.length
      array_names = candidates.map { |a| a.name }.join('","')
      raise_error(direction.capitalize+'put array count mismatch (expected '+structures.length.to_s+', found '+candidates.length.to_s+' ["'+array_names+'"])')
    end

    structures.each do |structure|
      # Take the first array without a species; if the structure carries
      # an array name, the names have to match as well. Fall back to the
      # first array if no such array is found.
      free_array = candidates.find do |candidate|
        !candidate.species && (!structure.has_arrayname? || structure.name == candidate.name)
      end
      array = free_array || candidates[0]
      array.species = structure

      # Use the species' size if the array size could not be determined
      if array.size.empty?
        array.size = array.species.dimensions.map { |d| sum(d) }
        array.guess = true
        puts WARNING+'Could not determine size for array "'+array.name+'" automatically, assuming: '+array.size.inspect+'.'
      end

      # Set the multiplication factors (for later)
      array.set_factors
    end
  end

  # Sort the arrays, passing the species patterns in the desired order
  @arrays.sort_by(['chunk','neighbourhood','element','shared','full']) if @arrays.length > 1
end
|
|
428
|
+
|
|
429
|
+
# Method to populate 5 lists with variable information.
|
|
430
|
+
# Below are listed the names of the five lists with an
|
|
431
|
+
# example value:
|
|
432
|
+
#
|
|
433
|
+
# host_name:: Example: 'array'
|
|
434
|
+
# host_definition:: Example: 'int array[10][10]'
|
|
435
|
+
# argument_name:: Example: 'threshold'
|
|
436
|
+
# argument_definition:: Example: 'float threshold'
|
|
437
|
+
# golden_name:: Example: 'golden_array'
|
|
438
|
+
def populate_lists
  # Constants appear in all five lists; their golden name is their own name.
  @constants.each do |constant|
    [:host_name, :argument_name, :golden_name].each { |key| @lists[key].push(constant.name) }
    [:host_definition, :argument_definition].each { |key| @lists[key].push(constant.definition) }
  end

  # Arrays are no kernel arguments: they only appear in the host and golden lists.
  @arrays.each do |array|
    @lists[:host_name].push(array.name)
    @lists[:host_definition].push(array.definition)
    @lists[:golden_name].push(array.golden_name)
  end

  # Finally, turn every list into a single comma-separated string.
  @lists.each_key { |key| @lists[key] = @lists[key].join(', ') }
end
|
|
453
|
+
|
|
454
|
+
# This method is used to generate verification code. This
|
|
455
|
+
# verification code contains a copy of the original code.
|
|
456
|
+
# It also provides a verification which compares the output
|
|
457
|
+
# of the original code with the output of the generated
|
|
458
|
+
# code. The verification code prints warnings if the outputs
|
|
459
|
+
# are not equal, else it prints a success message.
|
|
460
|
+
def generate_replacement_code(options, skeleton, verify_code, prefix, timer_start, timer_stop)
|
|
461
|
+
replacement = C::NodeArray.new
|
|
462
|
+
replacement.push(C::ExpressionStatement.parse(@accelerated_name+'('+@lists[:host_name]+');'))
|
|
463
|
+
original_definition = ''
|
|
464
|
+
verify_definitions = []
|
|
465
|
+
if options[:verify]
|
|
466
|
+
guesses = @arrays.map { |array| array.guess }
|
|
467
|
+
if guesses.include?(true)
|
|
468
|
+
puts WARNING+'Verification not supported for this class'
|
|
469
|
+
else
|
|
470
|
+
|
|
471
|
+
# Generate the replacement code and the original function
|
|
472
|
+
@arrays.each do |array|
|
|
473
|
+
replacement.insert(0,C::ExpressionStatement.parse("memcpy(#{array.golden_name},#{array.name},#{array.size.join('*')}*sizeof(#{array.type_name}));"))
|
|
474
|
+
replacement.insert(0,C::Declaration.parse(array.definition.gsub!(/\b#{array.name}\b/,array.golden_name)+array.initialization))
|
|
475
|
+
end
|
|
476
|
+
replacement.push(C::ExpressionStatement.parse(@original_name+'('+@lists[:golden_name]+');'))
|
|
477
|
+
original_definition = "void #{@original_name}(#{@lists[:host_definition]})"
|
|
478
|
+
body = "#{timer_start}#{NL} // Original code#{NL}#{@code}#{NL}#{timer_stop}"
|
|
479
|
+
verify_code.push(prefix+original_definition+' {'+NL+body+'}'+NL+NL)
|
|
480
|
+
@arrays.select(OUTPUT).each do |array|
|
|
481
|
+
replacement.push(C::ExpressionStatement.parse(("bones_verify_results_#{array.name}_#{@id}(#{array.name}#{array.flatten},#{array.golden_name}#{array.flatten},#{@hash[:argument_name]});").remove_extras))
|
|
482
|
+
end
|
|
483
|
+
@arrays.each do |array|
|
|
484
|
+
replacement.push(C::ExpressionStatement.parse("free(#{array.golden_name});")) if array.dynamic?
|
|
485
|
+
end
|
|
486
|
+
|
|
487
|
+
# Generate the verification function itself
|
|
488
|
+
@arrays.select(OUTPUT).each_with_index do |array,num_array|
|
|
489
|
+
minihash = @hash["out#{num_array}".to_sym]
|
|
490
|
+
minihash[:name] = minihash[:name]+'_'+@id
|
|
491
|
+
minihash[:argument_definition] = @hash[:argument_definition]
|
|
492
|
+
instantiated_skeleton = search_and_replace(minihash,skeleton)
|
|
493
|
+
verify_definitions.push(instantiated_skeleton.scan(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m).join.strip.remove_extras)
|
|
494
|
+
verify_code.push(instantiated_skeleton.remove_extras.gsub!(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m,''))
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
end
|
|
498
|
+
return replacement, original_definition, verify_definitions.join(NL)
|
|
499
|
+
end
|
|
500
|
+
|
|
501
|
+
# Method to generate performance modeling code.
|
|
502
|
+
# This method is still under construction and will not be called yet.
|
|
503
|
+
# TODO: Complete this method
|
|
504
|
+
def performance_model_code(model_dir)
|
|
505
|
+
|
|
506
|
+
# Load the profile database
|
|
507
|
+
profiles = Array.new
|
|
508
|
+
File.read(File.join(model_dir,'profile.txt')).each do |line|
|
|
509
|
+
profiles.push(line.split(','))
|
|
510
|
+
end
|
|
511
|
+
|
|
512
|
+
# Iterate over all the profiles
|
|
513
|
+
result = C::NodeArray.new
|
|
514
|
+
profiles.each do |profile|
|
|
515
|
+
|
|
516
|
+
# Fill the hash with profile information and species information
|
|
517
|
+
mini_hash = {
|
|
518
|
+
:name => profile[0].strip,
|
|
519
|
+
:comp => profile[1].strip,
|
|
520
|
+
:coal => profile[2].strip,
|
|
521
|
+
:unco => profile[3].strip,
|
|
522
|
+
:copy => profile[4].strip,
|
|
523
|
+
:f => @hash[:complexity],
|
|
524
|
+
:w => @hash[:parallelism],
|
|
525
|
+
:c => @species.all_structures.map { |s| simplify('4*('+s.dimensions.map { |d| sum(d) }.join('*')+')') }.join(' + '),
|
|
526
|
+
:m => '1',
|
|
527
|
+
:u => '0',
|
|
528
|
+
:o => '8'
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
# Load the skeleton for the performance model and set the values according to the hash
|
|
532
|
+
model_skeleton = File.read(File.join(model_dir,'model.c'))
|
|
533
|
+
search_and_replace!(mini_hash,model_skeleton)
|
|
534
|
+
result.push(C::Block.parse(model_skeleton))
|
|
535
|
+
end
|
|
536
|
+
return result
|
|
537
|
+
end
|
|
538
|
+
end
|
|
539
|
+
|
|
540
|
+
end
|
|
541
|
+
|