bones-compiler 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
@@ -0,0 +1,541 @@
|
|
1
|
+
|
2
|
+
module Bones
|
3
|
+
# This class holds one algorithm, which includes a species,
|
4
|
+
# a name, and the source C-code.
|
5
|
+
#
|
6
|
+
# The algorithm class holds all sorts of information on var-
|
7
|
+
# iables. This information is only available after calling
|
8
|
+
# the 'populate' method, which populates lists of varia-
|
9
|
+
# bles of all sorts: a regular list, a specialized hash,
|
10
|
+
# and lists of input/output array variables.
|
11
|
+
class Algorithm < Common
|
12
|
+
attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name
|
13
|
+
attr_accessor :hash, :merge_factor
|
14
|
+
|
15
|
+
# Constant to set the name of the algorithm's accelerated version
|
16
|
+
ACCELERATED = '_accelerated'
|
17
|
+
# Constant to set the name of the algorithm's original version
|
18
|
+
ORIGINAL = '_original'
|
19
|
+
|
20
|
+
# Creates a new algorithm from a name, the name of the file it
# belongs to, an identifier, a species and its source C-code.
#
# A name that starts with a digit gets an underscore prepended.
# The algorithm name (@name) is the resulting base name joined
# with the ID by an underscore, with every non-word character
# removed. The names of the original and the accelerated version
# are derived from it with the ORIGINAL and ACCELERATED suffixes.
#
# name::     Base name of the algorithm (a String).
# filename:: Stored unchanged in @filename.
# id::       Identifier of the algorithm. It is interpolated into
#            the name, so any object with a +to_s+ is accepted.
# species::  Stored unchanged in @species.
# code::     Source C-code; it is parsed with C::Statement.parse
#            and preprocessed afterwards.
def initialize(name, filename, id, species, code)
  # \A anchors at the start of the string, whereas ^ would also
  # match a digit that directly follows an embedded newline.
  name = '_'+name if name =~ /\A\d/
  @filename = filename
  @basename = name
  @name = "#{name}_#{id}".gsub(/\W/,'')
  @id = id
  @original_name = @name+ORIGINAL
  @accelerated_name = @name+ACCELERATED
  @species = species
  @code = C::Statement.parse(code).preprocess
  @hash = {}
  @lists = {:host_name => [],:host_definition => [], :argument_name => [], :argument_definition => [], :golden_name => []}
  @arrays = Variablelist.new()
  @constants = Variablelist.new()
  @merge_factor = 1
  @function_code = ''
  @function_name = ''
end
|
43
|
+
|
44
|
+
# Finds the function of the original code in which this
# algorithm's code is located and remembers it.
#
# full_code:: The original code; it must respond to
#             +get_functions+.
#
# Every function is asked (through +node_exists?+) whether it
# contains the algorithm's code. On a match the instance
# variables @function_code and @function_name are set. When
# several functions match, the last one wins; when none matches,
# both variables keep their previous values.
def set_function(full_code)
  full_code.get_functions.each do |function|
    next unless function.node_exists?(@code)
    @function_code = function
    @function_name = function.name
  end
end
|
57
|
+
|
58
|
+
# This method performs the code transformations according to the
# transformation settings given as an argument. It calls the
# various code transformation functions as implemented for the
# CAST class. The resulting code is stored in the
# search-and-replace hash.
#
# transformation_settings:: A space-separated list of
#   transformation 'blocks'. The first character of a block
#   selects an optional transformation: '1' uses the local memory
#   for the first input array, '2' and '3' shuffle the first one
#   or two input arrays and '4' enables thread-merging by a
#   factor 4 (unless a merge factor was already set). The second
#   character, when it is a number of at least 1, is passed on to
#   the reduction transformation.
#
# The following hash entries are written:
# algorithm_code0::    the original code with flattened arrays,
# algorithm_code<n>::  the transformed code of block n (1-based),
# complexity::         the summed complexity of all blocks.
#
# This method assumes that the hash has already been populated,
# such that it contains the dimensions needed to create the
# global ID definitions, and that a representative array was set.
def perform_transformations(transformation_settings)
  complexity = 0

  # Save the original code (with flattened arrays) in the hash as well
  new_code = @code.clone
  @arrays.each { |array| new_code.transform_flatten(array) }
  @hash[:algorithm_code0] = new_code.to_s

  # Loop over the transformation 'blocks'
  transformation_settings.split(' ').each_with_index do |transformation,num_transformation|
    new_code = @code.clone
    extra_indent = ''
    array_option = transformation[0,1]
    reduction_option = transformation[1,1].to_i

    # Replace existing loops in the code (always do this)
    representative = @arrays.representative
    representative.species.dimensions.each_with_index do |dimension,num_dimension|
      species = representative.species
      mirrored = species.dimensions.length-num_dimension-1
      index = (species.reverse?) ? num_dimension : mirrored
      index_reverse = (species.reverse?) ? mirrored : num_dimension

      # Calculate the loop start and end conditions
      loop_from = species.from_at(index)
      loop_to = species.to_at(index)

      # Process the existing code and update the hash
      if loop_from != loop_to
        new_code, loop_variable_name = new_code.remove_loop(loop_from,loop_to)
        new_variable_name = GLOBAL_ID+'_'+index_reverse.to_s
        new_code.replace_variable(loop_variable_name,new_variable_name)
        update_hash(loop_variable_name)
      end
    end

    # Shuffle the indices of the first input(s) (conditionally do this)
    shuffle_arrays = []
    if array_option == '2'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
    elsif array_option == '3'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
      shuffle_arrays.push(@arrays.select(INPUT)[1])
    end
    new_code.transform_shuffle(shuffle_arrays)

    # Use the local on-chip memory (conditionally do this)
    if array_option == '1'
      local_memory_arrays = [@arrays.select(INPUT)[0]]
      new_code.transform_use_local_memory(local_memory_arrays)
    end

    # Flatten the arrays to 1D (always do this)
    @arrays.each { |array| new_code.transform_flatten(array) }

    # Perform array substitution (conditionally do this)
    @arrays.outputs.each do |array|
      next unless array.species.element?
      new_code.transform_substitution(array,@arrays.inputs.include?(array) ? true : false)
      extra_indent = INDENT
    end

    # Perform transformations for reduction operations (conditionally do this)
    if reduction_option >= 1
      new_code = new_code.transform_reduction(@arrays.select(INPUT)[0],@arrays.select(OUTPUT)[0],reduction_option)
    end

    # Perform thread-merging (experimental)
    # TODO: Solve the problem related to constants (e.g chunk/example1.c)
    @merge_factor = 4 if @merge_factor == 1 && array_option == '4'
    if @merge_factor > 1
      puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'

      # Update the hash: every ID definition is merged and each
      # resulting line gets its own offset on the global ID
      @hash[:ids] = @hash[:ids].split(NL).map { |line|
        C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
          id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
        end
      }.join(NL+INDENT*2)
      @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s

      # Transform the code
      excludes = (@constants+@arrays).map { |c| c.name }
      new_code.transform_merge_threads(@merge_factor,excludes)
    end

    # Obtain the complexity in terms of operations for the resulting code
    complexity += new_code.get_complexity

    # Store the resulting code in the hash. The non-destructive
    # gsub is used on purpose: gsub! returns nil for code without
    # a newline, which cannot be appended to the indentation.
    resulting_code = new_code.strip_brackets.to_s
    code_key = ('algorithm_code'+(num_transformation+1).to_s).to_sym
    if reduction_option >= 1
      @hash[code_key] = resulting_code
    else
      @hash[code_key] = extra_indent+INDENT+resulting_code.gsub(NL,NL+INDENT)
    end
  end

  @hash[:complexity] = complexity.to_s
end
|
170
|
+
|
171
|
+
# This method creates the search-and-replace hash based on
# information provided by the algorithm: its names, its argument
# lists (taken from @lists) and, per input/output array, the
# type, names, dimensions, parameters and ID definitions.
#
# Finally, the parallelism of the complete species is selected
# from the chunk, element and neighbourhood arrays and the
# matching array is set as the representative array. If no such
# array exists, an error is reported through raise_error.
#
# == List of possible hash keys:
#
#  algorithm_id
#           _name
#           _basename
#           _filename
#           _code*
#  (in*|out*)_type
#            _name
#            _devicename
#            _devicepointer
#            _dimensions
#            _dimension*_to
#                       _from
#                       _sum
#            _to
#            _from
#            _parameters
#            _parameter*_to
#                       _from
#                       _sum
#            _ids
#            _localids
#            _verifyids
#            _factors
#            _flatindex
#  (in|out)_names
#          _devicenames
#          _devicedefinitions
#          _devicedefinitionsopencl
#  names
#  devicenames
#  devicedefinitions
#  devicedefinitionsopencl
#
#  parallelism
#  factors
#  ids
#
#  argument_name
#  argument_definition
#  kernel_argument_list
#
def populate_hash
  @hash = {:algorithm_id        => @id,
           :algorithm_name      => @name,
           :algorithm_basename  => @basename,
           :algorithm_filename  => @filename,
           :argument_name       => @lists[:argument_name],
           :argument_definition => @lists[:argument_definition]}

  # Obtain the necessary data for the hash per array
  parallelisms = []
  DIRECTIONS.each do |direction|
    arrays = @arrays.select(direction)
    arrays.each_with_index do |array,num_array|
      hashid = "#{direction}#{num_array}".to_sym

      # Gather the name and type data
      minihash = {:type          => array.type_name,
                  :name          => array.name,
                  :devicepointer => array.device_pointer,
                  :devicename    => array.device_name,
                  :flatindex     => array.flatindex}

      # Gather the dimensions data
      dimensions = array.species.dimensions
      dimensions.each_with_index do |dimension,num_dimension|
        minihash["dimension#{num_dimension}".to_sym] = {:sum  => simplify(sum(dimension)),
                                                        :from => simplify(from(dimension)),
                                                        :to   => simplify(to(dimension))}
      end
      minihash[:dimensions] = simplify(dimensions.map { |d| sum(d) }.join('*'))
      minihash[:from] = dimensions.map { |d| from(d) }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')
      minihash[:to  ] = dimensions.map { |d| to(d)   }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')

      # Gather the parameter data
      if array.species.has_parameter?
        parameters = array.species.parameters
        parameters.each_with_index do |parameter,num_parameter|
          minihash["parameter#{num_parameter}".to_sym] = {:sum  => simplify(sum(parameter)),
                                                          :from => simplify(from(parameter)),
                                                          :to   => simplify(to(parameter))}
        end
        minihash[:parameters] = simplify(parameters.map { |p| sum(p) }.join('*'))
      end

      # Store the data into the hash
      @hash[hashid] = minihash

      # Gather information regarding the parallelism (the last
      # element is the priority used for sorting later on)
      if array.species.chunk?
        dim_div = simplify(minihash[:dimensions]+'/'+minihash[:parameters])
        parallelisms.push([dim_div,hashid,0])
      elsif array.species.element? || array.species.neighbourhood?
        parallelisms.push([minihash[:dimensions],hashid,1])
      end

      # Populate the global ID definitions hash, create the proper indices (and store as '{in/out}*_ids' in the hash)
      ids, localids, verifyids, factors = [], [], [], ['']
      dimensions.each_with_index do |dimension,num_dimension|
        mirrored = dimensions.length-num_dimension-1
        index = (array.species.reverse?) ? num_dimension : mirrored
        index_reverse = (array.species.reverse?) ? mirrored : num_dimension

        # Generate the index expressions
        divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
        index_hash = {:dimensions => (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*'),
                      :modulo     => (index_reverse != dimensions.length-1) ? '%('+sum(dimension)+divider+')' : '',
                      :offset     => from(dimension)}
        expr_global = simplify(search_and_replace(index_hash,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
        expr_local  = simplify(search_and_replace(index_hash,"((#{LOCAL_ID }/(<dimensions>))<modulo>)+<offset>"))

        # Selectively push the ID definitions to the result array:
        # the verification IDs are always needed, the others only
        # if the dimension spans more than a single value
        range_from = array.species.from_at(index)
        range_to = array.species.to_at(index)
        verifyids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
        if range_from != range_to
          ids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
          localids.push("const int #{LOCAL_ID }_#{index_reverse} = "+expr_local+';')
          factors.push(array.factors[index_reverse])
        end
      end

      # Store the results in the hash
      @hash[hashid][:ids] = ids.join(NL+INDENT*2)
      @hash[hashid][:localids] = localids.join(NL+INDENT*2)
      @hash[hashid][:verifyids] = verifyids.join(NL+INDENT*2)
      @hash[hashid][:factors] = factors.last
    end

    # Create lists of array names and definitions
    @hash["#{direction}_devicedefinitions".to_sym] = arrays.map { |a| a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicedefinitionsopencl".to_sym] = arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicenames".to_sym] = arrays.map { |a| a.device_name }.uniq.join(', ')
    @hash["#{direction}_names".to_sym] = arrays.map { |a| a.name }.uniq.join(', ')
  end
  @hash[:devicedefinitions] = @arrays.map { |a| a.device_definition }.uniq.join(', ')
  @hash[:devicedefinitionsopencl] = @arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
  @hash[:devicenames] = @arrays.map { |a| a.device_name }.uniq.join(', ')
  @hash[:names] = @arrays.map { |a| a.name }.uniq.join(', ')

  # Set the parallelism for the complete species, first sort them according to priorities and then find the maximum
  # TODO: Remove the 'reverse' statement and get the 'ids' part working correctly for chunks
  # TODO: How to find the maximum of symbolic expressions?
  parallelisms = parallelisms.reverse.sort_by { |p| p[2] }
  parallelism = parallelisms.reverse.max_by { |p| p[0].to_i }

  # Without any chunk, element or neighbourhood array there is
  # nothing to base the parallelism on: report this explicitly
  # instead of failing on a nil value below.
  raise_error('Could not determine the parallelism, no chunk, element or neighbourhood arrays found') if parallelism.nil?

  @hash[:parallelism] = parallelism[0]
  @hash[:ids] = @hash[parallelism[1]][:ids]
  @hash[:factors] = @hash[parallelism[1]][:factors]
  @arrays.set_representative(parallelism[1])
end
|
326
|
+
|
327
|
+
# Helper function to create the special code which is required
# for OpenCL function calls to be able to use kernel arguments.
#
# list::      A string with variable names, separated by ', '.
# kernel_id:: The number of the kernel, used in the kernel name.
#
# Returns a string with one 'clSetKernelArg' call per variable,
# each followed by a newline and an indentation. An empty (or
# missing) list results in an empty string.
def opencl_arguments(list,kernel_id)
  return '' if list.nil? || list == ''
  kernel_name = 'bones_kernel_'+@name+'_'+kernel_id.to_s
  calls = list.split(', ').each_with_index.map do |variable,id|
    argument = variable.strip
    'clSetKernelArg('+kernel_name+',bones_num_args+'+id.to_s+',sizeof('+argument+'),(void*)&'+argument+');'+NL+INDENT
  end
  return calls.join
end
|
337
|
+
|
338
|
+
# This method updates the hash after a loop is removed from the
# code. It takes as an argument a loop variable, which it removes
# from both the ':argument_name' and ':argument_definition' hash
# entries. Afterwards, the OpenCL kernel argument lists are
# regenerated and a declaration of the loop variable is prepended
# to the original code (':algorithm_code0').
#
# loop_variable:: The name of the removed loop variable.
def update_hash(loop_variable)
  variable_name = loop_variable.to_s
  variable_pattern = /\b#{Regexp.escape(variable_name)}\b/

  names = @hash[:argument_name].split(', ')
  names.delete(variable_name)

  # Use reject instead of deleting from the list while iterating
  # over it: the latter skips the element following a deleted one.
  definitions = @hash[:argument_definition].split(', ').reject { |definition| definition =~ variable_pattern }

  @hash[:argument_name] = names.join(', ')
  @hash[:argument_definition] = definitions.join(', ')

  # Now, generate the special code which is required for OpenCL function calls to be able to use kernel arguments.
  @hash[:kernel_argument_list] = opencl_arguments([@hash[:devicenames],@hash[:argument_name]].join(', ').remove_extras,0)
  @hash[:kernel_argument_list_in] = opencl_arguments(@hash[:in_devicenames],0)
  @hash[:kernel_argument_list_out] = opencl_arguments(@hash[:out_devicenames],0)
  @hash[:kernel_argument_list_constants] = opencl_arguments(@hash[:argument_name],0)

  # Add declarations for the loop variables for the original code in the hash
  @hash[:algorithm_code0] = INDENT+"int #{loop_variable};"+NL+@hash[:algorithm_code0]
end
|
359
|
+
|
360
|
+
# Method to create a list of variables for the current
# algorithm. These variables should hold two conditions:
# 1) they are not local to the algorithm's code, and 2),
# they are used in the algorithm's code.
#
# The method gets a list of undefined variables in the
# algorithm's code. Of each of them, the type is taken from the
# function in which the algorithm is found and the size from the
# original code (with the defines replaced). Variables with at
# least one dimension are stored as arrays, all others as
# constants.
#
# Next, per direction, the species' structures are assigned to
# the arrays, missing array sizes are taken from the species and
# the multiplication factors are set. Finally, the arrays are
# sorted according to their species' pattern.
#
# original_code:: The original code, used to find array sizes.
# defines::       The defines to replace in the array sizes.
#
# An error is reported (through raise_error) if a variable is not
# declared, if no arrays are found at all or if more arrays are
# found than the species describes.
def populate_variables(original_code,defines)
  @code.undefined_variables.each do |name|
    type = @function_code.variable_type(name)
    raise_error('Variable '+name+' not declared in original code') if !type
    size = original_code.size(name)
    direction = @code.direction(name)
    size.map! { |s| simplify(replace_defines(s,defines)) }
    variable = Variable.new(name,type,size,direction,@id,@species.shared?)
    (variable.dimensions > 0) ? @arrays.push(variable) : @constants.push(variable)
  end
  raise_error('No input nor output arrays detected, make sure they are properly defined') if @arrays.empty?

  DIRECTIONS.each do |direction|
    species = @species.structures(direction)
    arrays = @arrays.select(direction)
    next if arrays.empty?

    # Check that there are not more input/output arrays than input/output species
    if species.length < arrays.length
      array_names = arrays.map { |a| a.name }.join('","')
      raise_error(direction.capitalize+'put array count mismatch (expected '+species.length.to_s+', found '+arrays.length.to_s+' ["'+array_names+'"])')
    end

    # Set the species for the arrays (distinguish between arrays with and without a name)
    species.each do |structure|

      # Take the first array without a species which, in case
      # the structure has an array name, also carries that name.
      # Fall back to the first array if there is no such array.
      free_array = arrays.find do |candidate|
        !candidate.species && (!structure.has_arrayname? || structure.name == candidate.name)
      end
      array = free_array || arrays[0]
      array.species = structure
      #structure.name = array.name

      # Check if the array size was set, if not, it will be set to the species' size
      if array.size.empty?
        array.size = array.species.dimensions.map { |d| sum(d) }
        array.guess = true
        puts WARNING+'Could not determine size for array "'+array.name+'" automatically, assuming: '+array.size.inspect+'.'
      end

      # Set the multiplication factors (for later)
      array.set_factors
    end
  end

  # Sort the arrays according to the pattern of their species
  if @arrays.length > 1
    @arrays.sort_by(['chunk','neighbourhood','element','shared','full'])
  end
end
|
428
|
+
|
429
|
+
# Method to populate 5 lists with variable information.
# Constants are added to all five lists, arrays only to the
# host and golden lists. Afterwards, every list is joined into
# a single comma-separated string. Below are listed the names
# of the five lists with an example value:
#
# host_name::             Example: 'array'
# host_definition::       Example: 'int array[10][10]'
# argument_name::         Example: 'threshold'
# argument_definition::   Example: 'float threshold'
# golden_name::           Example: 'golden_array'
def populate_lists
  @constants.each do |constant|
    @lists[:host_name].push(constant.name)
    @lists[:host_definition].push(constant.definition)
    @lists[:argument_name].push(constant.name)
    @lists[:argument_definition].push(constant.definition)
    @lists[:golden_name].push(constant.name)
  end
  @arrays.each do |array|
    @lists[:host_name].push(array.name)
    @lists[:host_definition].push(array.definition)
    @lists[:golden_name].push(array.golden_name)
  end

  # From here on, the lists are strings rather than arrays
  @lists.each { |name,list| @lists[name] = list.join(', ') }
end
|
453
|
+
|
454
|
+
# This method generates the code which replaces the algorithm in
# the original code: a call to the accelerated version. If
# verification is requested (options[:verify]), verification code
# is generated as well. This verification code contains a copy of
# the original code and a verification which compares the output
# of the original code with the output of the generated code.
#
# options::     Hash of options, of which only :verify is used.
# skeleton::    Skeleton of the verification function.
# verify_code:: Array to which the original function and the
#               verification functions are appended.
# prefix::      Text placed in front of the definition of the
#               original function.
# timer_start:: Code placed before the original code.
# timer_stop::  Code placed after the original code.
#
# Returns the replacement code (a C::NodeArray), the definition
# of the original function and the definitions of the
# verification functions (joined by newlines). The latter two are
# empty strings when no verification code is generated, which is
# also the case if the size of one of the arrays was guessed.
def generate_replacement_code(options, skeleton, verify_code, prefix, timer_start, timer_stop)
  replacement = C::NodeArray.new
  replacement.push(C::ExpressionStatement.parse(@accelerated_name+'('+@lists[:host_name]+');'))
  original_definition = ''
  verify_definitions = []
  if options[:verify]
    if @arrays.map { |array| array.guess }.include?(true)
      puts WARNING+'Verification not supported for this class'
    else

      # Generate the replacement code and the original function
      @arrays.each do |array|
        replacement.insert(0,C::ExpressionStatement.parse("memcpy(#{array.golden_name},#{array.name},#{array.size.join('*')}*sizeof(#{array.type_name}));"))

        # The non-destructive gsub keeps the array's definition
        # intact and never yields nil (as gsub! does on no match)
        golden_definition = array.definition.gsub(/\b#{Regexp.escape(array.name)}\b/,array.golden_name)
        replacement.insert(0,C::Declaration.parse(golden_definition+array.initialization))
      end
      replacement.push(C::ExpressionStatement.parse(@original_name+'('+@lists[:golden_name]+');'))
      original_definition = "void #{@original_name}(#{@lists[:host_definition]})"
      body = "#{timer_start}#{NL} // Original code#{NL}#{@code}#{NL}#{timer_stop}"
      verify_code.push(prefix+original_definition+' {'+NL+body+'}'+NL+NL)
      @arrays.select(OUTPUT).each do |array|
        replacement.push(C::ExpressionStatement.parse(("bones_verify_results_#{array.name}_#{@id}(#{array.name}#{array.flatten},#{array.golden_name}#{array.flatten},#{@hash[:argument_name]});").remove_extras))
      end
      @arrays.each do |array|
        replacement.push(C::ExpressionStatement.parse("free(#{array.golden_name});")) if array.dynamic?
      end

      # Generate the verification function itself
      definition_pattern = /#{START_DEFINITION}(.+)#{END_DEFINITION}/m
      @arrays.select(OUTPUT).each_with_index do |array,num_array|
        # NOTE(review): this updates the output's entry of @hash in
        # place (the ID is appended to its name) - confirm that no
        # later user of the hash expects the plain name.
        minihash = @hash["out#{num_array}".to_sym]
        minihash[:name] = minihash[:name]+'_'+@id
        minihash[:argument_definition] = @hash[:argument_definition]
        instantiated_skeleton = search_and_replace(minihash,skeleton)
        verify_definitions.push(instantiated_skeleton.scan(definition_pattern).join.strip.remove_extras)

        # A skeleton without a definition part is stored unchanged
        verify_code.push(instantiated_skeleton.remove_extras.gsub(definition_pattern,''))
      end
    end
  end
  return replacement, original_definition, verify_definitions.join(NL)
end
|
500
|
+
|
501
|
+
# Method to generate performance modeling code.
# This method is still under construction and will not be called yet.
# TODO: Complete this method
#
# model_dir:: Directory which holds the profile database
#             ('profile.txt', one comma-separated profile per
#             line: name, comp, coal, unco, copy) and the
#             skeleton of the performance model ('model.c').
#
# Returns a C::NodeArray with, for every profile, the parsed
# model skeleton in which the profile and algorithm information
# is filled in.
def performance_model_code(model_dir)

  # Load the profile database. The file is iterated with
  # each_line (a String has no 'each' method from Ruby 1.9 on)
  # and blank lines are skipped.
  profiles = []
  File.read(File.join(model_dir,'profile.txt')).each_line do |line|
    next if line.strip.empty?
    profiles.push(line.split(','))
  end

  # Iterate over all the profiles
  result = C::NodeArray.new
  model_template = nil
  profiles.each do |profile|

    # Fill the hash with profile information and species information
    mini_hash = {
      :name => profile[0].strip,
      :comp => profile[1].strip,
      :coal => profile[2].strip,
      :unco => profile[3].strip,
      :copy => profile[4].strip,
      :f => @hash[:complexity],
      :w => @hash[:parallelism],
      :c => @species.all_structures.map { |s| simplify('4*('+s.dimensions.map { |d| sum(d) }.join('*')+')') }.join(' + '),
      :m => '1',
      :u => '0',
      :o => '8'
    }

    # Load the skeleton for the performance model (only once) and
    # set the values according to the hash. A copy is used for
    # every profile since the replacement is done in place.
    model_template ||= File.read(File.join(model_dir,'model.c'))
    model_skeleton = model_template.dup
    search_and_replace!(mini_hash,model_skeleton)
    result.push(C::Block.parse(model_skeleton))
  end
  return result
end
|
538
|
+
end
|
539
|
+
|
540
|
+
end
|
541
|
+
|