bones-compiler 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,541 @@
1
+
2
+ module Bones
3
+ # This class holds one algorithm, which includes a species,
4
+ # a name, and the source C-code.
5
+ #
6
+ # The algorithm class holds all sorts of information on var-
7
+ # iables. This information is only available after calling
8
+ # the 'populate' method, which populates lists of varia-
9
+ # bles of all sorts: a regular list, a specialized hash,
10
+ # and lists of input/output array variables.
11
+ class Algorithm < Common
12
+ attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name # read-only views on state assigned inside this class
13
+ attr_accessor :hash, :merge_factor # the search-and-replace hash and the thread-merging factor (both writable from outside)
14
+
15
+ # Suffix appended to the algorithm's name to form the name of its accelerated version
16
+ ACCELERATED = '_accelerated'
17
+ # Suffix appended to the algorithm's name to form the name of its original version
18
+ ORIGINAL = '_original'
19
+
20
+ # This method initializes the class. It gives the new
21
+ # algorithm a name, species and source code. At initiali-
22
+ # zation, this method checks if the name starts with a
23
+ # digit. This is not allowed, so an underscore is added
24
+ # prior to the digit.
25
def initialize(name, filename, id, species, code)
  # A name may not start with a digit, so such a name gets a leading underscore.
  name = '_'+name if name =~ /^\d/

  # Naming: the unique name is '<name>_<id>' with all non-word characters removed.
  unique_name = (name+'_'+id).gsub(/\W/,'')
  @filename, @basename, @id = filename, name, id
  @name = unique_name
  @original_name = unique_name+ORIGINAL
  @accelerated_name = unique_name+ACCELERATED

  # The species and the parsed (and preprocessed) source code
  @species = species
  @code = C::Statement.parse(code).preprocess

  # Containers which are filled by the populate/transformation methods
  @hash = {}
  @lists = {}
  [:host_name, :host_definition, :argument_name, :argument_definition, :golden_name].each do |list_name|
    @lists[list_name] = []
  end
  @arrays = Variablelist.new
  @constants = Variablelist.new
  @merge_factor = 1

  # Unknown until 'set_function' is called
  @function_code = ''
  @function_name = ''
end
43
+
44
+ # This method sets the code and name for the function in
45
+ # which the algorithm is found. This is done based on the
46
+ # original code, which is given as input to this method.
47
+ # The method does not return any value, instead, it sets
48
+ # two instance variables (@function_code and @function_name).
49
def set_function(full_code)
  # Every function is inspected; when several contain the algorithm's
  # code, the last one inspected is the one that is remembered.
  full_code.get_functions.each do |candidate|
    next unless candidate.node_exists?(@code)
    @function_code = candidate
    @function_name = candidate.name
  end
end
57
+
58
+ # This method performs the code transformations according
59
+ # to the transformation settings as provided as an argument
60
+ # to the function. It calls the various code transformation
61
+ # functions as implemented for the CAST class. The resulting
62
+ # modified code is finally stored in the search-and-replace
63
+ # hash.
64
+ # This method assumes that the populate method has already
65
+ # been called, such that the hash contains the dimensions
66
+ # needed to create the global ID definitions.
67
# Applies the requested transformations to a fresh copy of the algorithm's
# code, once per transformation 'block', and stores every result in @hash.
#
# transformation_settings:: a space-separated string of transformation
#                           blocks. Of each block, the first character
#                           selects shuffling ('2' or '3'), local memory
#                           usage ('1') or thread-merging ('4'). The second
#                           character, read as an integer, enables the
#                           reduction transformation when it is >= 1.
#
# Results written to @hash:
# :algorithm_code0::   the untransformed code with flattened arrays
# :algorithm_code<n>:: the code for the n-th block (counting from 1)
# :complexity::        the summed complexity of all blocks, as a string
#
# Side effects: 'update_hash' is called for every removed loop, and when
# threads are merged @merge_factor, @hash[:ids] and @hash[:parallelism]
# are updated as well.
def perform_transformations(transformation_settings)
  complexity = 0

  # Save the original code (with flattened arrays) in the hash as well
  new_code = @code.clone
  @arrays.each do |array|
    new_code.transform_flatten(array)
  end
  @hash[:algorithm_code0] = new_code.to_s

  # Loop over the number of transformation 'blocks'
  transformation_settings.split(' ').each_with_index do |transformation,num_transformation|
    new_code = @code.clone
    extra_indent = ''
    reduction = transformation[1,1].to_i

    # Replace existing loops in the code (always do this)
    representative = @arrays.representative
    representative.species.dimensions.each_with_index do |dimension,num_dimension|
      index = (representative.species.reverse?) ? num_dimension : representative.species.dimensions.length-num_dimension-1
      index_reverse = !(representative.species.reverse?) ? num_dimension : representative.species.dimensions.length-num_dimension-1

      # Calculate the loop start and end conditions
      from = representative.species.from_at(index)
      to = representative.species.to_at(index)

      # Process the existing code and update the hash (a dimension whose
      # start equals its end has no loop to remove)
      if from != to
        new_code, loop_variable_name = new_code.remove_loop(from,to)
        new_variable_name = GLOBAL_ID+'_'+index_reverse.to_s
        new_code.replace_variable(loop_variable_name,new_variable_name)
        update_hash(loop_variable_name)
      end
    end

    # Shuffle the indices of the first input(s) (conditionally do this)
    shuffle_arrays = []
    if transformation[0,1] == '2'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
    elsif transformation[0,1] == '3'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
      shuffle_arrays.push(@arrays.select(INPUT)[1])
    end
    new_code.transform_shuffle(shuffle_arrays)

    # Use the local on-chip memory (conditionally do this)
    if transformation[0,1] == '1'
      local_memory_arrays = [@arrays.select(INPUT)[0]]
      new_code.transform_use_local_memory(local_memory_arrays)
    end

    # Flatten the arrays to 1D (always do this)
    @arrays.each do |array|
      new_code.transform_flatten(array)
    end

    # Perform array substitution (conditionally do this)
    @arrays.outputs.each do |array|
      if array.species.element?
        if @arrays.inputs.include?(array)
          new_code.transform_substitution(array,true)
        else
          new_code.transform_substitution(array,false)
        end
        extra_indent = INDENT
      end
    end

    # Perform transformations for reduction operations (conditionally do this)
    if reduction >= 1
      new_code = new_code.transform_reduction(@arrays.select(INPUT)[0],@arrays.select(OUTPUT)[0],reduction)
    end

    # Perform thread-merging (experimental)
    # TODO: Solve the problem related to constants (e.g chunk/example1.c)
    if @merge_factor == 1 && transformation[0,1] == '4'
      @merge_factor = 4
    end
    if @merge_factor > 1
      puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'

      # Update the hash
      @hash[:ids] = @hash[:ids].split(NL).map { |line|
        C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
          id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
        end
      }.join(NL+INDENT*2)
      @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s

      # Transform the code
      excludes = (@constants+@arrays).map { |c| c.name }
      new_code.transform_merge_threads(@merge_factor,excludes)
    end

    # Obtain the complexity in terms of operations for the resulting code
    complexity += new_code.get_complexity

    # Store the resulting code in the hash. The non-destructive 'gsub' is
    # used on purpose: 'gsub!' returns nil for code without any newline,
    # which would make the string concatenation below fail.
    resulting_code = new_code.strip_brackets.to_s
    code_key = ('algorithm_code'+(num_transformation+1).to_s).to_sym
    @hash[code_key] = (reduction >= 1) ? resulting_code : extra_indent+INDENT+resulting_code.gsub(NL,NL+INDENT)
  end

  @hash[:complexity] = complexity.to_s
end
170
+
171
+ # This method creates the search-and-replace hash based on
172
+ # information provided by the algorithm. It is called from
173
+ # the 'populate' method of this class.
174
+ #
175
+ # == List of possible hash keys:
176
+ #
177
+ # algorithm_id
178
+ # _name
179
+ # _basename
180
+ # _filename
181
+ # _code*
182
+ # (in*|out*)_type
183
+ # _name
184
+ # _devicename
185
+ # _devicepointer
186
+ # _dimensions
187
+ # _dimension*_to
188
+ # _from
189
+ # _sum
190
+ # _to
191
+ # _from
192
+ # _parameters
193
+ # _parameter*_to
194
+ # _from
195
+ # _sum
196
+ # _ids
197
+ # _localids
198
+ # _flatindex
199
+ # (in|out)_names
200
+ # _devicenames
201
+ # _devicedefinitions
202
+ # _devicedefinitionsopencl
203
+ # names
204
+ # devicenames
205
+ # devicedefinitions
206
+ # devicedefinitionsopencl
207
+ #
208
+ # parallelism
209
+ # factors
210
+ # ids
211
+ # verifyids
212
+ #
213
+ # argument_name
214
+ # argument_definition
215
+ # kernel_argument_list
216
+ #
217
+ def populate_hash # rebuilds @hash from scratch; also sets the representative array at the very end
217
+ @hash = {:algorithm_id => @id,
218
+ :algorithm_name => @name,
219
+ :algorithm_basename => @basename,
220
+ :algorithm_filename => @filename,
221
+ :argument_name => @lists[:argument_name],
222
+ :argument_definition => @lists[:argument_definition]}
223
+
224
+ # Obtain the necessary data for the hash per array, grouped per direction
225
+ parallelisms = [] # collects [size expression, hash key, priority] entries
226
+ DIRECTIONS.each do |direction|
227
+ arrays = @arrays.select(direction)
228
+ arrays.each_with_index do |array,num_array|
229
+ hashid = "#{direction}#{num_array}".to_sym
230
+
231
+ # Gather the name and type data
232
+ minihash = {:type => array.type_name,
233
+ :name => array.name,
234
+ :devicepointer => array.device_pointer,
235
+ :devicename => array.device_name,
236
+ :flatindex => array.flatindex}
237
+
238
+ # Gather the dimensions data (one sub-hash per dimension, plus combined expressions)
239
+ dimensions = array.species.dimensions
240
+ dimensions.each_with_index do |dimension,num_dimension|
241
+ minihash["dimension#{num_dimension}".to_sym] = {:sum => simplify(sum(dimension)),
242
+ :from => simplify(from(dimension)),
243
+ :to => simplify(to(dimension))}
244
+ end
245
+ minihash[:dimensions] = simplify(dimensions.map { |d| sum(d) }.join('*'))
246
+ minihash[:from] = dimensions.map { |d| from(d) }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')
247
+ minihash[:to ] = dimensions.map { |d| to(d) }.zip(array.factors.drop(1).reverse).map { |e| simplify(e.join('')) }.join('+')
248
+
249
+ # Gather the parameter data (only for species which have parameters)
250
+ if array.species.has_parameter?
251
+ parameters = array.species.parameters
252
+ parameters.each_with_index do |parameter,num_parameter|
253
+ minihash["parameter#{num_parameter}".to_sym] = {:sum => simplify(sum(parameter)),
254
+ :from => simplify(from(parameter)),
255
+ :to => simplify(to(parameter))}
256
+ end
257
+ minihash[:parameters] = simplify(parameters.map { |p| sum(p) }.join('*'))
258
+ end
259
+
260
+ # Store the data into the hash
261
+ @hash[hashid] = minihash
262
+
263
+ # Gather information regarding the parallelism (other species add no entry)
264
+ if array.species.chunk?
265
+ dim_div = simplify(minihash[:dimensions]+'/'+minihash[:parameters]) # NOTE(review): assumes a chunk species always has parameters - confirm
266
+ parallelisms.push([dim_div,hashid,0])
267
+ elsif array.species.element? || array.species.neighbourhood?
268
+ parallelisms.push([minihash[:dimensions],hashid,1])
269
+ end
270
+
271
+ # Build the ID definitions and factors for this array (stored under @hash[hashid] below)
272
+ ids, localids, verifyids, factors = [], [], [], [''] # factors starts with '' so that 'factors.last' is '' when nothing is pushed
273
+ dimensions = array.species.dimensions.clone
274
+ dimensions.each_with_index do |dimension,num_dimension|
275
+ index = (array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1
276
+ index_reverse = !(array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1
277
+
278
+ # Generate the index expressions (the local 'minihash' is rebound here; the hash stored in @hash[hashid] is not modified)
279
+ divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
280
+ minihash = {:dimensions => (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*'),
281
+ :modulo => (index_reverse != dimensions.length-1) ? '%('+sum(dimension)+divider+')' : '',
282
+ :offset => from(dimension)}
283
+ expr_global = simplify(search_and_replace(minihash,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
284
+ expr_local = simplify(search_and_replace(minihash,"((#{LOCAL_ID }/(<dimensions>))<modulo>)+<offset>"))
285
+
286
+ # Selectively push the ID definitions to the result arrays: verifyids always, the others only if from differs from to
287
+ from = array.species.from_at(index) # local named like the from() helper; calls with an argument list still reach the helper
288
+ to = array.species.to_at(index) # local named like the to() helper
289
+ verifyids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
290
+ if from != to
291
+ ids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
292
+ localids.push("const int #{LOCAL_ID }_#{index_reverse} = "+expr_local+';')
293
+ factors.push(array.factors[index_reverse])
294
+ end
295
+ end
296
+
297
+ # Store the results in the hash
298
+ @hash[hashid][:ids] = ids.join(NL+INDENT*2)
299
+ @hash[hashid][:localids] = localids.join(NL+INDENT*2)
300
+ @hash[hashid][:verifyids] = verifyids.join(NL+INDENT*2)
301
+ @hash[hashid][:factors] = factors.last
302
+ end
303
+
304
+ # Create comma-separated lists of array names and definitions for this direction (duplicates removed)
305
+ @hash["#{direction}_devicedefinitions".to_sym] = arrays.map { |a| a.device_definition }.uniq.join(', ')
306
+ @hash["#{direction}_devicedefinitionsopencl".to_sym] = arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
307
+ @hash["#{direction}_devicenames".to_sym] = arrays.map { |a| a.device_name }.uniq.join(', ')
308
+ @hash["#{direction}_names".to_sym] = arrays.map { |a| a.name }.uniq.join(', ')
309
+ end
310
+ @hash[:devicedefinitions] = @arrays.map { |a| a.device_definition }.uniq.join(', ')
311
+ @hash[:devicedefinitionsopencl] = @arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
312
+ @hash[:devicenames] = @arrays.map { |a| a.device_name }.uniq.join(', ')
313
+ @hash[:names] = @arrays.map { |a| a.name }.uniq.join(', ')
314
+
315
+ # Set the parallelism for the complete species, first sort them according to priorities and then find the maximum
316
+ # TODO: Remove the 'reverse' statement and get the 'ids' part working correctly for chunks
317
+ # TODO: How to find the maximum of symbolic expressions?
318
+ parallelisms = parallelisms.reverse.sort_by { |p| p[2] }
319
+ parallelism = parallelisms.reverse.max_by { |p| p[0].to_i } # NOTE(review): nil when no chunk/element/neighbourhood array was found, which makes the lines below fail - confirm this cannot happen
320
+ @hash[:parallelism] = parallelism[0]
321
+ @hash[:ids] = @hash[parallelism[1]][:ids]
322
+ @hash[:factors] = @hash[parallelism[1]][:factors]
323
+ @arrays.set_representative(parallelism[1])
324
+ end
326
+
327
+ # Helper function to create the special code which is required
328
+ # for OpenCL function calls to be able to use kernel arguments.
329
def opencl_arguments(list,kernel_id)
  # Nothing to set for an empty argument list
  return '' if list == ''

  # One 'clSetKernelArg' call per variable; the argument index is offset
  # by 'bones_num_args' in the generated code.
  kernel = 'bones_kernel_'+@name+'_'+kernel_id.to_s
  calls = list.split(', ').each_with_index.map do |variable,position|
    argument = variable.strip
    'clSetKernelArg('+kernel+",bones_num_args+#{position},sizeof("+argument+'),(void*)&'+argument+');'+NL+INDENT
  end
  return calls.join
end
337
+
338
+ # This method updates the hash after loops are removed from
339
+ # the code. It takes as an argument a loop variable, which
340
+ # it removes from both the ':argument_name' and ':argument_
341
+ # definition' hash entries.
342
# Removes a (former) loop variable from the kernel arguments in @hash and
# regenerates everything that is derived from those arguments.
#
# loop_variable:: the name of the loop variable which was removed from the
#                 code (anything responding to 'to_s')
#
# Updates the :argument_name, :argument_definition, :kernel_argument_list*
# and :algorithm_code0 entries of @hash.
def update_hash(loop_variable)
  loop_name = loop_variable.to_s
  names = @hash[:argument_name].split(', ')
  names.delete(loop_name)

  # Drop every definition which mentions the loop variable as a whole word.
  # A new array is built with 'reject': deleting from an array while
  # iterating over it skips the element following each deleted one.
  mentions_loop_variable = /\b#{Regexp.escape(loop_name)}\b/
  definitions = @hash[:argument_definition].split(', ').reject { |definition| definition =~ mentions_loop_variable }

  @hash[:argument_name] = names.join(', ')
  @hash[:argument_definition] = definitions.join(', ')

  # Now, generate the special code which is required for OpenCL function calls to be able to use kernel arguments.
  @hash[:kernel_argument_list] = opencl_arguments([@hash[:devicenames],@hash[:argument_name]].join(', ').remove_extras,0)
  @hash[:kernel_argument_list_in] = opencl_arguments(@hash[:in_devicenames],0)
  @hash[:kernel_argument_list_out] = opencl_arguments(@hash[:out_devicenames],0)
  @hash[:kernel_argument_list_constants] = opencl_arguments(@hash[:argument_name],0)

  # Add declarations for the loop variables for the original code in the hash
  @hash[:algorithm_code0] = INDENT+"int #{loop_variable};"+NL+@hash[:algorithm_code0]
end
359
+
360
+ # Method to create a list of variables for the current
361
+ # algorithm. These variables should hold two conditions:
362
+ # 1) they are not local to the algorithm's code, and 2),
363
+ # they are used in the algorithm's code.
364
+ #
365
+ # The method gets a list of undefined variables in the
366
+ # algorithm's code and subsequently searches the original
367
+ # code for the definition of this variable.
368
def populate_variables(original_code,defines)
  # Create a variable for every name which is used but not defined within
  # the algorithm's code, and file it as an array or as a constant.
  @code.undefined_variables.each do |name|
    declared_type = @function_code.variable_type(name)
    raise_error('Variable '+name+' not declared in original code') unless declared_type
    extents = original_code.size(name)
    flow = @code.direction(name)
    extents.map! { |extent| simplify(replace_defines(extent,defines)) }
    variable = Variable.new(name,declared_type,extents,flow,@id,@species.shared?)
    if variable.dimensions > 0
      @arrays.push(variable)
    else
      @constants.push(variable)
    end
  end
  raise_error('No input nor output arrays detected, make sure they are properly defined') if arrays.empty?

  # Couple the structures of the species to the arrays, per direction
  DIRECTIONS.each do |direction|
    structures = @species.structures(direction)
    candidates = @arrays.select(direction)
    next if candidates.empty?

    # There may not be more input/output arrays than input/output structures
    if structures.length < candidates.length
      array_names = candidates.map { |a| a.name }.join('","')
      raise_error(direction.capitalize+'put array count mismatch (expected '+structures.length.to_s+', found '+candidates.length.to_s+' ["'+array_names+'"])')
    end

    structures.each do |structure|

      # Take the first array without a species; a structure which carries
      # an array name only accepts the array with that same name. Without
      # a suitable array, the first array of this direction is used.
      array = candidates.find do |candidate|
        !candidate.species && (!structure.has_arrayname? || structure.name == candidate.name)
      end
      array ||= candidates[0]
      array.species = structure

      # An array without a known size gets the size of its species
      if array.size.empty?
        array.size = array.species.dimensions.map { |d| sum(d) }
        array.guess = true
        puts WARNING+'Could not determine size for array "'+array.name+'" automatically, assuming: '+array.size.inspect+'.'
      end

      # Set the multiplication factors (for later)
      array.set_factors
    end
  end

  # Sort the arrays using the given order of names (only useful for more than one array)
  @arrays.sort_by(['chunk','neighbourhood','element','shared','full']) if @arrays.length > 1
end
428
+
429
+ # Method to populate 5 lists with variable information.
430
+ # Below are listed the names of the five lists with an
431
+ # example value:
432
+ #
433
+ # host_name:: Example: 'array'
434
+ # host_definition:: Example: 'int array[10][10]'
435
+ # argument_name:: Example: 'threshold'
436
+ # argument_definition:: Example: 'float threshold'
437
+ # golden_name:: Example: 'golden_array'
438
def populate_lists
  # Constants are host variables, kernel arguments and 'golden' values at once
  @constants.each do |constant|
    [:host_name, :argument_name, :golden_name].each { |key| @lists[key].push(constant.name) }
    [:host_definition, :argument_definition].each { |key| @lists[key].push(constant.definition) }
  end

  # Arrays are not passed as plain arguments and have a separate golden name
  @arrays.each do |array|
    @lists[:host_name].push(array.name)
    @lists[:host_definition].push(array.definition)
    @lists[:golden_name].push(array.golden_name)
  end

  # Turn every list into a single comma-separated string
  @lists.keys.each do |key|
    @lists[key] = @lists[key].join(', ')
  end
  @lists
end
453
+
454
+ # This method is used to generate verification code. This
455
+ # verification code contains a copy of the original code.
456
+ # It also provides a verification which compares the output
457
+ # of the original code with the output of the generated
458
+ # code. The verification code prints warnings if the outputs
459
+ # are not equal, else it prints a success message.
460
# Generates the code which replaces the algorithm in the original program
# and, when verification is requested, the code needed to verify the results.
#
# options::      hash of settings; only the :verify key is read here
# skeleton::     the verification skeleton, instantiated per output array
# verify_code::  array to which the generated functions are appended
# prefix::       text placed in front of the original function's definition
# timer_start::  code placed before the original code
# timer_stop::   code placed after the original code
#
# Returns three values: the replacement code (a C::NodeArray), the
# definition of the original function (empty without verification) and the
# definitions of the verification functions, joined by newlines.
def generate_replacement_code(options, skeleton, verify_code, prefix, timer_start, timer_stop)
  replacement = C::NodeArray.new
  replacement.push(C::ExpressionStatement.parse(@accelerated_name+'('+@lists[:host_name]+');'))
  original_definition = ''
  verify_definitions = []
  if options[:verify]
    guesses = @arrays.map { |array| array.guess }
    if guesses.include?(true)

      # The golden copies cannot be made when an array's size was guessed
      puts WARNING+'Verification not supported for this class'
    else

      # Generate the replacement code and the original function
      @arrays.each do |array|
        replacement.insert(0,C::ExpressionStatement.parse("memcpy(#{array.golden_name},#{array.name},#{array.size.join('*')}*sizeof(#{array.type_name}));"))

        # Non-destructive 'gsub': 'gsub!' returns nil when nothing is
        # replaced and would also alter the array's own definition string.
        golden_definition = array.definition.gsub(/\b#{array.name}\b/,array.golden_name)
        replacement.insert(0,C::Declaration.parse(golden_definition+array.initialization))
      end
      replacement.push(C::ExpressionStatement.parse(@original_name+'('+@lists[:golden_name]+');'))
      original_definition = "void #{@original_name}(#{@lists[:host_definition]})"
      body = "#{timer_start}#{NL} // Original code#{NL}#{@code}#{NL}#{timer_stop}"
      verify_code.push(prefix+original_definition+' {'+NL+body+'}'+NL+NL)
      @arrays.select(OUTPUT).each do |array|
        replacement.push(C::ExpressionStatement.parse(("bones_verify_results_#{array.name}_#{@id}(#{array.name}#{array.flatten},#{array.golden_name}#{array.flatten},#{@hash[:argument_name]});").remove_extras))
      end
      @arrays.each do |array|
        replacement.push(C::ExpressionStatement.parse("free(#{array.golden_name});")) if array.dynamic?
      end

      # Generate the verification function itself
      @arrays.select(OUTPUT).each_with_index do |array,num_array|

        # NOTE(review): this updates the entry in @hash itself, so the
        # suffix is appended again on every call - confirm this is intended
        minihash = @hash["out#{num_array}".to_sym]
        minihash[:name] = minihash[:name]+'_'+@id
        minihash[:argument_definition] = @hash[:argument_definition]
        instantiated_skeleton = search_and_replace(minihash,skeleton)
        verify_definitions.push(instantiated_skeleton.scan(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m).join.strip.remove_extras)

        # Non-destructive 'gsub': a skeleton without a definition section
        # must still be stored as a string, not as nil.
        verify_code.push(instantiated_skeleton.remove_extras.gsub(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m,''))
      end
    end
  end
  return replacement, original_definition, verify_definitions.join(NL)
end
500
+
501
+ # Method to generate performance modeling code.
502
+ # This method is still under construction and will not be called yet.
503
+ # TODO: Complete this method
504
# Generates one instance of the performance model per profile.
#
# model_dir:: directory which holds 'profile.txt' (one comma-separated
#             profile per line: name, comp, coal, unco, copy) and 'model.c'
#             (the skeleton of the performance model)
#
# Returns a C::NodeArray with one parsed block per profile.
def performance_model_code(model_dir)

  # Load the profile database. 'each_line' is used because a string has no
  # 'each' method as of Ruby 1.9; blank lines hold no profile and are skipped.
  profiles = []
  File.read(File.join(model_dir,'profile.txt')).each_line do |line|
    next if line.strip.empty?
    profiles.push(line.split(','))
  end

  # Iterate over all the profiles
  result = C::NodeArray.new
  model_template = nil
  profiles.each do |profile|

    # Fill the hash with profile information and species information
    mini_hash = {
      :name => profile[0].strip,
      :comp => profile[1].strip,
      :coal => profile[2].strip,
      :unco => profile[3].strip,
      :copy => profile[4].strip,
      :f => @hash[:complexity],
      :w => @hash[:parallelism],
      :c => @species.all_structures.map { |s| simplify('4*('+s.dimensions.map { |d| sum(d) }.join('*')+')') }.join(' + '),
      :m => '1',
      :u => '0',
      :o => '8'
    }

    # Load the skeleton for the performance model (from disk only once) and
    # set the values according to the hash; every profile gets its own copy
    # because the skeleton is modified in place.
    model_template ||= File.read(File.join(model_dir,'model.c'))
    model_skeleton = model_template.dup
    search_and_replace!(mini_hash,model_skeleton)
    result.push(C::Block.parse(model_skeleton))
  end
  return result
end
538
+ end
539
+
540
+ end
541
+