bones-compiler 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,541 @@
1
+
2
+ module Bones
3
+ # This class holds one algorithm, which includes a species,
4
+ # a name, and the source C-code.
5
+ #
6
+ # The algorithm class holds all sorts of information on var-
7
+ # iables. This information is only available after calling
8
+ # the 'populate' method, which populates lists of varia-
9
+ # bles of all sorts: a regular list, a specialized hash,
10
+ # and lists of input/output array variables.
11
+ class Algorithm < Common
12
# Read-only views on the state of the algorithm.
attr_reader :name, :species, :code, :lists,
            :arrays, :id, :function_name

# The search-and-replace hash and the thread-merging factor are writable.
# NOTE(review): the ':hash' accessor overrides Object#hash for instances of
# this class, so instances should not be used as hash keys.
attr_accessor :hash, :merge_factor

# Suffix used to build the name of the algorithm's accelerated version
ACCELERATED = '_accelerated'

# Suffix used to build the name of the algorithm's original version
ORIGINAL = '_original'
19
+
20
+ # This method initializes the class. It gives the new
21
+ # algorithm a name, species and source code. At initiali-
22
+ # zation, this method checks if the name starts with a
23
+ # digit. This is not allowed, so an underscore is added
24
+ # prior to the digit.
25
def initialize(name, filename, id, species, code)
  # A name may not start with a digit: prefix such names with an underscore
  if name =~ /^\d/
    name = '_'+name
  end

  # Naming information; the unique name is '<name>_<id>' without non-word characters
  @filename = filename
  @basename = name
  @name = (name+'_'+id).gsub(/\W/,'')
  @id = id
  @original_name = @name+ORIGINAL
  @accelerated_name = @name+ACCELERATED

  # The species and the parsed (and preprocessed) source code
  @species = species
  @code = C::Statement.parse(code).preprocess

  # Containers which are filled in later on by the populate methods
  @hash = {}
  @lists = {}
  [:host_name, :host_definition, :argument_name, :argument_definition, :golden_name].each do |list_name|
    @lists[list_name] = []
  end
  @arrays = Variablelist.new()
  @constants = Variablelist.new()

  # Defaults: no thread-merging, enclosing function not known yet
  @merge_factor = 1
  @function_code = ''
  @function_name = ''
end
43
+
44
+ # This method sets the code and name for the function in
45
+ # which the algorithm is found. This is done based on the
46
+ # original code, which is given as input to this method.
47
+ # The method does not return any value, instead, it sets
48
+ # two instance variables (@function_code and @function_name).
49
def set_function(full_code)
  # Visit every function of the full code; when several functions contain
  # the algorithm's code, the last one visited is the one that is kept.
  full_code.get_functions.each do |candidate|
    next unless candidate.node_exists?(@code)
    @function_code = candidate
    @function_name = candidate.name
  end
end
57
+
58
+ # This method performs the code transformations according
59
+ # to the transformation settings as provided as an argument
60
+ # to the function. It calls the various code transformation
61
+ # functions as implemented for the CAST class. The resulting
62
+ # modified code is finally stored in the search-and-replace
63
+ # hash.
64
+ # This method assumes that the populate method has already
65
+ # been called, such that the hash contains the dimensions
66
+ # needed to create the global ID definitions.
67
# Performs the code transformations described by +transformation_settings+:
# a space-separated list of transformation 'blocks', each a short string of
# digits. The first digit selects the memory-related transformation
# ('1' local memory, '2'/'3' shuffle one/two inputs, '4' thread-merging);
# the second digit, when >= 1, enables the reduction transformation.
#
# Stores in the search-and-replace hash:
# * :algorithm_code0    the original code with flattened arrays
# * :algorithm_code<N>  the transformed code of the N-th block (1-based)
# * :complexity         the summed complexity of all transformed blocks
#
# Assumes the hash has already been populated (':ids', ':parallelism' and
# the argument entries are read and updated here).
def perform_transformations(transformation_settings)
  complexity = 0

  # Save the original code (with flattened arrays) in the hash as well
  flattened_original = @code.clone
  @arrays.each do |array|
    flattened_original.transform_flatten(array)
  end
  @hash[:algorithm_code0] = flattened_original.to_s

  # Loop over the number of transformation 'blocks'
  transformation_settings.split(' ').each_with_index do |transformation,num_transformation|
    new_code = @code.clone
    extra_indent = ''
    first_setting = transformation[0,1]
    reduction_setting = transformation[1,1].to_i

    # Replace existing loops in the code (always do this)
    array = @arrays.representative
    array.species.dimensions.each_with_index do |dimension,num_dimension|
      index = (array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1
      index_reverse = !(array.species.reverse?) ? num_dimension : array.species.dimensions.length-num_dimension-1

      # Calculate the loop start and end conditions
      from = array.species.from_at(index)
      to = array.species.to_at(index)

      # Process the existing code and update the hash
      if from != to
        new_code, loop_variable_name = new_code.remove_loop(from,to)
        new_variable_name = GLOBAL_ID+'_'+index_reverse.to_s
        new_code.replace_variable(loop_variable_name,new_variable_name)
        update_hash(loop_variable_name)
      end
    end

    # Shuffle the indices of the first input(s) (conditionally do this)
    shuffle_arrays = []
    if first_setting == '2'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
    elsif first_setting == '3'
      shuffle_arrays.push(@arrays.select(INPUT)[0])
      shuffle_arrays.push(@arrays.select(INPUT)[1])
    end
    new_code.transform_shuffle(shuffle_arrays)

    # Use the local on-chip memory (conditionally do this)
    if first_setting == '1'
      local_memory_arrays = [@arrays.select(INPUT)[0]]
      new_code.transform_use_local_memory(local_memory_arrays)
    end

    # Flatten the arrays to 1D (always do this)
    @arrays.each do |flat_array|
      new_code.transform_flatten(flat_array)
    end

    # Perform array substitution (conditionally do this)
    @arrays.outputs.each do |output_array|
      if output_array.species.element?
        new_code.transform_substitution(output_array,@arrays.inputs.include?(output_array) ? true : false)
        extra_indent = INDENT
      end
    end

    # Perform transformations for reduction operations (conditionally do this)
    if reduction_setting >= 1
      new_code = new_code.transform_reduction(@arrays.select(INPUT)[0],@arrays.select(OUTPUT)[0],reduction_setting)
    end

    # Perform thread-merging (experimental)
    # TODO: Solve the problem related to constants (e.g chunk/example1.c)
    if @merge_factor == 1 && first_setting == '4'
      @merge_factor = 4
    end
    if @merge_factor > 1
      puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'

      # Update the hash
      @hash[:ids] = @hash[:ids].split(NL).map { |line|
        C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
          id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
        end
      }.join(NL+INDENT*2)
      @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s

      # Transform the code
      excludes = (@constants+@arrays).map { |c| c.name }
      new_code.transform_merge_threads(@merge_factor,excludes)
    end

    # Obtain the complexity in terms of operations for the resulting code
    complexity += new_code.get_complexity

    # Store the resulting code in the hash. The non-destructive 'gsub' is
    # used on purpose: 'gsub!' returns nil when the code holds no newline,
    # which made the string concatenation fail for single-line code.
    resulting_code = new_code.strip_brackets.to_s
    code_key = ('algorithm_code'+(num_transformation+1).to_s).to_sym
    if reduction_setting >= 1
      @hash[code_key] = resulting_code
    else
      @hash[code_key] = extra_indent+INDENT+resulting_code.gsub(NL,NL+INDENT)
    end
  end

  @hash[:complexity] = complexity.to_s
end
170
+
171
+ # This method creates the search-and-replace hash based on
172
+ # information provided by the algorithm. It is called from
173
+ # the 'populate' method of this class.
174
+ #
175
+ # == List of possible hash keys:
176
+ #
177
+ # algorithm_id
178
+ # _name
179
+ # _basename
180
+ # _filename
181
+ # _code*
182
+ # (in*|out*)_type
183
+ # _name
184
+ # _devicename
185
+ # _devicepointer
186
+ # _dimensions
187
+ # _dimension*_to
188
+ # _from
189
+ # _sum
190
+ # _to
191
+ # _from
192
+ # _parameters
193
+ # _parameter*_to
194
+ # _from
195
+ # _sum
196
+ # _ids
197
+ # _localids
198
+ # _flatindex
199
+ # (in|out)_names
200
+ # _devicenames
201
+ # _devicedefinitions
202
+ # _devicedefinitionsopencl
203
+ # names
204
+ # devicenames
205
+ # devicedefinitions
206
+ # devicedefinitionsopencl
207
+ #
208
+ # parallelism
209
+ # factors
210
+ # ids
211
+ # verifyids
212
+ #
213
+ # argument_name
214
+ # argument_definition
215
+ # kernel_argument_list
216
+ #
217
def populate_hash
  # Start with the entries which describe the algorithm as a whole
  @hash = {
    :algorithm_id        => @id,
    :algorithm_name      => @name,
    :algorithm_basename  => @basename,
    :algorithm_filename  => @filename,
    :argument_name       => @lists[:argument_name],
    :argument_definition => @lists[:argument_definition]
  }

  # Collect the per-array data, one direction at a time
  candidates = []
  DIRECTIONS.each do |direction|
    directed_arrays = @arrays.select(direction)
    directed_arrays.each_with_index do |array,array_number|
      hashid = "#{direction}#{array_number}".to_sym

      # Name and type data
      entry = {
        :type          => array.type_name,
        :name          => array.name,
        :devicepointer => array.device_pointer,
        :devicename    => array.device_name,
        :flatindex     => array.flatindex
      }

      # Dimension data: one sub-hash per dimension, followed by the totals
      extents = array.species.dimensions
      extents.each_with_index do |extent,extent_number|
        entry["dimension#{extent_number}".to_sym] = {
          :sum  => simplify(sum(extent)),
          :from => simplify(from(extent)),
          :to   => simplify(to(extent))
        }
      end
      entry[:dimensions] = simplify(extents.map { |e| sum(e) }.join('*'))
      lower_bounds = extents.map { |e| from(e) }
      entry[:from] = lower_bounds.zip(array.factors.drop(1).reverse).map { |pair| simplify(pair.join('')) }.join('+')
      upper_bounds = extents.map { |e| to(e) }
      entry[:to] = upper_bounds.zip(array.factors.drop(1).reverse).map { |pair| simplify(pair.join('')) }.join('+')

      # Parameter data (only for species which have parameters)
      if array.species.has_parameter?
        parameters = array.species.parameters
        parameters.each_with_index do |parameter,parameter_number|
          entry["parameter#{parameter_number}".to_sym] = {
            :sum  => simplify(sum(parameter)),
            :from => simplify(from(parameter)),
            :to   => simplify(to(parameter))
          }
        end
        entry[:parameters] = simplify(parameters.map { |p| sum(p) }.join('*'))
      end

      # Make the entry available as '{in/out}*' in the hash
      @hash[hashid] = entry

      # Record a parallelism candidate as [amount, hash key, priority]
      if array.species.chunk?
        candidates.push([simplify(entry[:dimensions]+'/'+entry[:parameters]),hashid,0])
      elsif array.species.element? || array.species.neighbourhood?
        candidates.push([entry[:dimensions],hashid,1])
      end

      # Build the global/local ID definitions for this array
      ids = []
      localids = []
      verifyids = []
      factors = ['']
      extents = array.species.dimensions.clone
      last_position = extents.length-1
      extents.each_with_index do |extent,extent_number|
        if array.species.reverse?
          index = extent_number
          index_reverse = array.species.dimensions.length-extent_number-1
        else
          index = array.species.dimensions.length-extent_number-1
          index_reverse = extent_number
        end

        # Generate the index expressions
        divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
        stride = (index == last_position) ? '1' : extents.drop(index+1).map { |e| sum(e) }.join('*')
        modulo = (index_reverse != last_position) ? '%('+sum(extent)+divider+')' : ''
        pieces = {:dimensions => stride, :modulo => modulo, :offset => from(extent)}
        expr_global = simplify(search_and_replace(pieces,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
        expr_local = simplify(search_and_replace(pieces,"((#{LOCAL_ID}/(<dimensions>))<modulo>)+<offset>"))

        # The verification IDs are always stored; the other definitions
        # only for dimensions whose start and end differ
        range_start = array.species.from_at(index)
        range_end = array.species.to_at(index)
        verifyids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
        next unless range_start != range_end
        ids.push("const int #{GLOBAL_ID}_#{index_reverse} = "+expr_global+';')
        localids.push("const int #{LOCAL_ID}_#{index_reverse} = "+expr_local+';')
        factors.push(array.factors[index_reverse])
      end

      # Store the results in the hash
      separator = NL+INDENT*2
      @hash[hashid][:ids] = ids.join(separator)
      @hash[hashid][:localids] = localids.join(separator)
      @hash[hashid][:verifyids] = verifyids.join(separator)
      @hash[hashid][:factors] = factors.last
    end

    # Create lists of array names and definitions for this direction
    @hash["#{direction}_devicedefinitions".to_sym] = directed_arrays.map { |a| a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicedefinitionsopencl".to_sym] = directed_arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
    @hash["#{direction}_devicenames".to_sym] = directed_arrays.map { |a| a.device_name }.uniq.join(', ')
    @hash["#{direction}_names".to_sym] = directed_arrays.map { |a| a.name }.uniq.join(', ')
  end

  # The same lists, now for all arrays together
  @hash[:devicedefinitions] = @arrays.map { |a| a.device_definition }.uniq.join(', ')
  @hash[:devicedefinitionsopencl] = @arrays.map { |a| '__global '+a.device_definition }.uniq.join(', ')
  @hash[:devicenames] = @arrays.map { |a| a.device_name }.uniq.join(', ')
  @hash[:names] = @arrays.map { |a| a.name }.uniq.join(', ')

  # Set the parallelism for the complete species: order the candidates by
  # priority and take the one with the largest (integer) amount
  # TODO: Remove the 'reverse' statement and get the 'ids' part working correctly for chunks
  # TODO: How to find the maximum of symbolic expressions?
  # NOTE(review): 'chosen' is nil when no candidate was recorded - confirm this cannot occur
  candidates = candidates.reverse.sort_by { |candidate| candidate[2] }
  chosen = candidates.reverse.max_by { |candidate| candidate[0].to_i }
  @hash[:parallelism] = chosen[0]
  @hash[:ids] = @hash[chosen[1]][:ids]
  @hash[:factors] = @hash[chosen[1]][:factors]
  @arrays.set_representative(chosen[1])
end
326
+
327
+ # Helper function to create the special code which is required
328
+ # for OpenCL function calls to be able to use kernel arguments.
329
def opencl_arguments(list,kernel_id)
  # Nothing to generate for an empty list
  return '' if list == ''

  # One 'clSetKernelArg' call per variable; every call is followed by a
  # newline and an indent, including the last one
  calls = list.split(', ').each_with_index.map do |variable,position|
    stripped = variable.strip
    'clSetKernelArg(bones_kernel_'+@name+'_'+kernel_id.to_s+
      ',bones_num_args+'+position.to_s+
      ',sizeof('+stripped+'),(void*)&'+stripped+');'+NL+INDENT
  end
  calls.join
end
337
+
338
+ # This method updates the hash after loops are removed from
339
+ # the code. It takes as an argument a loop variable, which
340
+ # it removes from both the ':argument_name' and ':argument_
341
+ # definition' hash entries.
342
# Updates the search-and-replace hash after a loop has been removed from
# the code. The given +loop_variable+ is removed from the ':argument_name'
# and ':argument_definition' entries, the OpenCL kernel-argument entries are
# regenerated, and a declaration of the loop variable is prepended to the
# original code stored under ':algorithm_code0'.
def update_hash(loop_variable)
  names = @hash[:argument_name].split(', ')
  definitions = @hash[:argument_definition].split(', ')
  names.delete(loop_variable.to_s)

  # Drop every definition which mentions the loop variable. 'reject!' is
  # used instead of deleting inside 'each', which skipped the element that
  # directly followed a deleted one.
  mentions_variable = /\b#{Regexp.escape(loop_variable.to_s)}\b/
  definitions.reject! { |definition| definition =~ mentions_variable }
  @hash[:argument_name] = names.join(', ')
  @hash[:argument_definition] = definitions.join(', ')

  # Now, generate the special code which is required for OpenCL function calls to be able to use kernel arguments.
  @hash[:kernel_argument_list] = opencl_arguments([@hash[:devicenames],@hash[:argument_name]].join(', ').remove_extras,0)
  @hash[:kernel_argument_list_in] = opencl_arguments(@hash[:in_devicenames],0)
  @hash[:kernel_argument_list_out] = opencl_arguments(@hash[:out_devicenames],0)
  @hash[:kernel_argument_list_constants] = opencl_arguments(@hash[:argument_name],0)

  # Add declarations for the loop variables for the original code in the hash
  @hash[:algorithm_code0] = INDENT+"int #{loop_variable};"+NL+@hash[:algorithm_code0]
end
359
+
360
+ # Method to create a list of variables for the current
361
+ # algorithm. These variables should hold two conditions:
362
+ # 1) they are not local to the algorithm's code, and 2),
363
+ # they are used in the algorithm's code.
364
+ #
365
+ # The method gets a list of undefined variables in the
366
+ # algorithm's code and subsequently searches the original
367
+ # code for the definition of this variable.
368
def populate_variables(original_code,defines)
  # Create a variable for every name which is used, but not defined, in the
  # algorithm's code; variables with dimensions are arrays, others constants
  @code.undefined_variables.each do |name|
    type = @function_code.variable_type(name)
    raise_error('Variable '+name+' not declared in original code') unless type
    size = original_code.size(name)
    direction = @code.direction(name)
    size.map! { |part| simplify(replace_defines(part,defines)) }
    variable = Variable.new(name,type,size,direction,@id,@species.shared?)
    if variable.dimensions > 0
      @arrays.push(variable)
    else
      @constants.push(variable)
    end
  end
  raise_error('No input nor output arrays detected, make sure they are properly defined') if @arrays.empty?

  # Couple the structures of the species to the arrays, per direction
  DIRECTIONS.each do |direction|
    structures = @species.structures(direction)
    directed_arrays = @arrays.select(direction)
    next if directed_arrays.empty?

    # Check if the amount of input/output arrays is equal to the amount of input/output species
    if structures.length < directed_arrays.length
      array_names = directed_arrays.map { |a| a.name }.join('","')
      raise_error(direction.capitalize+'put array count mismatch (expected '+structures.length.to_s+', found '+directed_arrays.length.to_s+' ["'+array_names+'"])')
    end

    # Set the species for the arrays (distinguish between arrays with and without a name)
    structures.each do |structure|
      # Take the first array without a species which matches the structure:
      # by name if the structure has an array name, otherwise any such array.
      # Fall back to the first array of this direction if none matches.
      matching = directed_arrays.find do |free_array|
        !free_array.species && (!structure.has_arrayname? || structure.name == free_array.name)
      end
      array = matching || directed_arrays[0]
      array.species = structure
      #structure.name = array.name

      # Check if the array size was set, if not, it will be set to the species' size
      if array.size.empty?
        array.size = array.species.dimensions.map { |d| sum(d) }
        array.guess = true
        puts WARNING+'Could not determine size for array "'+array.name+'" automatically, assuming: '+array.size.inspect+'.'
      end

      # Set the multiplication factors (for later)
      array.set_factors
    end
  end

  # Sort the arrays according to the given order of species patterns
  if @arrays.length > 1
    @arrays.sort_by(['chunk','neighbourhood','element','shared','full'])
  end
end
428
+
429
+ # Method to populate 5 lists with variable information.
430
+ # Below are listed the names of the five lists with an
431
+ # example value:
432
+ #
433
+ # host_name:: Example: 'array'
434
+ # host_definition:: Example: 'int array[10][10]'
435
+ # argument_name:: Example: 'threshold'
436
+ # argument_definition:: Example: 'float threshold'
437
+ # golden_name:: Example: 'golden_array'
438
def populate_lists
  # Constants are host variables as well as kernel arguments; their golden
  # name is equal to their normal name
  @constants.each do |constant|
    [:host_name, :argument_name, :golden_name].each { |list| @lists[list].push(constant.name) }
    [:host_definition, :argument_definition].each { |list| @lists[list].push(constant.definition) }
  end

  # Arrays are host variables only and have a separate golden name
  @arrays.each do |array|
    @lists[:host_name].push(array.name)
    @lists[:host_definition].push(array.definition)
    @lists[:golden_name].push(array.golden_name)
  end

  # Finally, turn every list into a comma-separated string
  @lists.each_key { |list| @lists[list] = @lists[list].join(', ') }
end
453
+
454
+ # This method is used to generate verification code. This
455
+ # verification code contains a copy of the original code.
456
+ # It also provides a verification which compares the output
457
+ # of the original code with the output of the generated
458
+ # code. The verification code prints warnings if the outputs
459
+ # are not equal, else it prints a success message.
460
# Generates the code which replaces the algorithm in the original program:
# a call to the accelerated version and, when +options[:verify]+ is set,
# a second run of the original code on 'golden' copies of the arrays plus
# calls to the verification functions.
#
# * +options+      hash of options; only ':verify' is read here
# * +skeleton+     text of the verification skeleton
# * +verify_code+  list to which the generated verification code is pushed
# * +prefix+       text placed in front of the original function definition
# * +timer_start+, +timer_stop+  code placed around the original code
#
# Returns three values: the replacement statements, the definition of the
# original function ('' without verification) and the verification
# definitions joined by newlines.
def generate_replacement_code(options, skeleton, verify_code, prefix, timer_start, timer_stop)
  replacement = C::NodeArray.new
  replacement.push(C::ExpressionStatement.parse(@accelerated_name+'('+@lists[:host_name]+');'))
  original_definition = ''
  verify_definitions = []
  if options[:verify]
    guesses = @arrays.map { |array| array.guess }
    if guesses.include?(true)
      puts WARNING+'Verification not supported for this class'
    else

      # Generate the replacement code and the original function. The
      # non-destructive 'gsub' leaves the array's definition untouched and
      # never yields nil (as 'gsub!' does when nothing is replaced).
      @arrays.each do |array|
        replacement.insert(0,C::ExpressionStatement.parse("memcpy(#{array.golden_name},#{array.name},#{array.size.join('*')}*sizeof(#{array.type_name}));"))
        golden_definition = array.definition.gsub(/\b#{array.name}\b/,array.golden_name)
        replacement.insert(0,C::Declaration.parse(golden_definition+array.initialization))
      end
      replacement.push(C::ExpressionStatement.parse(@original_name+'('+@lists[:golden_name]+');'))
      original_definition = "void #{@original_name}(#{@lists[:host_definition]})"
      body = "#{timer_start}#{NL} // Original code#{NL}#{@code}#{NL}#{timer_stop}"
      verify_code.push(prefix+original_definition+' {'+NL+body+'}'+NL+NL)
      @arrays.select(OUTPUT).each do |array|
        replacement.push(C::ExpressionStatement.parse(("bones_verify_results_#{array.name}_#{@id}(#{array.name}#{array.flatten},#{array.golden_name}#{array.flatten},#{@hash[:argument_name]});").remove_extras))
      end
      @arrays.each do |array|
        replacement.push(C::ExpressionStatement.parse("free(#{array.golden_name});")) if array.dynamic?
      end

      # Generate the verification function itself
      definition_pattern = /#{START_DEFINITION}(.+)#{END_DEFINITION}/m
      @arrays.select(OUTPUT).each_with_index do |array,num_array|
        # NOTE(review): this updates the 'out*' entry of the hash in place,
        # so the ID is appended again on every call - confirm this is intended
        minihash = @hash["out#{num_array}".to_sym]
        minihash[:name] = minihash[:name]+'_'+@id
        minihash[:argument_definition] = @hash[:argument_definition]
        instantiated_skeleton = search_and_replace(minihash,skeleton)
        verify_definitions.push(instantiated_skeleton.scan(definition_pattern).join.strip.remove_extras)

        # 'gsub' (not 'gsub!') so that a skeleton without a definition
        # section is pushed as-is instead of as nil
        verify_code.push(instantiated_skeleton.remove_extras.gsub(definition_pattern,''))
      end
    end
  end
  return replacement, original_definition, verify_definitions.join(NL)
end
500
+
501
+ # Method to generate performance modeling code.
502
+ # This method is still under construction and will not be called yet.
503
+ # TODO: Complete this method
504
# Generates performance modeling code: for every line of 'profile.txt' in
# +model_dir+ the skeleton 'model.c' (from the same directory) is filled in
# with the profile's values and with data of this algorithm, and parsed
# into a C block. Returns a C::NodeArray holding one block per profile.
#
# Each profile line is expected to hold (at least) five comma-separated
# fields: name, comp, coal, unco and copy. Blank lines are skipped.
def performance_model_code(model_dir)

  # Load the profile database ('each_line' is used as strings no longer
  # provide an 'each' method in Ruby 1.9 and later)
  profiles = []
  File.read(File.join(model_dir,'profile.txt')).each_line do |line|
    next if line.strip.empty?
    profiles.push(line.split(','))
  end

  # Iterate over all the profiles
  result = C::NodeArray.new
  profiles.each do |profile|

    # Fill the hash with profile information and species information
    mini_hash = {
      :name => profile[0].strip,
      :comp => profile[1].strip,
      :coal => profile[2].strip,
      :unco => profile[3].strip,
      :copy => profile[4].strip,
      :f => @hash[:complexity],
      :w => @hash[:parallelism],
      :c => @species.all_structures.map { |s| simplify('4*('+s.dimensions.map { |d| sum(d) }.join('*')+')') }.join(' + '),
      :m => '1',
      :u => '0',
      :o => '8'
    }

    # Load the skeleton for the performance model and set the values according to the hash
    # (a fresh copy is read per profile, since it is modified in place)
    model_skeleton = File.read(File.join(model_dir,'model.c'))
    search_and_replace!(mini_hash,model_skeleton)
    result.push(C::Block.parse(model_skeleton))
  end
  return result
end
538
+ end
539
+
540
+ end
541
+