bones-compiler 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,386 @@
1
+
2
+ module Bones
3
+ # This class holds the main functionality: the Bones source-
4
+ # to-source compilation engine based on algorithmic skeletons.
5
+ # This class processes command line arguments, makes calls to
6
+ # the Bones preprocessor and the CAST gem, analyzes the source
7
+ # code, performs source transformations, instantiates the
8
+ # skeletons, and finally writes output code to file.
9
+ class Engine < Common
10
+
11
+ # Locate the skeletons directory.
12
+ BONES_DIR_SKELETONS = File.join(BONES_DIR,'skeletons')
13
+
14
+ # Set the name of the transformations file as found in the skeleton library.
15
+ SKELETON_FILE = 'skeletons.txt'
16
+
17
+ # A list of timer files to be found in the skeleton library.
18
+ TIMER_FILES = ['timer_1_start','timer_1_stop','timer_2_start','timer_2_stop']
19
+ # A list of files to be found in the common directory of the skeleton library (excluding timer files).
20
+ COMMON_FILES = ['prologue','epilogue','mem_prologue','mem_copy_H2D','mem_copy_D2H','mem_epilogue']
21
+ # The name of the file containing the globals as found in the skeleton library
22
+ COMMON_GLOBALS = 'globals'
23
+ # The name of the file containing the header file for the original C code as found in the skeleton library
24
+ COMMON_HEADER = 'header'
25
+ # The name of the file containing the globals for the kernel files as found in the skeleton library
26
+ COMMON_GLOBALS_KERNEL = 'globals_kernel'
27
+
28
+ # The extension of a host file in the skeleton library. See also SKELETON_DEVICE.
29
+ SKELETON_HOST = '.host'
30
+ # The extension of a device file in the skeleton library. See also SKELETON_HOST.
31
+ SKELETON_DEVICE = '.kernel'
32
+
33
+ # The suffix added to the generated output file for the host file. See also OUTPUT_DEVICE.
34
+ OUTPUT_HOST = '_host'
35
+ # The suffix added to the generated output file for the device file. See also OUTPUT_HOST.
36
+ OUTPUT_DEVICE = '_device'
37
+ # The suffix added to the generated verification file. See also OUTPUT_DEVICE and OUTPUT_HOST.
38
+ OUTPUT_VERIFICATION = '_verification'
39
+
40
+ # Initializes the engine and processes the command line
41
+ # arguments. This method uses the 'trollop' gem to parse
42
+ # the arguments and to create a nicely formatted help menu.
43
+ # This method additionally initializes a result-hash and
44
+ # reads the contents of the source file from disk.
45
+ #
46
+ # ==== Command-line usage:
47
+ # bones --application <input> --target <target> [OPTIONS]
48
+ #
49
+ # ==== Options:
50
+ # --application, -a <s>: Input application file
51
+ # --target, -t <s>: Target processor (choose from: 'GPU-CUDA','GPU-OPENCL-AMD','CPU-OPENCL-INTEL','CPU-OPENCL-AMD','CPU-OPENMP','CPU-C')
52
+ # --measurements, -m: Enable/disable timers
53
+ # --version, -v: Print version and exit
54
+ # --help, -h: Show this message
55
+ #
56
def initialize
  # Result-hash collecting all generated output. The individual lists are
  # filled while processing and are written to disk by +write_output+.
  @result = {:original_code          => [],
             :header_code            => [],
             :host_declarations      => [],
             :host_code_lists        => [],
             :algorithm_declarations => [],
             :algorithm_code_lists   => [],
             :verify_code            => []}

  # Provides a list of possible targets (e.g. 'GPU-CUDA', 'CPU-OPENCL-INTEL').
  # Every sub-directory of the skeleton library is a target, except for the
  # shared 'verification' directory. Only the directory's own name is tested,
  # so an installation path that happens to contain the word 'verification'
  # does not hide all targets.
  target_directories = Dir[File.join(BONES_DIR_SKELETONS,'*')].select do |entry|
    File.directory?(entry) && File.basename(entry) !~ /verification/
  end
  targets = target_directories.map { |entry| File.basename(entry) }.sort

  # Parse the command line options using the 'trollop' gem.
  pp_targets = targets.inspect.gsub(/("|\[)|\]/,'')
  @options = Trollop::options do
    version 'Bones '+File.read(BONES_DIR+'/VERSION').strip+' (c) 2012 Cedric Nugteren, Eindhoven University of Technology'
    banner  NL+'Bones is a parallelizing source-to-source compiler based on algorithmic skeletons. ' +
            'For more information, see the README.rdoc file or visit the Bones website at http://parse.ele.tue.nl/bones/.' + NL + NL +
            'Usage:' + NL +
            ' bones --application <input> --target <target> [OPTIONS]' + NL +
            'using the following flags:'
    opt :application, 'Input application file', :short => 'a', :type => String
    opt :target, 'Target processor (choose from: '+pp_targets+')', :short => 't', :type => String
    opt :measurements, 'Enable/disable timers', :short => 'm', :default => false
    opt :verify, 'Verify correctness of the generated code', :short => 'c', :default => false
    opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
    opt :merge_factor, 'Thread merge factor, default is 1 (==disabled)', :short => 'f', :type => Integer, :default => 1
  end

  # Validate the mandatory options. File.exist? is used because the
  # File.exists? alias is no longer available in Ruby 3.2 and later.
  Trollop::die 'no input file supplied (use: --application)' if !@options[:application_given]
  Trollop::die 'no target supplied (use: --target)' if !@options[:target_given]
  Trollop::die 'input file "'+@options[:application]+'" does not exist' if !File.exist?(@options[:application])
  Trollop::die 'target not supported, supported targets are: '+pp_targets if !targets.include?(@options[:target].upcase)
  @options[:name] = @options[:application].split('/').last.split('.').first
  @options[:target] = @options[:target].upcase

  # Extension for the host files corresponding to the target (taken from
  # the first file found in the target's 'common' directory).
  @extension = File.extname(Dir[File.join(BONES_DIR_SKELETONS,@options[:target],'common','*')][0])

  # Extension for the device files corresponding to the target (taken from
  # the first '*.kernel.*' file found in the target's 'kernel' directory).
  @algorithm_extension = File.extname(Dir[File.join(BONES_DIR_SKELETONS,@options[:target],'kernel','*.kernel.*')][0])

  # Set a prefix for functions called from the original file but defined in a host file.
  # NOTE(review): both branches currently yield an empty prefix.
  @prefix = (@options[:target] == 'GPU-CUDA') ? '' : ''

  # Set the location for the skeleton library
  @dir = {}
  @dir[:library] = File.join(BONES_DIR_SKELETONS,@options[:target])
  @dir[:skeleton_library] = File.join(@dir[:library],'kernel')
  @dir[:common_library] = File.join(@dir[:library],'common')
  @dir[:verify_library] = File.join(BONES_DIR_SKELETONS,'verification')

  # Obtain the source code from file
  @source = File.open(@options[:application],'r'){|f| f.read}
  @basename = File.basename(@options[:application],'.c')
end
117
+
118
+ # Method to process a file and to output target code. This
119
+ # method calls all relevant private methods.
120
+ #
121
+ # ==== Tasks:
122
+ # * Run the preprocessor to obtain algorithm information.
123
+ # * Use the 'CAST' gem to parse the source into an AST.
124
+ # * Call the code generator to perform the real work and produce output.
125
def process

  # Step 1: run the Bones preprocessor on the source and collect the
  # header code it produced. The timing header is only appended when
  # measurements are requested.
  preprocessor = Bones::Preprocessor.new(@source,File.dirname(@options[:application]),@basename)
  preprocessor.process
  @result[:header_code] = preprocessor.header_code
  @result[:device_header] = preprocessor.device_header
  if @options[:measurements]
    @result[:header_code] += '#include <sys/time.h>'+NL
  end

  # Step 2: parse the preprocessed code into an AST. The parser has to be
  # told about the type names 'FILE' and 'size_t' before parsing.
  parser = C::Parser.new
  ['FILE','size_t'].each { |type_name| parser.type_names << type_name }
  ast = parser.parse(preprocessor.target_code)
  ast.preprocess

  # Step 3: select a skeleton for every algorithm. The global code is
  # loaded exactly once, as soon as the first algorithm with an available
  # skeleton is encountered.
  skeleton_file = File.join(@dir[:library],SKELETON_FILE)
  globals_pending = true
  preprocessor.algorithms.each do |algorithm|
    algorithm.species.set_skeleton(skeleton_file)
    next unless algorithm.species.skeleton_name && globals_pending
    @result[:host_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS+@extension)))
    @result[:algorithm_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS_KERNEL+@extension)))
    globals_pending = false
  end

  # Step 4: generate code. Either all algorithms are handled (option value
  # 99) or only the requested one, clamped to the last available algorithm.
  @result[:original_code] = ast
  preprocessor.algorithms.each_with_index do |algorithm,algorithm_number|
    selected = @options[:only_alg_number] == 99 ||
               algorithm_number == [@options[:only_alg_number],preprocessor.algorithms.length-1].min
    next unless selected

    puts MESSAGE+'Starting code generation for algorithm "'+algorithm.name+'"'
    unless algorithm.species.skeleton_name
      puts WARNING+'Skeleton "'+algorithm.species.name+'" not available'
      next
    end

    # The thread merge factor is only passed on for the CUDA target.
    algorithm.merge_factor = @options[:merge_factor] if (@options[:target] == 'GPU-CUDA')
    algorithm.set_function(ast)
    algorithm.populate_variables(ast,preprocessor.defines)
    algorithm.populate_lists()
    algorithm.populate_hash()
    generate(algorithm)
    puts MESSAGE+'Code generated using the "'+algorithm.species.skeleton_name+'" skeleton'
  end
end
171
+
172
+ # This method writes the output code to files. It creates
173
+ # a new directory formatted as 'name_target' and produces
174
+ # three files.
175
+ #
176
+ # ==== Output files:
177
+ # * +main+ - a file containing the original code with function calls substituting the original algorithms.
178
+ # * +target+ - a file containing the host code for the target.
179
+ # * +kernel+ - a file containing the kernel code for the target.
180
def write_output

  # Create a new directory for the output, next to the input file. Only the
  # file name itself is stripped of its extension, so dots in the leading
  # path (e.g. './app.c' or '../dir.v2/app.c') do not truncate the name.
  application = @options[:application]
  stem = File.basename(application).split('.').first.to_s
  directory = File.join(File.dirname(application),stem)+'_'+@options[:target]
  # Mode 0755: a directory needs the search (x) bit to be usable by
  # group/others; the process umask is still applied on top of this.
  Dir.mkdir(directory,0755) unless File.directory?(directory)

  parser = C::Parser.new
  parser.type_names << 'FILE'
  parser.type_names << 'size_t'

  # Populate the main file
  File.open(File.join(directory,@options[:application].split(File::SEPARATOR).last),'w') do |main|
    main.puts '#include <string.h>' if @options[:verify]
    main.puts @result[:header_code]
    main.puts File.read(File.join(@dir[:common_library],COMMON_HEADER+@extension))
    main.puts @result[:host_declarations]
    main.puts
    begin
      main.puts parser.parse(@result[:original_code]).to_s
    rescue
      # A second attempt is made on a copy of the code when parsing fails.
      puts WARNING+'Recovering from CAST parse error'
      main.puts parser.parse(@result[:original_code].clone).to_s
    end
  end

  # Populate the verification file (only if the '--verify' flag is given)
  if @options[:verify]
    File.open(File.join(directory,@options[:name]+OUTPUT_VERIFICATION+@extension),'w') do |verification|
      verification.puts @result[:header_code]
      verification.puts File.read(File.join(@dir[:verify_library],'header.c'))
      verification.puts
      verification.puts @result[:verify_code]
    end
  end

  # Populate the target file
  File.open(File.join(directory,@options[:name]+OUTPUT_HOST+@extension),'w') do |target|
    target.puts @result[:header_code]
    target.puts @result[:algorithm_declarations]
    target.puts
    target.puts @result[:host_code_lists]
  end

  # Populate the algorithm file
  File.open(File.join(directory,@options[:name]+OUTPUT_DEVICE+@algorithm_extension),'w') do |algorithm|
    algorithm.puts @result[:device_header]
    algorithm.puts @result[:algorithm_code_lists]
  end

end
230
+
231
+ # Start of the class's private methods.
232
+ private
233
+
234
+ # This method takes as an input an individual algorithm and
235
+ # generates the corresponding output code. The method first
236
+ # creates a search-and-replace hash, after which it instan-
237
+ # tiates a skeleton.
238
+ #
239
+ # The generated code is pushed onto the result-hash. An error
240
+ # is reported (through +raise_error+) when the skeleton files
241
+ # are not available.
242
def generate(algorithm)

  # Determine the skeleton filenames and load the skeletons from the skeleton library.
  # File.exist? is used because the File.exists? alias is no longer available in Ruby 3.2 and later.
  file_name_host = File.join(@dir[:skeleton_library],algorithm.species.skeleton_name+SKELETON_HOST)
  file_name_device = File.join(@dir[:skeleton_library],algorithm.species.skeleton_name+SKELETON_DEVICE)
  if !File.exist?(file_name_host+@extension) || !File.exist?(file_name_device+@algorithm_extension)
    raise_error('Skeleton files for skeleton "'+algorithm.species.skeleton_name+'" not available')
  end
  skeletons = {:host   => File.read(file_name_host+@extension),
               :device => File.read(file_name_device+@algorithm_extension)}

  # Perform the transformations on the algorithm's code
  algorithm.perform_transformations(algorithm.species.settings)

  # Load the common skeletons from the skeleton library
  COMMON_FILES.each do |skeleton|
    skeletons[skeleton.to_sym] = File.read(File.join(@dir[:common_library],skeleton+@extension))
  end

  # Load the timer code from the skeleton library (only if the '--measurements' flag is given)
  TIMER_FILES.each do |skeleton|
    skeletons[skeleton.to_sym] = @options[:measurements] ? File.read(File.join(@dir[:common_library],skeleton+@extension)) : ''
  end

  # Perform search-and-replace on the device skeleton
  search_and_replace!(algorithm.hash,skeletons[:device])
  # NOTE(review): the return value is not used here, so this presumably
  # relies on remove_extras modifying the string in place - confirm.
  skeletons[:device].remove_extras

  # Replace mathematical functions with their equivalent device functions
  if @options[:target] == 'GPU-CUDA'
    math_functions = {:sqrt => 'sqrtf', :max => 'fmaxf', :min => 'fminf'}
    math_functions.each do |original, replacement|
      skeletons[:device].gsub!(/\b#{original}\(/,replacement+'(')
    end
  end

  # Create the algorithm declaration list from the header supplied in the skeletons
  algorithm_declaration = skeletons[:device].scan(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m).join.strip.remove_extras
  @result[:algorithm_declarations].push(algorithm_declaration)

  # Remove the (commented) algorithm declaration from the code and push the skeleton to the output.
  # String#gsub! returns nil when nothing was replaced, so the (modified) string itself is pushed
  # instead of the return value; otherwise a skeleton without a declaration would be lost.
  skeletons[:device].gsub!(/#{START_DEFINITION}(.+)#{END_DEFINITION}/m,'')
  @result[:algorithm_code_lists].push(skeletons[:device])

  # Setup some variables to create the host body function including memory allocation and memory copies
  processed = {:mem_prologue => '', :mem_copy_H2D => '', :mem_copy_D2H => '', :mem_epilogue => ''}

  # Iterate over all the array variables and create a mini-search-and-replace hash for each array (all arrays)
  algorithm.arrays.each do |array|
    minihash = { :array               => array.name,
                 :type                => array.type_name,
                 :flatten             => array.flatten,
                 :variable_dimensions => array.size.join('*')}

    # Apply the mini-search-and-replace hash to create the memory allocations, memory copies (if input only), etc.
    processed[:mem_prologue] += search_and_replace(minihash,skeletons[:mem_prologue])
    processed[:mem_copy_H2D] += search_and_replace(minihash,skeletons[:mem_copy_H2D]) if array.input? || array.species.shared?
    processed[:mem_epilogue] += search_and_replace(minihash,skeletons[:mem_epilogue])
  end

  # Iterate over all the array variables and create a mini-search-and-replace hash for each array (output arrays)
  algorithm.arrays.select(OUTPUT).each_with_index do |array, num_array|
    hash = algorithm.hash["out#{num_array}".to_sym]
    minihash = { :array               => array.name,
                 :type                => array.type_name,
                 :flatten             => array.flatten,
                 :offset              => '('+hash[:dimension0][:from]+')',
                 :variable_dimensions => '('+hash[:dimensions]+')'}

    # Perform selective copy for arrays with 2 dimensions (uses a for-loop over the memory copies).
    # This path is currently switched off by the trailing '&& false'.
    if array.dimensions == 2 && @options[:target] == 'GPU-CUDA' && false
      x_from = '('+hash[:dimension0][:from]+')'
      x_to   = '('+hash[:dimension0][:to]+')'
      y_from = '('+hash[:dimension1][:from]+')'
      y_sum  = '('+hash[:dimension1][:sum]+')'
      y_size = array.size[1]
      processed[:mem_copy_D2H] += NL+INDENT+"for(int bones_x=#{x_from}; bones_x<=#{x_to}; bones_x++) {"+INDENT*2
      minihash[:offset] = "(bones_x*#{y_size})+#{y_from}"
      minihash[:variable_dimensions] = "#{y_sum}"
    # Don't do selective copy for multi-dimensional arrays (yet)
    elsif array.dimensions > 1
      minihash[:offset] = '0'
      minihash[:variable_dimensions] = array.size.join('*')
    end

    # Apply the mini-search-and-replace hash to create the memory copies from device to host
    processed[:mem_copy_D2H] += search_and_replace(minihash,skeletons[:mem_copy_D2H])
    if array.dimensions == 2 && @options[:target] == 'GPU-CUDA' && false
      processed[:mem_copy_D2H] += INDENT+'}'
    end
  end

  # Apply the search-and-replace hash to all timer skeletons and the host skeleton
  (['host']+TIMER_FILES).each do |skeleton|
    search_and_replace!(algorithm.hash,skeletons[skeleton.to_sym])
  end

  # Repair some invalid syntax that could have been introduced by performing the search-and-replace
  skeletons[:host].remove_extras

  # Run the prologue/epilogue code through the search-and-replace hash
  search_and_replace!(algorithm.hash,skeletons[:prologue])
  search_and_replace!(algorithm.hash,skeletons[:epilogue])

  # Construct the final host function, including the timers and memory copies
  host = skeletons[:prologue     ] + skeletons[:timer_1_start] +
         processed[:mem_prologue ] + processed[:mem_copy_H2D ] +
         skeletons[:timer_2_start] + skeletons[:host         ] + skeletons[:timer_2_stop ] +
         processed[:mem_copy_D2H ] + processed[:mem_epilogue ] +
         skeletons[:timer_1_stop ] + skeletons[:epilogue     ]

  # Generate code to replace the original code, including verification code if specified by the option flag
  verify_skeleton = File.read(File.join(@dir[:verify_library],'verify_results.c'))
  timer_start = (@options[:measurements]) ? File.read(File.join(@dir[:verify_library],'timer_start.c')) : ''
  timer_stop = (@options[:measurements]) ? File.read(File.join(@dir[:verify_library],'timer_stop.c')) : ''
  replacement_code, original_definition, verify_definition = algorithm.generate_replacement_code(@options, verify_skeleton, @result[:verify_code], @prefix, timer_start, timer_stop)
  @result[:host_declarations].push(verify_definition)

  # Add a performance model to the original code
  #replacement_code.insert(0,algorithm.performance_model_code('model'))

  # Replace mallocs and frees in the original code with aligned memory allocations (only for the CPU-OPENCL-INTEL target)
  if @options[:target] == 'CPU-OPENCL-INTEL'
    @result[:original_code].seach_and_replace_function_call(C::Variable.parse('malloc'),C::Variable.parse(VARIABLE_PREFIX+'malloc_128'))
    @result[:original_code].seach_and_replace_function_call(C::Variable.parse('free'),C::Variable.parse(VARIABLE_PREFIX+'free_128'))
  end

  # Give the original main function a new name
  @result[:original_code].seach_and_replace_function_definition('main',VARIABLE_PREFIX+'main')

  # Replace the original code with a function call to the newly generated code
  @result[:original_code].seach_and_replace_node(algorithm.code,replacement_code)

  # The host code is generated, push the data to the output hashes
  accelerated_definition = 'void '+algorithm.name+'_accelerated('+algorithm.lists[:host_definition]+')'
  @result[:host_code_lists].push(@prefix+accelerated_definition+' {'+NL+host+NL+'}')
  @result[:host_declarations].push(@prefix+accelerated_definition+';'+NL+@prefix+original_definition+';')
end
382
+
383
+ end
384
+
385
+ end
386
+
@@ -0,0 +1,161 @@
1
+
2
module Bones
  # This is the C99 pre-processor for Bones. It has two tasks:
  # * To remove all lines starting with '#' (pre-processor directives).
  # * To detect all pragmas forming algorithm classes from the source.
  #
  # ==== Attributes:
  # * +header_code+ - The '#include' and '#define' lines of the source, in their original order. Other pre-processor lines are dropped.
  # * +algorithms+ - An array of identified algorithms, each of class Bones::Algorithm.
  # * +target_code+ - The processed code containing no Bones directives nor other pre-processor directives (such as includes and defines).
  # * +device_header+ - The '#define' lines found directly in the source (defines from included headers are not added here).
  # * +defines+ - A hash from macro name (symbol) to replacement text (string), collected from the source and from its quoted includes.
  class Preprocessor < Common
    attr_reader :header_code, :algorithms, :target_code, :device_header, :defines

    # Denotes the start of an algorithmic species.
    IDENTIFIER = '#pragma species'

    # Regular expression to identify whitespaces (tabs, spaces).
    WHITESPACE = '\s*'

    # This directive denotes the start of an algorithm. It is based on the IDENTIFIER constant.
    PRIMITIVE_START = IDENTIFIER+' kernel'
    # This directive denotes the end of an algorithm. It is based on the IDENTIFIER constant.
    PRIMITIVE_END = IDENTIFIER+' endkernel'

    # A regular expression capturing a prefix in an algorithm (e.g. unordered/multiple).
    REGEXP_PREFIX = /^[a-z]+ /

    # Providing a default name in case an algorithm is not named.
    DEFAULT_NAME = 'algorithm'

    # One level of conditional compilation while reading a header.
    # * +active+ - Whether defines/includes in the current branch are taken into account.
    # * +invertible+ - Whether an '#else' may flip +active+ (only true when the condition could be evaluated).
    Conditional = Struct.new(:active, :invertible)

    # This is the method which initializes the preprocessor.
    # Initialization requires the target source code to process,
    # which is stored in the instance variable +@source_code+,
    # the +directory+ in which quoted includes are looked up, and
    # the +filename+ that is passed on to every Bones::Algorithm.
    def initialize(source_code,directory,filename)
      @source_code = source_code
      @target_code = ''
      @header_code = ''
      @device_header = ''
      @directory = directory
      @filename = filename
      @algorithms = []
      @defines = {}
      @found_algorithms = 0
    end

    # This is the method to perform the actual preprocessing.
    # The source is read line by line:
    # * '#include' lines are kept as header code; quoted includes are scanned for defines.
    # * '#define' lines are kept as header code and device header, and recorded in +defines+.
    # * Kernel start/end markers delimit the code of an algorithm.
    # * All other pre-processor lines are dropped.
    # * All remaining lines end up in +target_code+ (with defines replaced inside a kernel).
    #
    # The output is stored in the attributes +header_code+, +device_header+,
    # +defines+, +algorithms+, and +target_code+. A warning is printed when
    # the start and end markers do not pair up.
    def process
      algorithm_code = ''
      species = nil
      found = 0 # Nesting depth of kernel markers; only the outermost pair forms an algorithm

      # Process the file line by line
      @source_code.each_line.with_index do |line,index|
        if line =~ /^#{WHITESPACE}#/

          # Keep 'include' statements as header code
          if line =~ /^#{WHITESPACE}#include/
            @header_code += line
            # Non-greedy on purpose: only the text between the first pair of quotes is the filename
            process_header($1) if line =~ /"([^"]*)"/

          # Process 'define' statements for the algorithm code, but also keep as header code
          elsif line =~ /^#{WHITESPACE}#define/
            @header_code += line
            @device_header += line
            # Function-like macros cannot be parsed into a name/value pair and are only kept as text
            name, value = parse_define(line)
            @defines[name] = value if name

          # Found the start of algorithm marker
          elsif line =~ /^#{WHITESPACE}#{PRIMITIVE_START}/
            if found == 0
              line = replace_defines(line,@defines)
              prefix, input, output = marker_to_algorithm(line)
              puts MESSAGE+'Found algorithm "'+(prefix+' '+input+' '+ARROW+' '+output).lstrip+'"' if VERBOSE
              species = Bones::Species.new(prefix,input,output)
              @found_algorithms = @found_algorithms + 1
            end
            found = found + 1

          # Found the end of algorithm marker
          elsif line =~ /^#{WHITESPACE}#{PRIMITIVE_END}/
            if found == 0
              # Without this guard the depth would become negative and every following kernel would be silently lost
              puts WARNING+'Found a "'+PRIMITIVE_END+'" without a matching "'+PRIMITIVE_START+'" on line '+(index+1).to_s+', ignoring it'
            else
              if found == 1
                name = line.strip.scan(/^#{WHITESPACE}#{PRIMITIVE_END} (.+)/).join
                name = DEFAULT_NAME if name == ''
                @algorithms.push(Bones::Algorithm.new(name,@filename,index.to_s,species,algorithm_code))
                algorithm_code = ''
              end
              found = found - 1
            end
          end

        # Code inside a kernel: replace the defines and collect it for the algorithm as well
        elsif found > 0
          algorithm_line = replace_defines(line,@defines)
          @target_code += algorithm_line
          algorithm_code += algorithm_line

        # Code outside of any kernel is copied unmodified
        else
          @target_code += line
        end
      end
      puts WARNING+'Begin/end kernel mismatch ('+@found_algorithms.to_s+' versus '+@algorithms.length.to_s+'), probably missing a "'+PRIMITIVE_END+'"' unless @algorithms.length == @found_algorithms
    end

    # This is the method to preprocess a header file. It searches
    # for defines and adds those to +defines+, recursing into quoted
    # includes. The file is read from +filename+ relative to the
    # directory given at initialization; File.read raises if it does
    # not exist.
    #
    # Conditional compilation is followed as far as it can be evaluated:
    # * '#ifdef' / '#ifndef' are evaluated against the defines known so far.
    # * '#else' inverts the branch of an '#ifdef' / '#ifndef'.
    # * '#if' cannot be evaluated; its contents follow the enclosing branch.
    # * '#elif' is not interpreted; its contents follow the preceding branch.
    def process_header(filename)
      conditionals = [Conditional.new(true,false)]

      # Process the file line by line
      File.read(File.join(@directory,filename)).each_line do |line|
        next unless line =~ /^#{WHITESPACE}#/
        active = conditionals.last.active

        # Process 'include' statements
        if line =~ /^#{WHITESPACE}#include/
          process_header($1) if active && line =~ /"([^"]*)"/

        # Process 'define' statements
        elsif line =~ /^#{WHITESPACE}#define/
          if active
            name, value = parse_define(line)
            @defines[name] = value if name
          end

        # Process 'ifdef' statements
        elsif line =~ /^#{WHITESPACE}#ifdef#{WHITESPACE}(\w+)/
          conditionals.push(Conditional.new(active && @defines.has_key?($1.to_sym),true))

        # Process 'ifndef' statements (e.g. include guards and default values)
        elsif line =~ /^#{WHITESPACE}#ifndef#{WHITESPACE}(\w+)/
          conditionals.push(Conditional.new(active && !@defines.has_key?($1.to_sym),true))

        # Process other 'if' statements: pushed only to keep the stack balanced with their 'endif'
        elsif line =~ /^#{WHITESPACE}#if\b/
          conditionals.push(Conditional.new(active,false))

        # Process 'else' statements
        elsif line =~ /^#{WHITESPACE}#else\b/
          current = conditionals.last
          if current.invertible
            # The else-branch is only active if the enclosing branch is active as well
            current.active = conditionals[-2].active && !current.active
            current.invertible = false
          end

        # Process 'endif' statements (never remove the file-level entry)
        elsif line =~ /^#{WHITESPACE}#endif/
          conditionals.pop if conditionals.length > 1
        end
      end
    end

    # From this point on are the private methods.
    private

    # Method to extract the algorithm details from a marker found in code.
    # Returns the prefix (an empty string if there is none), the input
    # structure(s) and the output structure(s), the latter two being the
    # stripped texts before and after the ARROW. Raises an ArgumentError
    # if the marker does not contain both an input and an output.
    def marker_to_algorithm(marker)
      algorithm = marker.strip.scan(/^#{WHITESPACE}#{PRIMITIVE_START} (.+)/).join
      prefix = ''
      if algorithm =~ REGEXP_PREFIX
        prefix, _separator, algorithm = algorithm.partition(' ')
      end
      input, output = algorithm.split(ARROW)
      if input.nil? || output.nil?
        raise ArgumentError, 'Malformed species "'+marker.strip+'", expected "'+PRIMITIVE_START+' <input> '+ARROW+' <output>"'
      end
      return prefix, input.strip, output.strip
    end

    # Method to parse an object-like '#define NAME VALUE' line. Returns the
    # name as a symbol and the value as a string, or nil if the line does
    # not have that shape (e.g. a function-like macro such as 'MAX(a,b)').
    # Anything following '//' is ignored, a define without a value yields an
    # empty string, and only the first whitespace-delimited token of the
    # value is captured.
    def parse_define(line)
      code = line.split('//')[0].to_s
      match = code.match(/^#{WHITESPACE}#define\s+(\w+)(?:\s+(\S*)|\z)/)
      return nil if match.nil?
      return match[1].to_sym, match[2].to_s
    end

  end

end
161
+