bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,76 @@
1
+
2
+ module Adarwin
3
+
4
+ # This is the C99 pre-processor for Adarwin. It has the following tasks:
5
+ # * Extract the SCoP part from the code (the region of interest)
6
+ # * Extract the header code (defines, includes, etc.)
7
+ # * Output the original code without pre-processor directives
8
+ # * Output the original code minus the SCoP (SCoP to be filled in later)
9
+ class Preprocessor < Common
10
+ attr_reader :source_code, :header_code, :parsed_code, :scop_code, :target_code
11
+
12
+ # Regular expression to identify whitespaces (tabs, spaces).
13
+ WHITESPACE = '\s*'
14
+
15
+ # This is the method which initializes the preprocessor. Initialization
16
+ # requires the target source code to process, which is then set as the instance
17
+ # variable +@source_code+.
18
+ def initialize(source_code)
19
+ @source_code = source_code
20
+ @header_code = ''
21
+ @parsed_code = ''
22
+ @target_code = ''
23
+ @scop_code = ''
24
+ end
25
+
26
+ # This is the method to perform the actual preprocessing. This method takes
27
+ # care of all the pre-processor tasks. The output is stored in the attributes
28
+ # +header_code+, +parsed_code+, +target_code+, and +scop_code+.
29
+ # FIXME: What about multi-line statements? For example, a multi-line comment
30
+ # could have a commented-out SCoP or define or include.
31
+ def process
32
+ scop = false
33
+ scop_in_code = false
34
+
35
+ # Process the file line by line
36
+ @source_code.each_line.with_index do |line,index|
37
+ if line =~ /^#{WHITESPACE}#/
38
+
39
+ # Keep 'include' statements as header code
40
+ if line =~ /^#{WHITESPACE}#include/
41
+ @header_code += line
42
+ @target_code += line
43
+
44
+ # Process 'define' statements
45
+ elsif line =~ /^#{WHITESPACE}#define/
46
+ @header_code += line
47
+ @target_code += line
48
+
49
+ # Found the start of a SCoP
50
+ elsif line =~ /^#{WHITESPACE}#{SCOP_START}/
51
+ scop = true
52
+ scop_in_code = true
53
+ @parsed_code += '{'+NL
54
+
55
+ # Found the end of a SCoP
56
+ elsif line =~ /^#{WHITESPACE}#{SCOP_END}/
57
+ scop = false
58
+ @parsed_code += '}'+NL
59
+ end
60
+
61
+ # Nothing special in the code going on here
62
+ else
63
+ @scop_code += line if scop
64
+ @parsed_code += line
65
+ @target_code += line
66
+ end
67
+ end
68
+
69
+ # Exit if there is no SCoP found
70
+ if !scop_in_code
71
+ raise_error('No "#pragma scop" found in the source code')
72
+ end
73
+ end
74
+ end
75
+ end
76
+
@@ -0,0 +1,261 @@
1
+
2
+ module Adarwin
3
+
4
+ # This class represents an array reference characterisation. This reference is
5
+ # constructed as a 5-tuple (tN,tA,tD,tE,tS) with the following information:
6
+ # * tN: The name of the reference.
7
+ # * tA: The access direction (read or write).
8
+ # * tD: The full domain accessed.
9
+ # * tE: The number of elements accessed each iteration (the size).
10
+ # * tS: The step of accesses among iterations.
11
+ # To be able to compute the 5-tuple, the reference also stores information
12
+ # about the loops and conditional statements to which the original array
13
+ # reference is subjected.
14
+ #
15
+ # This class contains methods to perform among others the following:
16
+ # * Initialise the class and set the 5-tuple (N,A,D,E,S)
17
+ # * Retrieve information on array indices
18
+ # * Print in different forms (species, ARC, copy/sync pragmas)
19
+ class Reference
20
+ attr_accessor :tN, :tA, :tD, :tE, :tS
21
+ attr_accessor :bounds, :indices, :pattern, :id
22
+ attr_accessor :all_loops
23
+
24
+ # This method initialises the array reference class. It takes details of the
25
+ # reference itself and details of the loop nest it belongs to. The method
26
+ # performs among others the following:
27
+ # * It initialises the 5-tuple (N,A,D,E,S)
28
+ # * It constructs the sets of loops (all,inner,outer) for this reference
29
+ # * It computes the bounds based on loop data and on if-statements
30
+ # * It computes the domain (D), number of elements (E), and step (S)
31
+ def initialize(reference,id,inner_loops,outer_loops,verbose)
32
+ @id = id
33
+
34
+ # Initialise the 5-tuple (already fill in N and A)
35
+ @tN = reference[:name]
36
+ @tA = reference[:type]
37
+ @tD = []
38
+ @tE = []
39
+ @tS = []
40
+
41
+ # Set the inner loops as the loop nest's inner loop intersected with all
42
+ # loops found for this statement. Beware of the difference between loops
43
+ # of a loop nest and loops of a statement.
44
+ @all_loops = reference[:loop_data]
45
+ @inner_loops = inner_loops & @all_loops
46
+ @outer_loops = outer_loops
47
+
48
+ # Set the indices of the array reference (e.g. 2*i+4). The size of this
49
+ # array is equal to the number of dimensions of the array.
50
+ @indices = reference[:indices]
51
+
52
+ # Set the if-statements for the reference. Process them together with the
53
+ # loop start/end conditions to obtain a final set of conditions/bounds.
54
+ @bounds = []
55
+ loop_vars = @all_loops.map{ |l| l[:var]}
56
+ @all_loops.each do |loop_data|
57
+ conditions = [loop_data[:min],loop_data[:max]]
58
+ reference[:if_statements].each do |if_statement|
59
+ condition_if = if_statement.map{ |c| solve(c,loop_data[:var],loop_vars) }
60
+ conditions = [
61
+ max(conditions[0],condition_if[0]),
62
+ min(conditions[1],condition_if[1])
63
+ ]
64
+ end
65
+ @bounds << { :var => loop_data[:var], :min => conditions[0], :max => conditions[1] }
66
+ end
67
+
68
+ # Compute the domain (D) based on the bounds. The bounds are derived from
69
+ # the if-statements and for-loops.
70
+ @tD = @indices.map do |i|
71
+ index_to_interval(i,@bounds)
72
+ end
73
+
74
+ # Compute the number of elements (E) accessed every iteration (the size).
75
+ # TODO: Clean-up this method.
76
+ @tE = @indices.map do |i|
77
+ #if !dependent?(i,@all_loops)
78
+ # puts "independent"
79
+ # index_to_interval(i,@inner_loops)
80
+ #else
81
+ #puts "dependent"
82
+ get_base_offset(i)
83
+ #end
84
+ end
85
+
86
+ # Compute the step taken. There are 3 cases considered; the index is: 1)
87
+ # dependent on the outer loops, 2) dependent on the inner loops, or 3)
88
+ # independent of any loops.
89
+ @tS = @indices.map do |i|
90
+ if dependent?(i,@inner_loops)
91
+ index_to_interval(i,@inner_loops).length
92
+ elsif dependent?(i,@outer_loops)
93
+ get_step(i,@outer_loops)
94
+ else
95
+ '0'
96
+ end
97
+ end
98
+
99
+ # If the step and the domain are equal in size, the step can also be set
100
+ # to zero to reflect accessing the full array.
101
+ @tS.each_with_index do |tS,index|
102
+ if (tS == @tD[index].length) || (@tD[index].length == '1')
103
+ @tS[index] = '0'
104
+ end
105
+ end
106
+
107
+ # Print the result
108
+ puts MESSAGE+"Found: #{to_arc}" if verbose
109
+ end
110
+
111
+ # This method replaces loop variables for a given set of loops with 0. This
112
+ # basically gives us the offset of array references with respect to the loop
113
+ # variable. For example, A[2*i+4] and A[i+j+3] will give us [4,j+3] with
114
+ # respect to an i-loop.
115
+ def get_base_offset(index)
116
+ index = index.clone
117
+ @outer_loops.each do |for_loop|
118
+ search = C::Variable.parse(for_loop[:var])
119
+ replace = C::Expression.parse('0')
120
+ index = index.search_and_replace_node(search,replace)
121
+ end
122
+ return index_to_interval(index,@inner_loops)
123
+ end
124
+
125
+ # Method to fill in the ranges for an array reference. This is based on
126
+ # information of the loop nests. The output is an interval.
127
+ def index_to_interval(index,loops)
128
+ access_min = find_extreme(:min,index,loops)
129
+ access_max = find_extreme(:max,index,loops)
130
+ return Interval.new(access_min,access_max,@all_loops)
131
+ end
132
+
133
+ # Substitute loop data with the upper-bound or lower-bound of a loop to find
134
+ # the minimum/maximum of an array reference. The body is executed twice,
135
+ # because a loop bound can be based on another loop variable.
136
+ def find_extreme(position,index,loops)
137
+ index = index.clone
138
+ 2.times do
139
+ loops.each do |for_loop|
140
+ search = C::Variable.parse(for_loop[:var])
141
+ replace = C::Expression.parse(for_loop[position])
142
+ index = index.search_and_replace_node(search,replace)
143
+ end
144
+ end
145
+ return simplify(index.to_s.gsub(';','').gsub(' ','').gsub("\t",''))
146
+ end
147
+
148
+ # Method to check whether an index is dependent on a given set of loops.
149
+ # For example, A[i+3] is independent of j, but dependent on i.
150
+ def dependent?(index,loops)
151
+ index.preorder do |node|
152
+ if node.variable?
153
+ loops.each do |for_loop|
154
+ return true if (node.name == for_loop[:var])
155
+ end
156
+ end
157
+ end
158
+ return false
159
+ end
160
+
161
+ # Method to retrieve the step for a given array index and loops. The method
162
+ # returns the difference between two subsequent iterations: one with the
163
+ # loop variable at 0 and one after the first increment.
164
+ def get_step(index,loops)
165
+
166
+ # Replace the loop indices with 0
167
+ index1 = index.clone
168
+ loops.each do |for_loop|
169
+ search = C::Variable.parse(for_loop[:var])
170
+ replace = C::Expression.parse('0')
171
+ index1 = index1.search_and_replace_node(search,replace)
172
+ end
173
+
174
+ # Replace the loop indices with the loop step
175
+ index2 = index.clone
176
+ loops.each do |for_loop|
177
+ search = C::Variable.parse(for_loop[:var])
178
+ replace = C::Expression.parse(for_loop[:step])
179
+ index2 = index2.search_and_replace_node(search,replace)
180
+ end
181
+
182
+ # Return the difference
183
+ return abs(simplify("(#{index2})-(#{index1})"))
184
+ end
185
+
186
+ # Method to output the result as algorithmic species. This reflects the
187
+ # algorithm as presented in the scientific paper.
188
+ def to_species
189
+ if @tS.reject{ |s| s == "0"}.empty?
190
+ if (@tA == 'read') # Full (steps length 0 and read)
191
+ @pattern = 'full'
192
+ else # Shared (steps length 0 and write)
193
+ @pattern = 'shared'
194
+ end
195
+ elsif @tE.reject{ |s| s.length == "1"}.empty? # Element (sizes length 1)
196
+ @pattern = 'element'
197
+ elsif step_smaller_than_num_elements? # Neighbourhood (tS < tE)
198
+ @pattern = 'neighbourhood('+@tE.join(DIM_SEP)+')'
199
+ else # Chunk (tS >= tE)
200
+ @pattern = 'chunk('+@tE.join(DIM_SEP)+')'
201
+ end
202
+
203
+ # Fill in the name and the domain and return the result
204
+ return @tN+'['+@tD.join(DIM_SEP)+']'+PIPE+@pattern
205
+ end
206
+
207
+ # Method to output the result as an array reference characterisation (ARC).
208
+ def to_arc
209
+ return "(#{tN},#{tA},#{tD},#{tE},#{tS})".gsub('"','').gsub(' ','')
210
+ end
211
+
212
+ # Method to output a copyin/copyout statement. This indicates the name (N),
213
+ # the domain (D), and a unique identifier.
214
+ def to_copy(id)
215
+ @tN+'['+@tD.join(DIM_SEP)+']'+'|'+id.to_s
216
+ end
217
+
218
+ # Method to print the unique identifier of the loop nest in terms of
219
+ # synchronisation statements to be printed. This is a per-reference id
220
+ # instead of a per-loop id, because it depends on the type of access (read
221
+ # or write).
222
+ def get_sync_id
223
+ (@tA == 'write') ? 2*@id+1 : 2*@id
224
+ end
225
+
226
+ # Helper method for the +to_species+ method. This method compares the step
227
+ # with the number of elements accessed to determine which one is smaller.
228
+ # FIXME: This is based on the +compare+ method which might take a guess.
229
+ def step_smaller_than_num_elements?
230
+ @tS.each_with_index do |step,index|
231
+ if step != '0'
232
+ comparison = compare(step,@tE[index].length,@all_loops)
233
+ if (comparison == 'lt')
234
+ return true
235
+ end
236
+ end
237
+ end
238
+ return false
239
+ end
240
+
241
+ # Method to print out a human readable form of the array references (e.g.
242
+ # [4*i+6][j]). This is basically what the +puts+ method also does.
243
+ def get_references
244
+ return @indices.to_ary.map{ |i| i.to_s }
245
+ end
246
+
247
+ # Method to find out if the reference is dependent on a variable. It is
248
+ # used by the copy optimisations.
249
+ def depends_on?(var)
250
+ @indices.each do |index|
251
+ index.preorder do |node|
252
+ if node.variable?
253
+ return true if (node.name == var)
254
+ end
255
+ end
256
+ end
257
+ return false
258
+ end
259
+
260
+ end
261
+ end
data/lib/bones.rb CHANGED
@@ -1,10 +1,6 @@
1
1
 
2
- # Bones requires 'fileutils' from the Ruby standard library.
3
- require 'fileutils'
4
-
5
- # Bones uses the 'trollop' gem to parse command line options.
6
- require 'rubygems'
7
- require 'trollop'
2
+ # Include the common part between Bones and Aset
3
+ require 'common.rb'
8
4
 
9
5
  # We define a custom error class for code generation related
10
6
  # errors (any error raised by Bones).
@@ -16,20 +12,9 @@ def raise_error(message) #:nodoc:
16
12
  end
17
13
 
18
14
  # Extending the Ruby standard string class to support some
19
- # addition methods. They include a hack of the gsub! command,
20
- # and two methods related to comma removal.
15
+ # additional methods: two methods related to comma removal.
21
16
  class String #:nodoc:
22
17
 
23
- # Extend the Ruby string class to be able to chain 'gsub!'
24
- #-commands. This code is taken from the web.
25
- meth = 'gsub!'
26
- orig_meth = "orig_#{meth}"
27
- alias_method orig_meth, meth
28
- define_method(meth) do |*args|
29
- self.send(orig_meth, *args)
30
- self
31
- end
32
-
33
18
  # Replace double commas in a string with a single comma.
34
19
  # This method is useful for function-argument lists.
35
20
  def remove_double_commas
@@ -213,43 +198,6 @@ module Bones
213
198
  return code
214
199
  end
215
200
 
216
- # Helper method to evaluate mathematical expressions, possibly containing
217
- # symbols. This method is only used for readability, without it the code
218
- # is functionally correct, but expressions might be larger than needed.
219
- # This method is only tested on integers.
220
- def simplify(expr)
221
- raise_error('Invalid expression to simplify') if !expr
222
- done = false
223
- while !done do
224
- old_expr = expr
225
- case expr
226
- when /^\(([^\(\)]*)\)$/ then expr = $1 # Remove outer brackets
227
- when /(.*)\((-?\w*)\)(.*)/ then expr = $1+$2+$3 # Remove brackets with one constant or variable inside
228
- when /(.*)\(\(([^\(\)]*)\)\)(.*)/ then expr = $1+'('+$2+')'+$3 # Substitute double brackets into single brackets
229
- when /(.*)(\-\d+)\*(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants (starting with a '-')
230
- when /(.*)(\-\d+)\+(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants (starting with a '-')
231
- when /(.*)(\-\d+)\-(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants (starting with a '-')
232
- when /(.*)\b(\d+)\*(\d+)\b(.*)/ then expr = $1+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants
233
- when /(.*)\b(\d+)\+(\d+)\b(.*)/ then expr = $1+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants
234
- when /(.*)\b(\d+)\-(\d+)\b(.*)/ then expr = $1+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants
235
- when /(.*)\b(\w+)\-(\2)\b(.*)/ then expr = $1+'0'+$4 # Perform subtractions of variables to zero (e.g. 'a-a=0')
236
- when /(.*)\/1\b(.*)/ then expr = $1+$2 # Remove divisions by 1
237
- when /(.*)(\+0\b|\b0\+)(.*)/ then expr = $1+$3 # Remove additions with 0
238
- when /(.*[\+\(])\(([^\(\)\*\/\%]+)\)([\+\-\)].*)/ then expr = $1+$2+$3 # Remove brackets that are not needed (e.g. '(a+b)+c')
239
- end
240
- expr.gsub!(/\s/,'') # Remove whitespaces
241
- expr.gsub!(/\-\-/,'+') # Substitute double minusses for a plus
242
- expr.gsub!(/\+\-/,'-') # Substitute plus-minus for a minus
243
- expr.gsub!(/(^|\()\+/,'') # Remove plus signs at the start of a line or after an opening bracket
244
- if expr =~ /(.*)\b(\d+)\/(\d+)\b(.*)/ # Perform divisions on constants...
245
- division = ($2.to_i)/($3.to_i) # ...but first check whether the result will be correct (integer division)
246
- expr = $1+division.to_s+$4 if division*$3.to_i == $2.to_i
247
- end
248
- done = true if old_expr == expr
249
- end
250
- return expr
251
- end
252
-
253
201
  end
254
202
 
255
203
  end
@@ -261,6 +209,7 @@ require 'bones/species.rb'
261
209
  require 'bones/algorithm.rb'
262
210
  require 'bones/variablelist.rb'
263
211
  require 'bones/variable.rb'
212
+ require 'bones/copy.rb'
264
213
  require 'bones/preprocessor.rb'
265
214
  require 'bones/engine.rb'
266
215
 
@@ -10,7 +10,7 @@ module Bones
10
10
  # and lists of input/output array variables.
11
11
  class Algorithm < Common
12
12
  attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name
13
- attr_accessor :hash, :merge_factor
13
+ attr_accessor :hash, :merge_factor, :register_caching_enabled
14
14
 
15
15
  # Constant to set the name of the algorithm's accelerated version
16
16
  ACCELERATED = '_accelerated'
@@ -31,14 +31,25 @@ module Bones
31
31
  @original_name = @name+ORIGINAL
32
32
  @accelerated_name = @name+ACCELERATED
33
33
  @species = species
34
- @code = C::Statement.parse(code).preprocess
34
+ begin
35
+ @code = C::Statement.parse(code).preprocess
36
+ rescue
37
+ @code = C::Statement.parse('{'+code+'}').preprocess
38
+ end
35
39
  @hash = {}
36
40
  @lists = {:host_name => [],:host_definition => [], :argument_name => [], :argument_definition => [], :golden_name => []}
37
41
  @arrays = Variablelist.new()
38
42
  @constants = Variablelist.new()
39
- @merge_factor = 1
43
+ @merge_factor = nil
44
+ @register_caching_enabled = 1
40
45
  @function_code = ''
41
46
  @function_name = ''
47
+
48
+ # Set the initial hash
49
+ @hash = {:algorithm_id => @id,
50
+ :algorithm_name => @name,
51
+ :algorithm_basename => @basename,
52
+ :algorithm_filename => @filename}
42
53
  end
43
54
 
44
55
  # This method sets the code and name for the function in
@@ -119,15 +130,17 @@ module Bones
119
130
  new_code.transform_flatten(array)
120
131
  end
121
132
 
122
- # Perform array substitution (conditionally do this)
123
- @arrays.outputs.each do |array|
124
- if array.species.element?
125
- if @arrays.inputs.include?(array)
126
- new_code.transform_substitution(array,true)
127
- else
128
- new_code.transform_substitution(array,false)
133
+ # Perform array substitution a.k.a. register caching (conditionally do this)
134
+ if @register_caching_enabled == 1
135
+ @arrays.outputs.each do |array|
136
+ if array.species.element?
137
+ if @arrays.inputs.include?(array)
138
+ new_code.transform_substitution(array,true)
139
+ else
140
+ new_code.transform_substitution(array,false)
141
+ end
142
+ extra_indent = INDENT
129
143
  end
130
- extra_indent = INDENT
131
144
  end
132
145
  end
133
146
 
@@ -138,23 +151,35 @@ module Bones
138
151
 
139
152
  # Perform thread-merging (experimental)
140
153
  # TODO: Solve the problem related to constants (e.g. chunk/example1.c)
141
- if @merge_factor == 1 && transformation[0,1] == '4'
142
- @merge_factor = 4
154
+ if @merge_factor == nil
155
+ if transformation[0,1] == '4' && @hash[:parallelism].to_i >= 1024*1024
156
+ @merge_factor = 4
157
+ else
158
+ @merge_factor = 1
159
+ end
143
160
  end
144
161
  if @merge_factor > 1
145
- puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'
146
-
147
- # Update the hash
148
- @hash[:ids] = @hash[:ids].split(NL).map { |line|
149
- C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
150
- id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
151
- end
152
- }.join(NL+INDENT*2)
153
- @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s
154
-
155
- # Transform the code
156
- excludes = (@constants+@arrays).map { |c| c.name }
157
- new_code.transform_merge_threads(@merge_factor,excludes)
162
+ puts @hash[:parallelism]
163
+ if new_code.has_conditional_statements?
164
+ puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of conditional statements in kernel body.'
165
+ # TODO: Fix this temporary hack for multiple loops with mismatching bounds
166
+ elsif ((@hash[:parallelism].to_i % @merge_factor) != 0) || (@hash[:parallelism].to_i == 4192256)
167
+ puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of mismatching amount of parallelism ('+@hash[:parallelism]+').'
168
+ else
169
+ puts MESSAGE+'Coarsening threads by a factor '+@merge_factor.to_s+'.'
170
+
171
+ # Update the hash
172
+ @hash[:ids] = @hash[:ids].split(NL).map { |line|
173
+ C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
174
+ id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
175
+ end
176
+ }.join(NL+INDENT*2)
177
+ @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s
178
+
179
+ # Transform the code
180
+ excludes = (@constants+@arrays).map { |c| c.name }
181
+ new_code.transform_merge_threads(@merge_factor,excludes)
182
+ end
158
183
  end
159
184
 
160
185
  # Obtain the complexity in terms of operations for the resulting code
@@ -215,12 +240,8 @@ module Bones
215
240
  # kernel_argument_list
216
241
  #
217
242
  def populate_hash
218
- @hash = {:algorithm_id => @id,
219
- :algorithm_name => @name,
220
- :algorithm_basename => @basename,
221
- :algorithm_filename => @filename,
222
- :argument_name => @lists[:argument_name],
223
- :argument_definition => @lists[:argument_definition]}
243
+ @hash[:argument_name] = @lists[:argument_name]
244
+ @hash[:argument_definition] = @lists[:argument_definition]
224
245
 
225
246
  # Obtain the necessary data for the hash per array
226
247
  parallelisms = []
@@ -278,11 +299,15 @@ module Bones
278
299
 
279
300
  # Generate the index expressions
280
301
  divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
281
- minihash = {:dimensions => (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*'),
282
- :modulo => (index_reverse != dimensions.length-1) ? '%('+sum(dimension)+divider+')' : '',
283
- :offset => from(dimension)}
284
- expr_global = simplify(search_and_replace(minihash,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
285
- expr_local = simplify(search_and_replace(minihash,"((#{LOCAL_ID }/(<dimensions>))<modulo>)+<offset>"))
302
+ dimensions_hash = (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*')
303
+ dimensions_hash = simplify(dimensions_hash)
304
+ dimensions_division = (dimensions_hash == '1') ? '' : '/('+dimensions_hash+')'
305
+ minihash = {:dimensions1 => "#{GLOBAL_ID}#{dimensions_division}",
306
+ :dimensions2 => "#{LOCAL_ID }#{dimensions_division}",
307
+ :modulo => (index_reverse != dimensions.length-1) ? '%('+simplify(sum(dimension)+divider)+')' : '',
308
+ :offset => simplify(from(dimension))}
309
+ expr_global = search_and_replace(minihash,"((<dimensions1>)<modulo>)+<offset>")
310
+ expr_local = search_and_replace(minihash,"((<dimensions2>)<modulo>)+<offset>")
286
311
 
287
312
  # Selectively push the ID definitions to the result array
288
313
  from = array.species.from_at(index)
@@ -342,6 +367,7 @@ module Bones
342
367
  def update_hash(loop_variable)
343
368
  names = @hash[:argument_name].split(', ')
344
369
  definitions = @hash[:argument_definition].split(', ')
370
+ # TODO: The following two lines give problems with correlation-k4
345
371
  names.delete(loop_variable.to_s)
346
372
  definitions.each { |definition| definitions.delete(definition) if definition =~ /\b#{loop_variable}\b/ }
347
373
  @hash[:argument_name] = names.join(', ')
@@ -387,10 +413,12 @@ module Bones
387
413
  array_names = arrays.map { |a| a.name }.join('","')
388
414
  raise_error(direction.capitalize+'put array count mismatch (expected '+species.length.to_s+', found '+arrays.length.to_s+' ["'+array_names+'"])')
389
415
  end
390
-
416
+
391
417
  # Set the species for the arrays (distinguish between arrays with and without a name)
392
418
  species.each do |structure|
393
- array = arrays[0]
419
+
420
+ # Loop over all found arrays and match each one with a species
421
+ array = nil
394
422
  arrays.each do |free_array|
395
423
  if !free_array.species
396
424
  if structure.has_arrayname?
@@ -404,8 +432,17 @@ module Bones
404
432
  end
405
433
  end
406
434
  end
435
+
436
+ # Still haven't found anything, assign the species to an array of equal name
437
+ if !array
438
+ arrays.each do |free_array|
439
+ array = free_array if structure.name == free_array.name
440
+ end
441
+ end
442
+
443
+ # Process the assignment
407
444
  array.species = structure
408
- #structure.name = array.name
445
+ raise_error("Species of '#{array.species.name}' is mismatched with array '#{array.name}'") if array.species.name != array.name
409
446
 
410
447
  # Check if the array size was set, if not, it will be set to the species' size
411
448
  if array.size.empty?