bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,76 @@
1
+
2
+ module Adarwin
3
+
4
+ # This is the C99 pre-processor for Adarwin. It has the following tasks:
5
+ # * Extract the SCoP part from the code (the region of interest)
6
+ # * Extract the header code (defines, includes, etc.)
7
+ # * Output the original code without pre-processor directives
8
+ # * Output the original code minus the SCoP (SCoP to be filled in later)
9
+ class Preprocessor < Common
10
+ attr_reader :source_code, :header_code, :parsed_code, :scop_code, :target_code
11
+
12
+ # Regular expression to identify whitespaces (tabs, spaces).
13
+ WHITESPACE = '\s*'
14
+
15
+ # This is the method which initializes the preprocessor. Initialization
16
+ # requires the target source code to process, which is then set as the
17
+ # instance variable +@source_code+.
18
+ def initialize(source_code)
19
+ @source_code = source_code
20
+ @header_code = ''
21
+ @parsed_code = ''
22
+ @target_code = ''
23
+ @scop_code = ''
24
+ end
25
+
26
+ # This is the method to perform the actual preprocessing. This method takes
27
+ # care of all the pre-processor tasks. The output is stored in the attributes
28
+ # +header_code+, +parsed_code+, +target_code+, and +scop_code+.
29
+ # FIXME: What about multi-line statements? For example, a multi-line comment
30
+ # could have a commented-out SCoP or define or include.
31
+ def process
32
+ scop = false
33
+ scop_in_code = false
34
+
35
+ # Process the file line by line
36
+ @source_code.each_line.with_index do |line,index|
37
+ if line =~ /^#{WHITESPACE}#/
38
+
39
+ # Keep 'include' statements as header code
40
+ if line =~ /^#{WHITESPACE}#include/
41
+ @header_code += line
42
+ @target_code += line
43
+
44
+ # Process 'define' statements
45
+ elsif line =~ /^#{WHITESPACE}#define/
46
+ @header_code += line
47
+ @target_code += line
48
+
49
+ # Found the start of a SCoP
50
+ elsif line =~ /^#{WHITESPACE}#{SCOP_START}/
51
+ scop = true
52
+ scop_in_code = true
53
+ @parsed_code += '{'+NL
54
+
55
+ # Found the end of a SCoP
56
+ elsif line =~ /^#{WHITESPACE}#{SCOP_END}/
57
+ scop = false
58
+ @parsed_code += '}'+NL
59
+ end
60
+
61
+ # Nothing special in the code going on here
62
+ else
63
+ @scop_code += line if scop
64
+ @parsed_code += line
65
+ @target_code += line
66
+ end
67
+ end
68
+
69
+ # Exit if there is no SCoP found
70
+ if !scop_in_code
71
+ raise_error('No "#pragma scop" found in the source code')
72
+ end
73
+ end
74
+ end
75
+ end
76
+
@@ -0,0 +1,261 @@
1
+
2
+ module Adarwin
3
+
4
+ # This class represents an array reference characterisation. This reference is
5
+ # constructed as a 5-tuple (tN,tA,tD,tE,tS) with the following information:
6
+ # * tN: The name of the reference.
7
+ # * tA: The access direction (read or write).
8
+ # * tD: The full domain accessed.
9
+ # * tE: The number of elements accessed each iteration (the size).
10
+ # * tS: The step of the accesses among iterations.
11
+ # To be able to compute the 5-tuple, the reference also stores information
12
+ # about the loops and conditional statements to which the original array
13
+ # reference is subjected.
14
+ #
15
+ # This class contains methods to perform among others the following:
16
+ # * Initialise the class and set the 5-tuple (N,A,D,E,S)
17
+ # * Retrieve information on array indices
18
+ # * Print in different forms (species, ARC, copy/sync pragmas)
19
+ class Reference
20
+ attr_accessor :tN, :tA, :tD, :tE, :tS
21
+ attr_accessor :bounds, :indices, :pattern, :id
22
+ attr_accessor :all_loops
23
+
24
+ # This method initialises the array reference class. It takes details of the
25
+ # reference itself and details of the loop nest it belongs to. The method
26
+ # performs among others the following:
27
+ # * It initialises the 5-tuple (N,A,D,E,S)
28
+ # * It constructs the sets of loops (all,inner,outer) for this reference
29
+ # * It computes the bounds based on loop data and on if-statements
30
+ # * It computes the domain (D), number of elements (E), and step (S)
31
+ def initialize(reference,id,inner_loops,outer_loops,verbose)
32
+ @id = id
33
+
34
+ # Initialise the 5-tuple (already fill in N and A)
35
+ @tN = reference[:name]
36
+ @tA = reference[:type]
37
+ @tD = []
38
+ @tE = []
39
+ @tS = []
40
+
41
+ # Set the inner loops as the loop nest's inner loop intersected with all
42
+ # loops found for this statement. Beware of the difference between loops
43
+ # of a loop nest and loops of a statement.
44
+ @all_loops = reference[:loop_data]
45
+ @inner_loops = inner_loops & @all_loops
46
+ @outer_loops = outer_loops
47
+
48
+ # Set the indices of the array reference (e.g. 2*i+4). The size of this
49
+ # array is equal to the number of dimensions of the array.
50
+ @indices = reference[:indices]
51
+
52
+ # Set the if-statements for the reference. Process them together with the
53
+ # loop start/end conditions to obtain a final set of conditions/bounds.
54
+ @bounds = []
55
+ loop_vars = @all_loops.map{ |l| l[:var]}
56
+ @all_loops.each do |loop_data|
57
+ conditions = [loop_data[:min],loop_data[:max]]
58
+ reference[:if_statements].each do |if_statement|
59
+ condition_if = if_statement.map{ |c| solve(c,loop_data[:var],loop_vars) }
60
+ conditions = [
61
+ max(conditions[0],condition_if[0]),
62
+ min(conditions[1],condition_if[1])
63
+ ]
64
+ end
65
+ @bounds << { :var => loop_data[:var], :min => conditions[0], :max => conditions[1] }
66
+ end
67
+
68
+ # Compute the domain (D) based on the bounds. The bounds are derived from
69
+ # the if-statements and for-loops.
70
+ @tD = @indices.map do |i|
71
+ index_to_interval(i,@bounds)
72
+ end
73
+
74
+ # Compute the number of elements (E) accessed every iteration (the size).
75
+ # TODO: Clean-up this method.
76
+ @tE = @indices.map do |i|
77
+ #if !dependent?(i,@all_loops)
78
+ # puts "independent"
79
+ # index_to_interval(i,@inner_loops)
80
+ #else
81
+ #puts "dependent"
82
+ get_base_offset(i)
83
+ #end
84
+ end
85
+
86
+ # Compute the step taken. There are 3 cases considered; the index is: 1)
87
+ # dependent on the inner loops, 2) dependent on the outer loops, or 3)
88
+ # independent of any loops.
89
+ @tS = @indices.map do |i|
90
+ if dependent?(i,@inner_loops)
91
+ index_to_interval(i,@inner_loops).length
92
+ elsif dependent?(i,@outer_loops)
93
+ get_step(i,@outer_loops)
94
+ else
95
+ '0'
96
+ end
97
+ end
98
+
99
+ # If the step and the domain are equal in size, the step can also be set
100
+ # to zero to reflect accessing the full array.
101
+ @tS.each_with_index do |tS,index|
102
+ if (tS == @tD[index].length) || (@tD[index].length == '1')
103
+ @tS[index] = '0'
104
+ end
105
+ end
106
+
107
+ # Print the result
108
+ puts MESSAGE+"Found: #{to_arc}" if verbose
109
+ end
110
+
111
+ # This method replaces loop variables for a given set of loops with 0. This
112
+ # basically gives us the offset of array references with respect to the loop
113
+ # variable. For example, A[2*i+4] and A[i+j+3] will give us [4,j+3] with
114
+ # respect to an i-loop.
115
+ def get_base_offset(index)
116
+ index = index.clone
117
+ @outer_loops.each do |for_loop|
118
+ search = C::Variable.parse(for_loop[:var])
119
+ replace = C::Expression.parse('0')
120
+ index = index.search_and_replace_node(search,replace)
121
+ end
122
+ return index_to_interval(index,@inner_loops)
123
+ end
124
+
125
+ # Method to fill in the ranges for an array reference. This is based on
126
+ # information of the loop nests. The output is an interval.
127
+ def index_to_interval(index,loops)
128
+ access_min = find_extreme(:min,index,loops)
129
+ access_max = find_extreme(:max,index,loops)
130
+ return Interval.new(access_min,access_max,@all_loops)
131
+ end
132
+
133
+ # Substitute loop data with the upper-bound or lower-bound of a loop to find
134
+ # the minimum/maximum of an array reference. The body is executed twice,
135
+ # because a loop bound can be based on another loop variable.
136
+ def find_extreme(position,index,loops)
137
+ index = index.clone
138
+ 2.times do
139
+ loops.each do |for_loop|
140
+ search = C::Variable.parse(for_loop[:var])
141
+ replace = C::Expression.parse(for_loop[position])
142
+ index = index.search_and_replace_node(search,replace)
143
+ end
144
+ end
145
+ return simplify(index.to_s.gsub(';','').gsub(' ','').gsub("\t",''))
146
+ end
147
+
148
+ # Method to check whether an index is dependent on a given set of loops.
149
+ # For example, A[i+3] is independent of j, but dependent on i.
150
+ def dependent?(index,loops)
151
+ index.preorder do |node|
152
+ if node.variable?
153
+ loops.each do |for_loop|
154
+ return true if (node.name == for_loop[:var])
155
+ end
156
+ end
157
+ end
158
+ return false
159
+ end
160
+
161
+ # Method to retrieve the step for a given array index and loops. The method
162
+ # returns the difference between two subsequent iterations: one with the
163
+ # loop variable at 0 and one after the first increment.
164
+ def get_step(index,loops)
165
+
166
+ # Replace the loop indices with 0
167
+ index1 = index.clone
168
+ loops.each do |for_loop|
169
+ search = C::Variable.parse(for_loop[:var])
170
+ replace = C::Expression.parse('0')
171
+ index1 = index1.search_and_replace_node(search,replace)
172
+ end
173
+
174
+ # Replace the loop indices with the loop step
175
+ index2 = index.clone
176
+ loops.each do |for_loop|
177
+ search = C::Variable.parse(for_loop[:var])
178
+ replace = C::Expression.parse(for_loop[:step])
179
+ index2 = index2.search_and_replace_node(search,replace)
180
+ end
181
+
182
+ # Return the difference
183
+ return abs(simplify("(#{index2})-(#{index1})"))
184
+ end
185
+
186
+ # Method to output the result as algorithmic species. This reflects the
187
+ # algorithm as presented in the scientific paper.
188
+ def to_species
189
+ if @tS.reject{ |s| s == "0"}.empty?
190
+ if (@tA == 'read') # Full (steps length 0 and read)
191
+ @pattern = 'full'
192
+ else # Shared (steps length 0 and write)
193
+ @pattern = 'shared'
194
+ end
195
+ elsif @tE.reject{ |s| s.length == "1"}.empty? # Element (sizes length 1)
196
+ @pattern = 'element'
197
+ elsif step_smaller_than_num_elements? # Neighbourhood (tS < tE)
198
+ @pattern = 'neighbourhood('+@tE.join(DIM_SEP)+')'
199
+ else # Chunk (tS >= tE)
200
+ @pattern = 'chunk('+@tE.join(DIM_SEP)+')'
201
+ end
202
+
203
+ # Fill in the name and the domain and return the result
204
+ return @tN+'['+@tD.join(DIM_SEP)+']'+PIPE+@pattern
205
+ end
206
+
207
+ # Method to output the result as an array reference characterisation (ARC).
208
+ def to_arc
209
+ return "(#{tN},#{tA},#{tD},#{tE},#{tS})".gsub('"','').gsub(' ','')
210
+ end
211
+
212
+ # Method to output a copyin/copyout statement. This indicates the name (N),
213
+ # the domain (D), and a unique identifier.
214
+ def to_copy(id)
215
+ @tN+'['+@tD.join(DIM_SEP)+']'+'|'+id.to_s
216
+ end
217
+
218
+ # Method to print the unique identifier of the loop nest in terms of
219
+ # synchronisation statements to be printed. This is a per-reference id
220
+ # instead of a per-loop id, because it depends on the type of access (read
221
+ # or write).
222
+ def get_sync_id
223
+ (@tA == 'write') ? 2*@id+1 : 2*@id
224
+ end
225
+
226
+ # Helper method for the +to_species+ method. This method compares the step
227
+ # with the number of elements accessed to determine which one is smaller.
228
+ # FIXME: This is based on the +compare+ method which might take a guess.
229
+ def step_smaller_than_num_elements?
230
+ @tS.each_with_index do |step,index|
231
+ if step != '0'
232
+ comparison = compare(step,@tE[index].length,@all_loops)
233
+ if (comparison == 'lt')
234
+ return true
235
+ end
236
+ end
237
+ end
238
+ return false
239
+ end
240
+
241
+ # Method to print out a human readable form of the array references (e.g.
242
+ # [4*i+6][j]). This is basically what the +puts+ method also does.
243
+ def get_references
244
+ return @indices.to_ary.map{ |i| i.to_s }
245
+ end
246
+
247
+ # Method to find out if the reference is dependent on a variable. It is
248
+ # used by the copy optimisations.
249
+ def depends_on?(var)
250
+ @indices.each do |index|
251
+ index.preorder do |node|
252
+ if node.variable?
253
+ return true if (node.name == var)
254
+ end
255
+ end
256
+ end
257
+ return false
258
+ end
259
+
260
+ end
261
+ end
data/lib/bones.rb CHANGED
@@ -1,10 +1,6 @@
1
1
 
2
- # Bones requires 'fileutils' from the Ruby standard library.
3
- require 'fileutils'
4
-
5
- # Bones uses the 'trollop' gem to parse command line options.
6
- require 'rubygems'
7
- require 'trollop'
2
+ # Include the common part between Bones and Aset
3
+ require 'common.rb'
8
4
 
9
5
  # We define a custom error class for code generation related
10
6
  # errors (any error raised by Bones).
@@ -16,20 +12,9 @@ def raise_error(message) #:nodoc:
16
12
  end
17
13
 
18
14
  # Extending the Ruby standard string class to support some
19
- # addition methods. They include a hack of the gsub! command,
20
- # and two methods related to comma removal.
15
+ # additional methods: two methods related to comma removal.
21
16
  class String #:nodoc:
22
17
 
23
- # Extend the Ruby string class to be able to chain 'gsub!'
24
- #-commands. This code is taken from the web.
25
- meth = 'gsub!'
26
- orig_meth = "orig_#{meth}"
27
- alias_method orig_meth, meth
28
- define_method(meth) do |*args|
29
- self.send(orig_meth, *args)
30
- self
31
- end
32
-
33
18
  # Replace double comma's in a string with a single comma.
34
19
  # This method is useful for function-argument lists.
35
20
  def remove_double_commas
@@ -213,43 +198,6 @@ module Bones
213
198
  return code
214
199
  end
215
200
 
216
- # Helper method to evaluate mathematical expressions, possibly containing
217
- # symbols. This method is only used for readability, without it the code
218
- # is functionally correct, but expressions might be larger than needed.
219
- # This method is only tested on integers.
220
- def simplify(expr)
221
- raise_error('Invalid expression to simplify') if !expr
222
- done = false
223
- while !done do
224
- old_expr = expr
225
- case expr
226
- when /^\(([^\(\)]*)\)$/ then expr = $1 # Remove outer brackets
227
- when /(.*)\((-?\w*)\)(.*)/ then expr = $1+$2+$3 # Remove brackets with one constant or variable inside
228
- when /(.*)\(\(([^\(\)]*)\)\)(.*)/ then expr = $1+'('+$2+')'+$3 # Substitute double brackets into single brackets
229
- when /(.*)(\-\d+)\*(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants (starting with a '-')
230
- when /(.*)(\-\d+)\+(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants (starting with a '-')
231
- when /(.*)(\-\d+)\-(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants (starting with a '-')
232
- when /(.*)\b(\d+)\*(\d+)\b(.*)/ then expr = $1+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants
233
- when /(.*)\b(\d+)\+(\d+)\b(.*)/ then expr = $1+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants
234
- when /(.*)\b(\d+)\-(\d+)\b(.*)/ then expr = $1+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants
235
- when /(.*)\b(\w+)\-(\2)\b(.*)/ then expr = $1+'0'+$4 # Perform subtractions of variables to zero (e.g. 'a-a=0')
236
- when /(.*)\/1\b(.*)/ then expr = $1+$2 # Remove divisions by 1
237
- when /(.*)(\+0\b|\b0\+)(.*)/ then expr = $1+$3 # Remove additions with 0
238
- when /(.*[\+\(])\(([^\(\)\*\/\%]+)\)([\+\-\)].*)/ then expr = $1+$2+$3 # Remove brackets that are not needed (e.g. '(a+b)+c')
239
- end
240
- expr.gsub!(/\s/,'') # Remove whitespaces
241
- expr.gsub!(/\-\-/,'+') # Substitute double minusses for a plus
242
- expr.gsub!(/\+\-/,'-') # Substitute plus-minus for a minus
243
- expr.gsub!(/(^|\()\+/,'') # Remove plus signs at the start of a line or after an opening bracket
244
- if expr =~ /(.*)\b(\d+)\/(\d+)\b(.*)/ # Perform divisions on constants...
245
- division = ($2.to_i)/($3.to_i) # ...but first check whether the result will be correct (integer division)
246
- expr = $1+division.to_s+$4 if division*$3.to_i == $2.to_i
247
- end
248
- done = true if old_expr == expr
249
- end
250
- return expr
251
- end
252
-
253
201
  end
254
202
 
255
203
  end
@@ -261,6 +209,7 @@ require 'bones/species.rb'
261
209
  require 'bones/algorithm.rb'
262
210
  require 'bones/variablelist.rb'
263
211
  require 'bones/variable.rb'
212
+ require 'bones/copy.rb'
264
213
  require 'bones/preprocessor.rb'
265
214
  require 'bones/engine.rb'
266
215
 
@@ -10,7 +10,7 @@ module Bones
10
10
  # and lists of input/output array variables.
11
11
  class Algorithm < Common
12
12
  attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name
13
- attr_accessor :hash, :merge_factor
13
+ attr_accessor :hash, :merge_factor, :register_caching_enabled
14
14
 
15
15
  # Constant to set the name of the algorithm's accelerated version
16
16
  ACCELERATED = '_accelerated'
@@ -31,14 +31,25 @@ module Bones
31
31
  @original_name = @name+ORIGINAL
32
32
  @accelerated_name = @name+ACCELERATED
33
33
  @species = species
34
- @code = C::Statement.parse(code).preprocess
34
+ begin
35
+ @code = C::Statement.parse(code).preprocess
36
+ rescue
37
+ @code = C::Statement.parse('{'+code+'}').preprocess
38
+ end
35
39
  @hash = {}
36
40
  @lists = {:host_name => [],:host_definition => [], :argument_name => [], :argument_definition => [], :golden_name => []}
37
41
  @arrays = Variablelist.new()
38
42
  @constants = Variablelist.new()
39
- @merge_factor = 1
43
+ @merge_factor = nil
44
+ @register_caching_enabled = 1
40
45
  @function_code = ''
41
46
  @function_name = ''
47
+
48
+ # Set the initial hash
49
+ @hash = {:algorithm_id => @id,
50
+ :algorithm_name => @name,
51
+ :algorithm_basename => @basename,
52
+ :algorithm_filename => @filename}
42
53
  end
43
54
 
44
55
  # This method sets the code and name for the function in
@@ -119,15 +130,17 @@ module Bones
119
130
  new_code.transform_flatten(array)
120
131
  end
121
132
 
122
- # Perform array substitution (conditionally do this)
123
- @arrays.outputs.each do |array|
124
- if array.species.element?
125
- if @arrays.inputs.include?(array)
126
- new_code.transform_substitution(array,true)
127
- else
128
- new_code.transform_substitution(array,false)
133
+ # Perform array substitution a.k.a. register caching (conditionally do this)
134
+ if @register_caching_enabled == 1
135
+ @arrays.outputs.each do |array|
136
+ if array.species.element?
137
+ if @arrays.inputs.include?(array)
138
+ new_code.transform_substitution(array,true)
139
+ else
140
+ new_code.transform_substitution(array,false)
141
+ end
142
+ extra_indent = INDENT
129
143
  end
130
- extra_indent = INDENT
131
144
  end
132
145
  end
133
146
 
@@ -138,23 +151,35 @@ module Bones
138
151
 
139
152
  # Perform thread-merging (experimental)
140
153
  # TODO: Solve the problem related to constants (e.g chunk/example1.c)
141
- if @merge_factor == 1 && transformation[0,1] == '4'
142
- @merge_factor = 4
154
+ if @merge_factor == nil
155
+ if transformation[0,1] == '4' && @hash[:parallelism].to_i >= 1024*1024
156
+ @merge_factor = 4
157
+ else
158
+ @merge_factor = 1
159
+ end
143
160
  end
144
161
  if @merge_factor > 1
145
- puts MESSAGE+'Merging threads by a factor '+@merge_factor.to_s+'.'
146
-
147
- # Update the hash
148
- @hash[:ids] = @hash[:ids].split(NL).map { |line|
149
- C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
150
- id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
151
- end
152
- }.join(NL+INDENT*2)
153
- @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s
154
-
155
- # Transform the code
156
- excludes = (@constants+@arrays).map { |c| c.name }
157
- new_code.transform_merge_threads(@merge_factor,excludes)
162
+ puts @hash[:parallelism]
163
+ if new_code.has_conditional_statements?
164
+ puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of conditional statements in kernel body.'
165
+ # TODO: Fix this temporary hack for multiple loops with mismatching bounds
166
+ elsif ((@hash[:parallelism].to_i % @merge_factor) != 0) || (@hash[:parallelism].to_i == 4192256)
167
+ puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of mismatching amount of parallelism ('+@hash[:parallelism]+').'
168
+ else
169
+ puts MESSAGE+'Coarsening threads by a factor '+@merge_factor.to_s+'.'
170
+
171
+ # Update the hash
172
+ @hash[:ids] = @hash[:ids].split(NL).map { |line|
173
+ C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
174
+ id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
175
+ end
176
+ }.join(NL+INDENT*2)
177
+ @hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s
178
+
179
+ # Transform the code
180
+ excludes = (@constants+@arrays).map { |c| c.name }
181
+ new_code.transform_merge_threads(@merge_factor,excludes)
182
+ end
158
183
  end
159
184
 
160
185
  # Obtain the complexity in terms of operations for the resulting code
@@ -215,12 +240,8 @@ module Bones
215
240
  # kernel_argument_list
216
241
  #
217
242
  def populate_hash
218
- @hash = {:algorithm_id => @id,
219
- :algorithm_name => @name,
220
- :algorithm_basename => @basename,
221
- :algorithm_filename => @filename,
222
- :argument_name => @lists[:argument_name],
223
- :argument_definition => @lists[:argument_definition]}
243
+ @hash[:argument_name] = @lists[:argument_name]
244
+ @hash[:argument_definition] = @lists[:argument_definition]
224
245
 
225
246
  # Obtain the necessary data for the hash per array
226
247
  parallelisms = []
@@ -278,11 +299,15 @@ module Bones
278
299
 
279
300
  # Generate the index expressions
280
301
  divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
281
- minihash = {:dimensions => (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*'),
282
- :modulo => (index_reverse != dimensions.length-1) ? '%('+sum(dimension)+divider+')' : '',
283
- :offset => from(dimension)}
284
- expr_global = simplify(search_and_replace(minihash,"((#{GLOBAL_ID}/(<dimensions>))<modulo>)+<offset>"))
285
- expr_local = simplify(search_and_replace(minihash,"((#{LOCAL_ID }/(<dimensions>))<modulo>)+<offset>"))
302
+ dimensions_hash = (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*')
303
+ dimensions_hash = simplify(dimensions_hash)
304
+ dimensions_division = (dimensions_hash == '1') ? '' : '/('+dimensions_hash+')'
305
+ minihash = {:dimensions1 => "#{GLOBAL_ID}#{dimensions_division}",
306
+ :dimensions2 => "#{LOCAL_ID }#{dimensions_division}",
307
+ :modulo => (index_reverse != dimensions.length-1) ? '%('+simplify(sum(dimension)+divider)+')' : '',
308
+ :offset => simplify(from(dimension))}
309
+ expr_global = search_and_replace(minihash,"((<dimensions1>)<modulo>)+<offset>")
310
+ expr_local = search_and_replace(minihash,"((<dimensions2>)<modulo>)+<offset>")
286
311
 
287
312
  # Selectively push the ID definitions to the result array
288
313
  from = array.species.from_at(index)
@@ -342,6 +367,7 @@ module Bones
342
367
  def update_hash(loop_variable)
343
368
  names = @hash[:argument_name].split(', ')
344
369
  definitions = @hash[:argument_definition].split(', ')
370
+ # TODO: The following two lines give problems with correlation-k4
345
371
  names.delete(loop_variable.to_s)
346
372
  definitions.each { |definition| definitions.delete(definition) if definition =~ /\b#{loop_variable}\b/ }
347
373
  @hash[:argument_name] = names.join(', ')
@@ -387,10 +413,12 @@ module Bones
387
413
  array_names = arrays.map { |a| a.name }.join('","')
388
414
  raise_error(direction.capitalize+'put array count mismatch (expected '+species.length.to_s+', found '+arrays.length.to_s+' ["'+array_names+'"])')
389
415
  end
390
-
416
+
391
417
  # Set the species for the arrays (distinguish between arrays with and without a name)
392
418
  species.each do |structure|
393
- array = arrays[0]
419
+
420
+ # Loop over all found arrays and match it with a species
421
+ array = nil
394
422
  arrays.each do |free_array|
395
423
  if !free_array.species
396
424
  if structure.has_arrayname?
@@ -404,8 +432,17 @@ module Bones
404
432
  end
405
433
  end
406
434
  end
435
+
436
+ # Still haven't found anything, assign the species to an array of equal name
437
+ if !array
438
+ arrays.each do |free_array|
439
+ array = free_array if structure.name == free_array.name
440
+ end
441
+ end
442
+
443
+ # Process the assignment
407
444
  array.species = structure
408
- #structure.name = array.name
445
+ raise_error("Species of '#{array.species.name}' is mismatched with array '#{array.name}'") if array.species.name != array.name
409
446
 
410
447
  # Check if the array size was set, if not, it will be set to the species' size
411
448
  if array.size.empty?