bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,174 @@
1
+
2
# Determine whether kernel fusion is legal (see algorithm in paper/thesis).
#
# Every access (write or read) of nest +a+ is compared with every access of
# nest +b+. A pair is only relevant when both touch the same array (+tN+) and
# at least one of the two is a 'write'. For such a pair the string forms of
# +tD+, +tE+ and +tS+ must all match; the first mismatch makes fusion illegal.
#
# Prints progress messages and returns +true+ (legal) or +false+ (illegal).
def fusion_is_legal?(a, b)
  (a.writes + a.reads).each do |first|
    (b.writes + b.reads).each do |second|

      # Skip pairs on different arrays and read/read pairs
      next unless first.tN == second.tN
      next unless first.tA == 'write' || second.tA == 'write'
      puts Adarwin::MESSAGE+"Evaluating #{first.to_arc} and #{second.to_arc} for fusion"

      # The characterisations have to be textually identical
      identical = first.tD.to_s == second.tD.to_s &&
                  first.tE.to_s == second.tE.to_s &&
                  first.tS.to_s == second.tS.to_s
      next if identical
      puts Adarwin::MESSAGE+"Unable to fuse #{first.to_arc} and #{second.to_arc}"
      return false
    end
  end
  puts Adarwin::MESSAGE+"Applying fusion"
  true
end
18
+
19
+
20
# Perform the kernel fusion transformations.
#
# Selects the nests that have species and walks over them in order, trying to
# fuse each candidate with the previously remembered one. For every fused
# codelet a new Adarwin::Nest is appended to +nests+ and the two source nests
# are flagged as removed.
#
# +settings+ selects the strategy:
# * 1: one fused loop nest spanning both iteration spaces, with if-statements
#   guarding each body where the bounds differ
# * 2: a main loop nest plus an epilogue for mismatching upper bounds
#   (experimental)
# NOTE(review): for any other value no code is generated, yet both nests are
# still flagged as removed - confirm that this is intended.
def kernel_fusion(nests, settings)

  # Only nests with species take part in fusion
  candidates = nests.select(&:has_species?)

  # Iterate over the candidates, pairing each with its predecessor
  prev = nil
  candidates.each do |nest|
    curr = nest
    if prev

      # Both nests need the same number of direct loops.
      # NOTE(review): +next+ skips the 'prev = nest' at the bottom, so after a
      # mismatch the older nest stays the fusion partner - confirm intent.
      loops_prev = prev.code.get_direct_loops
      loops_curr = curr.code.get_direct_loops
      unless loops_prev.size == loops_curr.size
        puts Adarwin::MESSAGE+"Unable to apply fusion, loop count does not match"
        next
      end

      # Only proceed if fusion is legal for this combination
      if fusion_is_legal?(prev, curr)
        fused_code = []

        # Get the bodies (working on clones of the original code)
        body_curr = get_body(loops_curr.size, curr.code.clone)
        body_prev = get_body(loops_prev.size, prev.code.clone)
        loop_pairs = loops_prev.zip(loops_curr)

        case settings

        # Fuse everything together: include if-statements for non-matching loop bounds
        when 1
          loops_target = loop_pairs.map do |prevl, currl|
            raise_error("Unequal step count #{prevl[:step]} versus #{currl[:step]}") if prevl[:step] != currl[:step]
            minmin = exact_min(prevl[:min], currl[:min])
            maxmax = exact_max(prevl[:max], currl[:max])
            fused_var = prevl[:var]+currl[:var]

            # Replace all occurrences of the original loop variables by the fused one
            body_prev = body_prev.replace_variable(prevl[:var], fused_var)
            body_curr = body_curr.replace_variable(currl[:var], fused_var)

            # Guard the bodies against the widened lower bound
            body_prev = create_if(fused_var, minmin, prevl[:min], body_prev, '>=')
            body_curr = create_if(fused_var, minmin, currl[:min], body_curr, '>=')

            # Guard the bodies against the widened upper bound
            body_prev = create_if(fused_var, maxmax, prevl[:max], body_prev, '<=')
            body_curr = create_if(fused_var, maxmax, currl[:max], body_curr, '<=')

            { :var => fused_var, :min => minmin, :max => maxmax, :step => prevl[:step] }
          end

          # Generate the new code
          fused_code.push(code_from_loops(loops_target, [body_prev, body_curr]))

        # Create an epilogue in case of mismatching loop bounds (experimental)
        when 2

          # Rename the loop variables in both bodies
          loop_pairs.each do |prevl, currl|
            raise_error("Unequal step count #{prevl[:step]} versus #{currl[:step]}") if prevl[:step] != currl[:step]
            fused_var = prevl[:var]+currl[:var]
            body_prev = body_prev.replace_variable(prevl[:var], fused_var)
            body_curr = body_curr.replace_variable(currl[:var], fused_var)
          end

          # Create the main loop nest (up to the smaller of the upper bounds)
          main_loops = loop_pairs.map do |prevl, currl|
            { :var => prevl[:var]+currl[:var],
              :min => exact_min(prevl[:min], currl[:min]),
              :max => exact_min(prevl[:max], currl[:max]),
              :step => prevl[:step] }
          end
          fused_code.push(code_from_loops(main_loops, [body_prev, body_curr]))

          # Create the epilogue (from the smaller to the larger upper bound).
          # NOTE(review): when prev owns the larger bound the body of curr is
          # selected, which looks inverted - verify against the thesis.
          body = []
          epilogue_loops = loop_pairs.map do |prevl, currl|
            minmax = exact_min(prevl[:max], currl[:max])
            maxmax = exact_max(prevl[:max], currl[:max])
            if prevl[:max] != currl[:max]
              body = (prevl[:max] == maxmax) ? [body_curr] : [body_prev]
            end
            { :var => prevl[:var]+currl[:var], :min => minmax, :max => maxmax, :step => prevl[:step] }
          end
          fused_code.push(code_from_loops(epilogue_loops, body))
        end

        # Add the newly created code to the original code
        fused_code.each_with_index do |fused_codelet, nest_id|
          puts fused_codelet
          prev.code.insert_prev(fused_codelet)

          # Create a new nest.
          # NOTE(review): this assignment rebinds the outer block variable
          # +nest+, so the 'prev = nest' below continues with the last fused
          # nest instead of the current candidate - confirm intent.
          fused_name = prev.name.gsub(/_k(\d+)/,'_fused')+nest_id.to_s
          nest = Adarwin::Nest.new(prev.level, fused_codelet, prev.id, fused_name, prev.verbose, 1)
          nests.push(nest)
        end

        # Set the other nests as to-be-removed
        prev.removed = true
        curr.removed = true
      end
    end

    # Next nest
    prev = nest
  end
end
133
+
134
# Return the body of a loop nest.
#
# Peels +num_loops+ for-statements off +code+ and returns what remains. If the
# first element of +code+ is a for-statement with a body, that element is used
# as the loop; afterwards +code+ itself has to be a for-statement with a body,
# otherwise an error is raised through +raise_error+.
def get_body(num_loops,code)
  return code if num_loops == 0

  # Step into the leading for-statement, if there is one
  head = code.first
  code = head if head.for_statement? && head.stmt

  # Descend one loop level, or give up on anything that is not a loop
  unless code.for_statement? && code.stmt
    raise_error("Not a perfect nested loop")
  end
  get_body(num_loops-1, code.stmt.stmts)
end
145
+
146
# Create an if-statement in front of a statement.
#
# When +reference_bound+ equals +loop_bound+ no guard is needed and +code+ is
# returned untouched. Otherwise +code+ is wrapped in
# 'if(<loop_var> <condition> <loop_bound>) { ... }' and the result is parsed
# into a C statement.
def create_if(loop_var,reference_bound,loop_bound,code,condition)
  return code if reference_bound == loop_bound
  C::Statement.parse("if(#{loop_var} #{condition} #{loop_bound}) { #{code.to_s} }")
end
153
+
154
# Generate code from a combination of loops and statements (the body).
#
# +loops+ is a list of hashes with the keys :var, :min, :max and :step; each
# becomes one nested 'for(int var=min; var<=max; ...)' header. A step equal to
# the string '1' is written as 'var++', any other step as 'var=var+step'. The
# statements are appended in order, the loops are closed, and the resulting
# text is parsed into a C statement.
def code_from_loops(loops,statements)

  # Start of the loops
  headers = loops.map do |loop_datum|
    var = loop_datum[:var]
    increment = (loop_datum[:step] == '1') ? "#{var}++" : "#{var}=#{var}+#{loop_datum[:step]}"
    "for(int #{var}=#{loop_datum[:min]}; #{var}<=#{loop_datum[:max]}; #{increment}) {"
  end

  # Loop body
  body = statements.map { |statement| statement.to_s }

  # End of the loops
  closing = "}" * loops.size

  C::Statement.parse(headers.join + body.join + closing)
end
@@ -0,0 +1,57 @@
1
+
2
module Adarwin

  # This class represents an interval [a..b] including a and b. The class has
  # the following methods:
  # * Initialise the interval (+initialize+)
  # * Print the interval (+to_s+)
  # * Merge an interval with another interval (+merge+)
  # * Return the length of the interval (+length+)
  class Interval
    attr_accessor :a, :b

    # Initialise the interval. Both bounds are converted to strings and passed
    # through +simplify+. The +compare+ method then decides which of the two is
    # the upper-bound: on 'gt' the bounds are swapped, in all other cases
    # ('lt', 'eq' or an undecided result) they are kept in the given order.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def initialize(a,b,loops)
      @loops = loops
      a = simplify(a.to_s)
      b = simplify(b.to_s)
      case compare(a,b,@loops)
      when 'lt', 'eq'
        @a = a
        @b = b
      when 'gt'
        @a = b
        @b = a
      else
        # Undecided comparison: keep the order as given
        @a = a
        @b = b
      end
    end

    # Print the interval as a string: lower bound, RANGE_SEP, upper bound.
    def to_s
      @a+RANGE_SEP+@b
    end

    # Merge this interval with another interval, so that this interval spans
    # both. The lower bound only stays as it is when +compare+ reports it as
    # 'lt'; the upper bound is only replaced when +compare+ reports it as 'lt'.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def merge(other_interval)
      @a = case compare(@a,other_interval.a,@loops)
        when 'gt', 'eq' then other_interval.a
        when 'lt' then @a
        else other_interval.a
      end
      @b = case compare(@b,other_interval.b,@loops)
        when 'gt', 'eq' then @b
        when 'lt' then other_interval.b
        else @b
      end
    end

    # Method to compute the length of the interval. For example, the length of
    # [a..b] is equal to (b-a+1). The expression is passed through +simplify+.
    def length
      simplify("(#{@b})-(#{@a})+1")
    end
  end

end
@@ -0,0 +1,153 @@
1
+
2
+
3
# Recursive copy optimisations.
#
# Runs the first and second optimisation sets on +nests+, recurses into the
# children of every nest that has any, and finally runs the third
# (inter-level) set twice on +nests+.
def recursive_copy_optimisations(nests,options)
  perform_copy_optimisations1(nests, options)
  perform_copy_optimisations2(nests, options)

  # Handle the nested levels before the inter-level pass of this level
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?
    recursive_copy_optimisations(children, options)
  end

  # The inter-level pass is deliberately invoked twice, as in the original
  perform_copy_optimisations3(nests, options)
  perform_copy_optimisations3(nests, options)
end
14
+
15
# First set of copyin/copyout optimisations (recursive).
#
# Looks at every pair of consecutive nests (previous, current). As soon as one
# change is made, the method restarts itself from the beginning, so a list is
# never iterated further after it has been modified.
#
# With options[:mem_remove_spurious]:
# * a copyin of the current nest is dropped when the previous nest has a
#   copyout with the same name (+tN+) and domain (+tD+)
# * a copyout of the previous nest is dropped when the current nest has a
#   copyout with the same name and domain
# With options[:mem_copyin_to_front]:
# * a copyin of the current nest moves to the previous nest when the previous
#   nest has a writes list that does not contain that array name
#
# Returns +nests+.
def perform_copy_optimisations1(nests,options)
  same_data = lambda { |x, y| x.tN.to_s == y.tN.to_s && x.tD.to_s == y.tD.to_s }

  nests.each_cons(2) do |previous, current|

    if options[:mem_remove_spurious]

      # Remove spurious copies (out/in)
      previous.copyouts.each do |copyout|
        current.copyins.each do |copyin|
          next unless same_data.call(copyout, copyin)
          current.copyins.delete(copyin)
          return perform_copy_optimisations1(nests,options)
        end
      end

      # Remove spurious copies (out/out)
      previous.copyouts.each do |copyout|
        current.copyouts.each do |other_copyout|
          next unless same_data.call(copyout, other_copyout)
          previous.copyouts.delete(copyout)
          return perform_copy_optimisations1(nests,options)
        end
      end
    end

    # Move copyins to the front
    if options[:mem_copyin_to_front]
      current.copyins.each do |copyin|
        next unless previous.writes && previous.writes.none? { |w| w.tN == copyin.tN }
        previous.copyins.push(copyin)
        current.copyins.delete(copyin)
        return perform_copy_optimisations1(nests,options)
      end
    end
  end
  nests
end
63
+
64
# Second set of copyin/copyout optimisations (non-recursive).
#
# With options[:mem_copyout_to_back]: for every copyout of a nest, the nests
# with a higher id at the same depth are visited in list order. For each of
# them that has a writes list not containing the copied array, the id of the
# copyout is incremented by one; the first nest that does write the array (or
# has no writes list) stops the search.
#
# With options[:mem_remove_spurious]: copyins of the same nest that share name
# (+tN+) and domain (+tD+) are reduced to one, keeping the copy with the
# lowest id. The survivors are computed up front instead of deleting from the
# list while it is being iterated, which skipped elements.
#
# Returns +nests+.
def perform_copy_optimisations2(nests,options)
  nests.each do |nest|

    # Move copyouts to the back
    if options[:mem_copyout_to_back]
      nest.copyouts.each do |copyout|
        nests.each do |other_nest|
          next unless other_nest.id > nest.id && other_nest.depth == nest.depth
          break unless other_nest.writes && !other_nest.writes.map{ |w| w.tN }.include?(copyout.tN)
          copyout.id = copyout.id+1
        end
      end
    end

    # Remove spurious copies (double in)
    if options[:mem_remove_spurious]
      groups = nest.copyins.group_by { |copyin| [copyin.tN.to_s, copyin.tD.to_s] }
      survivors = groups.values.map { |duplicates| duplicates.min_by { |copyin| copyin.id } }

      # Update in place so that other holders of the list see the result
      nest.copyins.replace(survivors)
    end
  end
end
103
+
104
# Third set of copyin/copyout optimisations (inter-level).
#
# For every nest that has children, and only with options[:mem_to_outer_loop],
# copies are moved from the children to the nest itself:
# * a copyout of any child moves up when it does not depend on one of the
#   nest's outer loop variables, no child has a copyin of the same array, and
#   its sync id is not below the largest copyout sync id (2*id+1) among the
#   children; its id is then set to the id of the nest
# * a copyin of the first child moves up when it does not depend on one of the
#   nest's outer loop variables, no later child has a copyin of the same
#   array, and no later child other than the last has a copyout of it
#
# The lists are iterated through snapshots, because entries are deleted from
# them inside the loops; iterating the live list skipped the entry following
# every moved one.
#
# Returns +nests+.
def perform_copy_optimisations3(nests,options)
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?

    # Inter-level loop optimisations (move to outer loop)
    next unless options[:mem_to_outer_loop]
    outer_vars = nest.outer_loops.map{ |l| l[:var] }

    # Move copyouts to outer loops
    max_id = children.map{ |c| 2*c.id+1 }.max
    children.each do |child|
      child.copyouts.dup.each do |copyout|
        to_outer_loop = true
        to_outer_loop = false if outer_vars.any? { |var| copyout.depends_on?(var) }
        to_outer_loop = false if children.any? { |other_child| other_child.copyins.map{ |c| c.tN }.include?(copyout.tN) }
        to_outer_loop = false if copyout.get_sync_id < max_id
        next unless to_outer_loop
        copyout.id = nest.id
        nest.copyouts.push(copyout)
        child.copyouts.delete(copyout)
      end
    end

    # Move copyins to outer loops
    first_child = children.first
    later_children = children.drop(1)
    first_child.copyins.dup.each do |copyin|
      to_outer_loop = true
      to_outer_loop = false if outer_vars.any? { |var| copyin.depends_on?(var) }
      later_children.each do |child|
        to_outer_loop = false if child.copyins.map{ |c| c.tN }.include?(copyin.tN)
        to_outer_loop = false if child.copyouts.map{ |c| c.tN }.include?(copyin.tN) && child != children.last
      end
      next unless to_outer_loop
      nest.copyins.push(copyin)
      first_child.copyins.delete(copyin)
    end
  end
end
@@ -0,0 +1,225 @@
1
+
2
module Adarwin

  # This class represents a loop nest. The end goal is to annotate the loop nest
  # with the corresponding species information. If the loop nest cannot be
  # parallelised (if there are dependences), the species information is not
  # printed.
  #
  # This class contains methods to perform among others the following:
  # * Find all array references in the loop nest
  # * Merge found array references into another array reference
  # * Translate array references into species
  # * Perform dependence tests to check for parallelism
  #
  class Nest
    attr_accessor :code, :species, :name, :verbose
    attr_accessor :fused, :removed
    attr_accessor :copyins, :copyouts
    attr_accessor :depth, :level, :id
    attr_accessor :reads, :writes
    attr_accessor :outer_loops

    # Method to initialise the loop nest. The loop nest is initialised with the
    # following variables:
    # * An identifier for the order/depth in which the nest appears (+level+);
    #   its length is stored as the depth of the nest
    # * The loop nest body in AST form (+code+)
    # * A unique identifier for this loop nest (+id+)
    # * A human readable name for this loop nest (+name+), to which '_k' and
    #   the id plus one are appended
    # * Whether or not verbose information should be printed (+verbose+)
    # * Whether this nest is the result of fusion (+fused+, default 0); a value
    #   above zero skips the dependence test
    def initialize(level, code, id, name, verbose, fused=0)
      @depth = level.length
      @level = level
      @code = code
      @id = id
      @name = name+'_k'+(@id+1).to_s
      @verbose = verbose

      # Set the default values in case there are dependences
      @species = ''
      @fused = fused
      @removed = false
      @copyins = []
      @copyouts = []

      # Get all loops from the loop body and subtract the outer loops from all
      # loops to obtain the set of inner loops (loops in the body).
      @all_loops = @code.get_all_loops
      @outer_loops = @code.get_direct_loops
      @inner_loops = @all_loops - @outer_loops

      # Process the read/write nodes in the loop body to obtain the array
      # reference characterisations. The references are handed the inner and
      # the outer loops.
      @references = @code.clone.get_accesses.map do |reference|
        Reference.new(reference, @id, @inner_loops, @outer_loops, @verbose)
      end

      # Perform the dependence test. The result can be either true or false.
      # Don't perform the dependence test if this is a fused loopnest.
      @has_dependences = (@fused > 0) ? false : has_dependences?

      # Proceed only if there are no dependences and there is something to do
      return if @has_dependences || @references.empty?

      # Merge array reference characterisations into other array references
      merge_references

      # Translate array reference characterisations into species and ARC
      translate_into_species
      translate_into_arc

      # Set the copyin/copyout data from the array references
      @copyins = @references.select { |r| r.tA == 'read' }
      @copyouts = @references.select { |r| r.tA == 'write' }
    end

    # Perform the algorithm to merge array reference characterisations into
    # merged array references. Two different references are merged when their
    # name (+tN+), access type (+tA+) and +tS+ are equal. After every merge the
    # algorithm restarts, because the list of references has changed.
    # TODO: Complete this algorithm to match the scientific paper version.
    def merge_references
      @references.each do |ref1|
        @references.each do |ref2|
          next if ref1 == ref2

          # Perform the checks to see if merging is valid
          next unless ref1.tN == ref2.tN && ref1.tA == ref2.tA && ref1.tS == ref2.tS

          # Merge the domain (ref2 into ref1)
          ref1.tD.each_with_index { |domain, i| domain.merge(ref2.tD[i]) }

          # Merge the number of elements (ref2 into ref1)
          ref1.tE.each_with_index { |elements, i| elements.merge(ref2.tE[i]) }

          # Delete ref2
          @references.delete(ref2)

          # Something has changed: re-run the whole algorithm again
          merge_references
          return
        end
      end
    end

    # Method to translate the array reference characterisations into species.
    # The actual logic is performed within the Reference class. In this method,
    # only the combining of the separate parts is performed.
    def translate_into_species

      # Obtain the reads and writes
      @reads = @references.select { |r| r.tA == 'read' }
      @writes = @references.select { |r| r.tA == 'write' }

      # Create a 'void' access pattern in case there is no read or no write.
      # Else, set the species for the individual accesses.
      read_names = @reads.empty? ? ['0:0|void'] : @reads.map { |r| r.to_species }
      write_names = @writes.empty? ? ['0:0|void'] : @writes.map { |r| r.to_species }

      # Combine the unique descriptions into the species string
      species_in = read_names.uniq.join(' '+WEDGE+' ')
      species_out = write_names.uniq.join(' '+WEDGE+' ')
      @species = species_in+' '+ARROW+' '+species_out
    end

    # Method to translate the array reference characterisations into a string.
    def translate_into_arc
      @arc = @references.map { |r| r.to_arc }.join(' , ')
    end

    # Perform the dependence test for the current loop nest. This method gathers
    # all pairs of array references to test and hands each pair to the
    # Dependence class, which performs the actual tests. Returns true as soon
    # as one pair reports a dependence, and false otherwise.
    def has_dependences?

      # Gather all the read/write and write/write pairs to test
      to_test = []
      writes = @references.select { |r| r.tA == 'write' }
      writes.each do |ref1|
        @references.each do |ref2|

          # Only if the array names are the same and they are not tested before
          next unless ref1.tN == ref2.tN && !to_test.include?([ref2,ref1])

          # Only if the array references are different (e.g. don't test
          # A[i][j+4] and A[i][j+4]).
          to_test << [ref1,ref2] if ref1.get_references != ref2.get_references
        end
      end

      # Test all pairs, stopping at the first dependence
      to_test.uniq.any? do |ref1, ref2|
        Dependence.new(ref1, ref2, @verbose).result
      end
    end

    # Perform a check to see if the loop nest has species that are not just
    # formed from shared or full patterns. If so, there is no parallelism.
    # Removed nests, nests with dependences and nests without a species string
    # never have species.
    def has_species?
      return false if @removed
      return false if @has_dependences
      return false if @species == ''
      only_full = @reads ? @reads.all? { |a| a.pattern == 'full' } : false
      only_shared = @writes ? @writes.all? { |a| a.pattern == 'shared' } : false
      !(only_full && only_shared)
    end

    # Method to print the start pragma of a species.
    def print_species_start
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' kernel '+@species+PRAGMA_DELIMITER_END
    end

    # Method to print the end pragma of a species.
    def print_species_end
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' endkernel '+@name+PRAGMA_DELIMITER_END
    end

    # Method to print the start of an array reference characterisation (ARC).
    def print_arc_start
      PRAGMA_DELIMITER_START+PRAGMA_ARC+' kernel '+@arc+PRAGMA_DELIMITER_END
    end

    # Method to print the end of an array reference characterisation (ARC).
    def print_arc_end
      PRAGMA_DELIMITER_START+PRAGMA_ARC+' endkernel '+@name+PRAGMA_DELIMITER_END
    end

    # Method to print the copyin pragma. Each copyin is printed with twice its
    # id as argument to +to_copy+.
    def print_copyins
      copys = @copyins.map { |a| a.to_copy(2*a.id) }
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' copyin '+copys.join(' '+WEDGE+' ')+PRAGMA_DELIMITER_END
    end

    # Method to print the copyout pragma. Each copyout is printed with twice
    # its id plus one as argument to +to_copy+.
    def print_copyouts
      copys = @copyouts.map { |a| a.to_copy(2*a.id+1) }
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' copyout '+copys.join(' '+WEDGE+' ')+PRAGMA_DELIMITER_END
    end

    # Method to check if the loop nest has copyins, that is, at least one
    # copyin with a non-empty domain (+tD+).
    def has_copyins?
      copyins.any? { |r| !r.tD.empty? }
    end

    # Method to check if the loop nest has copyouts, that is, at least one
    # copyout with a non-empty domain (+tD+).
    def has_copyouts?
      copyouts.any? { |r| !r.tD.empty? }
    end
  end

end