bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,174 @@
1
+
2
# Determine whether kernel fusion is legal (see algorithm in paper/thesis).
#
# Every reference of +a+ (writes followed by reads) is paired with every
# reference of +b+. A pair is relevant when both references name the same
# array (+tN+) and at least one of them is a write. Fusion is rejected as soon
# as a relevant pair differs in the string form of +tD+, +tE+ or +tS+.
#
# Returns true if no pair blocks fusion and false otherwise. Progress messages
# are printed to standard output.
def fusion_is_legal?(a, b)
  (a.writes + a.reads).each do |x|
    (b.writes + b.reads).each do |y|

      # Only pairs on the same array with at least one write matter
      next unless x.tN == y.tN
      next unless x.tA == 'write' || y.tA == 'write'
      puts "#{Adarwin::MESSAGE}Evaluating #{x.to_arc} and #{y.to_arc} for fusion"

      # The pair is harmless when all three characteristics are textually equal
      identical = x.tD.to_s == y.tD.to_s && x.tE.to_s == y.tE.to_s && x.tS.to_s == y.tS.to_s
      next if identical

      puts "#{Adarwin::MESSAGE}Unable to fuse #{x.to_arc} and #{y.to_arc}"
      return false
    end
  end
  puts "#{Adarwin::MESSAGE}Applying fusion"
  true
end
18
+
19
+
20
# Perform the kernel fusion transformations.
#
# Walks over all nests that have species and tries to fuse each one with the
# nest it is compared against (+prev+). When fusion is legal, the fused code is
# inserted in front of the code of +prev+, a new fused nest is appended to
# +nests+ for every generated piece of code, and both original nests are
# marked as removed.
#
# * +nests+ - the list of nests; fused nests are pushed onto it
# * +settings+ - 1 to guard mismatching loop bounds with if-statements, 2 to
#   generate a main loop nest plus an epilogue (experimental). For any other
#   value no code is generated, but the two nests are still marked as removed.
def kernel_fusion(nests, settings)

  # Select the nests that are candidates for fusion
  candidates = nests.select{ |n| n.has_species? }

  # Iterate over the candidates, remembering what to compare the next one with
  prev = nil
  candidates.each do |nest|
    curr = nest

    # The nest the next candidate is compared against: this nest, or the last
    # fused nest created from it
    successor = nest

    if prev

      # Get the loop details
      loops_prev = prev.code.get_direct_loops
      loops_curr = curr.code.get_direct_loops

      # NOTE(review): this skips the update of +prev+, so the next candidate
      # is compared against the same +prev+ again - confirm this is intended.
      if loops_prev.size != loops_curr.size
        puts "#{Adarwin::MESSAGE}Unable to apply fusion, loop count does not match"
        next
      end

      # Only proceed if fusion is legal for this combination
      if fusion_is_legal?(prev, curr)

        # Get the bodies
        body_curr = get_body(loops_curr.size,curr.code.clone)
        body_prev = get_body(loops_prev.size,prev.code.clone)

        # Generate the fused code according to the selected strategy
        fused_code = []
        if settings == 1
          fused_code = fuse_with_bound_guards(loops_prev,loops_curr,body_prev,body_curr)
        elsif settings == 2
          fused_code = fuse_with_epilogue(loops_prev,loops_curr,body_prev,body_curr)
        end

        # Add the newly created code to the original code
        fused_code.each_with_index do |fused_codelet,nest_id|
          puts fused_codelet
          prev.code.insert_prev(fused_codelet)

          # Create a new nest
          fused_name = prev.name.gsub(/_k(\d+)/,'_fused')+nest_id.to_s
          successor = Adarwin::Nest.new(prev.level, fused_codelet, prev.id, fused_name, prev.verbose, 1)
          nests.push(successor)
        end

        # Set the other nests as to-be-removed
        prev.removed = true
        curr.removed = true
      end
    end

    # Next nest
    prev = successor
  end
end

# Fuse everything together in a single loop nest. The fused loops span from
# the smallest minimum to the largest maximum of both nests; each body is
# wrapped in if-statements for the bounds in which it differs from the fused
# loop. Returns an array holding the single piece of fused code.
def fuse_with_bound_guards(loops_prev,loops_curr,body_prev,body_curr)
  loops_target = []
  loops_prev.zip(loops_curr).each do |prevl,currl|
    raise_error("Unequal step count #{prevl[:step]} versus #{currl[:step]}") if prevl[:step] != currl[:step]

    # Create the new loop
    minmin = exact_min(prevl[:min],currl[:min])
    maxmax = exact_max(prevl[:max],currl[:max])
    fused_var = prevl[:var]+currl[:var]
    loops_target.push({ :var => fused_var, :min => minmin, :max => maxmax, :step => prevl[:step] })

    # Replace all occurrences of the original loop variables by the fused one
    body_prev = body_prev.replace_variable(prevl[:var],fused_var)
    body_curr = body_curr.replace_variable(currl[:var],fused_var)

    # Set minimum if-statement conditions
    body_prev = create_if(fused_var,minmin,prevl[:min],body_prev,'>=')
    body_curr = create_if(fused_var,minmin,currl[:min],body_curr,'>=')

    # Set maximum if-statement conditions
    body_prev = create_if(fused_var,maxmax,prevl[:max],body_prev,'<=')
    body_curr = create_if(fused_var,maxmax,currl[:max],body_curr,'<=')
  end

  # Generate the new code
  [code_from_loops(loops_target,[body_prev,body_curr])]
end

# Fuse into a main loop nest followed by an epilogue for mismatching upper
# bounds (experimental). Returns an array with the main loop nest first and
# the epilogue second.
def fuse_with_epilogue(loops_prev,loops_curr,body_prev,body_curr)
  loop_pairs = loops_prev.zip(loops_curr)

  # Generate the loop bodies: both use the fused loop variables
  loop_pairs.each do |prevl,currl|
    raise_error("Unequal step count #{prevl[:step]} versus #{currl[:step]}") if prevl[:step] != currl[:step]
    body_prev = body_prev.replace_variable(prevl[:var],prevl[:var]+currl[:var])
    body_curr = body_curr.replace_variable(currl[:var],prevl[:var]+currl[:var])
  end

  # Create the main loop nest (smallest minimum up to the smallest maximum)
  main_loops = []
  loop_pairs.each do |prevl,currl|
    minmin = exact_min(prevl[:min],currl[:min])
    minmax = exact_min(prevl[:max],currl[:max])
    main_loops.push({ :var => prevl[:var]+currl[:var], :min => minmin, :max => minmax, :step => prevl[:step] })
  end
  main_code = code_from_loops(main_loops,[body_prev,body_curr])

  # Create the epilogue (smallest maximum up to the largest maximum)
  # NOTE(review): the body selected below is +body_curr+ when the previous
  # nest has the larger bound - confirm that this is the intended body.
  epilogue_body = []
  epilogue_loops = []
  loop_pairs.each do |prevl,currl|
    minmax = exact_min(prevl[:max],currl[:max])
    maxmax = exact_max(prevl[:max],currl[:max])
    epilogue_loops.push({ :var => prevl[:var]+currl[:var], :min => minmax, :max => maxmax, :step => prevl[:step] })
    if prevl[:max] != currl[:max]
      epilogue_body = (prevl[:max] == maxmax) ? [body_curr] : [body_prev]
    end
  end
  epilogue_code = code_from_loops(epilogue_loops,epilogue_body)

  [main_code,epilogue_code]
end
133
+
134
# Return the body of a loop nest.
#
# Descends +num_loops+ levels into +code+. At every level the code itself, or
# its first element, has to be a for-statement with a body; the statements of
# that body form the next level. If a level is not such a for-statement, the
# result of +raise_error+ is returned instead.
#
# * +num_loops+ - the number of loop levels to strip
# * +code+ - the code to take the body from
def get_body(num_loops,code)
  remaining = num_loops
  until remaining == 0

    # Step into a surrounding list of statements if it starts with a loop
    head = code.first
    code = head if head.for_statement? && head.stmt

    # Anything other than a loop with a body is not a perfect loop nest
    return raise_error("Not a perfect nested loop") unless code.for_statement? && code.stmt

    code = code.stmt.stmts
    remaining -= 1
  end
  code
end
145
+
146
# Create an if-statement in front of a statement.
#
# When +loop_bound+ equals +reference_bound+ no guard is needed and +code+ is
# returned unchanged. Otherwise +code+ is wrapped in a parsed C if-statement
# with the condition "+loop_var+ +condition+ +loop_bound+".
def create_if(loop_var,reference_bound,loop_bound,code,condition)
  return code unless reference_bound != loop_bound
  C::Statement.parse("if(#{loop_var} #{condition} #{loop_bound}) { #{code} }")
end
153
+
154
# Generate code from a combination of loops and statements (the body).
#
# * +loops+ - a list of hashes with the keys +:var+, +:min+, +:max+ and
#   +:step+, ordered from the outermost to the innermost loop
# * +statements+ - the statements forming the body of the innermost loop
#
# Every loop declares its variable as an int and runs from +:min+ up to and
# including +:max+. The resulting text is parsed into a C statement, which is
# returned.
def code_from_loops(loops,statements)

  # Start of the loops
  headers = loops.map do |loop_datum|
    var = loop_datum[:var]

    # A step equal to the string '1' is written as a post-increment
    increment = (loop_datum[:step] == '1') ? "#{var}++" : "#{var}=#{var}+#{loop_datum[:step]}"
    "for(int #{var}=#{loop_datum[:min]}; #{var}<=#{loop_datum[:max]}; #{increment}) {"
  end

  # Loop body
  body = statements.map{ |statement| statement.to_s }

  # End of the loops: one closing brace per loop
  closing = "}" * loops.size

  C::Statement.parse(headers.join + body.join + closing)
end
@@ -0,0 +1,57 @@
1
+
2
module Adarwin

  # This class represents an interval [a..b] including a and b. The class has
  # the following methods:
  # * Initialise the interval (+initialize+)
  # * Print the interval (+to_s+)
  # * Merge an interval with another interval (+merge+)
  # * Return the length of the interval (+length+)
  class Interval
    attr_accessor :a, :b

    # Initialise the interval. Both bounds are converted to strings and
    # simplified. The +compare+ method then decides which one is the
    # upper-bound; the bounds are swapped if +a+ is reported as greater than
    # +b+. The loop information (+loops+) is handed to +compare+ and stored
    # for later merges.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def initialize(a,b,loops)
      @loops = loops
      a = simplify(a.to_s)
      b = simplify(b.to_s)
      case compare(a,b,@loops)
        when 'lt', 'eq' then @a = a; @b = b
        when 'gt' then @a = b; @b = a
        # Any other outcome: keep the bounds in the given order
        else @a = a; @b = b
      end
    end

    # Print the interval as a string: the lower-bound, the range separator
    # (+RANGE_SEP+) and the upper-bound.
    def to_s
      @a+RANGE_SEP+@b
    end

    # Merge this interval with another interval. The lower-bound becomes the
    # smaller of the two lower-bounds and the upper-bound the larger of the two
    # upper-bounds, as decided by the +compare+ method. If +compare+ returns
    # anything other than 'lt', 'eq' or 'gt', the lower-bound of the other
    # interval and the upper-bound of this interval are used.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def merge(other_interval)
      @a = case compare(@a,other_interval.a,@loops)
        when 'gt', 'eq' then other_interval.a
        when 'lt' then @a
        else other_interval.a
      end
      @b = case compare(@b,other_interval.b,@loops)
        when 'gt', 'eq' then @b
        when 'lt' then other_interval.b
        else @b
      end
    end

    # Method to compute the length of the interval. For example, the length of
    # [a..b] is equal to (b-a+1). The expression is passed through +simplify+.
    def length
      simplify("(#{@b})-(#{@a})+1")
    end
  end

end
@@ -0,0 +1,153 @@
1
+
2
+
3
# Recursive copy optimisations.
#
# Runs the first and second set of copy optimisations on +nests+, then
# descends into the children of every nest, and finally runs the third
# (inter-level) set of optimisations on +nests+ twice in a row.
#
# * +nests+ - the nests of one level
# * +options+ - the option hash handed to the individual optimisation passes
def recursive_copy_optimisations(nests,options)

  # Optimisations within this level
  perform_copy_optimisations1(nests,options)
  perform_copy_optimisations2(nests,options)

  # Optimise the deeper levels first
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?
    recursive_copy_optimisations(children,options)
  end

  # Inter-level optimisations; the pass is deliberately invoked two times
  perform_copy_optimisations3(nests,options)
  perform_copy_optimisations3(nests,options)
end
14
+
15
# First set of copyin/copyout optimisations.
#
# Looks at every pair of consecutive nests and applies one rewrite at a time,
# starting over from the first pair after every change, until nothing changes
# any more. The rewrites are:
# * +options[:mem_remove_spurious]+: drop a copyin of the current nest that
#   has the same name and domain as a copyout of the previous nest
# * +options[:mem_remove_spurious]+: drop a copyout of the previous nest that
#   has the same name and domain as a copyout of the current nest
# * +options[:mem_copyin_to_front]+: move a copyin of the current nest to the
#   previous nest if the previous nest does not write that array
#
# The restart is a loop rather than a recursive call, so the stack depth does
# not grow with the number of removed or moved copies. Returns +nests+.
def perform_copy_optimisations1(nests,options)
  loop do
    break unless apply_one_copy_optimisation1(nests,options)
  end
  nests
end

# Apply the first applicable rewrite of +perform_copy_optimisations1+.
# Returns true if something was changed and false if no rewrite applies.
# Every modification is followed by an immediate return, so no array is
# iterated further after it has been changed.
def apply_one_copy_optimisation1(nests,options)
  nests.each_cons(2) do |previous,current|

    if options[:mem_remove_spurious]

      # Remove spurious copies (out/in)
      previous.copyouts.each do |copyout|
        current.copyins.each do |copyin|
          if copyout.tN.to_s == copyin.tN.to_s && copyout.tD.to_s == copyin.tD.to_s
            current.copyins.delete(copyin)
            return true
          end
        end
      end

      # Remove spurious copies (out/out)
      previous.copyouts.each do |copyout|
        current.copyouts.each do |other_copyout|
          if copyout.tN.to_s == other_copyout.tN.to_s && copyout.tD.to_s == other_copyout.tD.to_s
            previous.copyouts.delete(copyout)
            return true
          end
        end
      end
    end

    # Move copyins to the front
    if options[:mem_copyin_to_front]
      current.copyins.each do |copyin|
        if previous.writes && !previous.writes.map{ |w| w.tN }.include?(copyin.tN)
          previous.copyins.push(copyin)
          current.copyins.delete(copyin)
          return true
        end
      end
    end
  end
  false
end
63
+
64
# Second set of copyin/copyout optimisations (non-recursive).
#
# For every nest:
# * +options[:mem_copyout_to_back]+: the id of each copyout is incremented
#   once for every nest with a higher id and the same depth that does not
#   write the copied array. The scan over the nests stops at the first such
#   nest that does write the array (or that has no writes set).
# * +options[:mem_remove_spurious]+: of all copyins with the same name and
#   domain only the one with the lowest id is kept (the first one if several
#   share the lowest id). The order of the remaining copyins is preserved.
#
# The duplicates are determined first and removed afterwards, so the list of
# copyins is never modified while it is being compared against itself.
# Returns +nests+.
def perform_copy_optimisations2(nests,options)
  nests.each do |nest|

    # Move copyouts to the back
    if options[:mem_copyout_to_back]
      nest.copyouts.each do |copyout|
        nests.each do |other_nest|
          next unless other_nest.id > nest.id && other_nest.depth == nest.depth
          break unless other_nest.writes && !other_nest.writes.map{ |w| w.tN }.include?(copyout.tN)
          copyout.id = copyout.id+1
        end
      end
    end

    # Remove spurious copies (double in)
    if options[:mem_remove_spurious]

      # Select the copyin to keep for every combination of name and domain
      keepers = {}
      nest.copyins.each do |copyin|
        key = [copyin.tN.to_s, copyin.tD.to_s]
        kept = keepers[key]
        keepers[key] = copyin if kept.nil? || copyin.id < kept.id
      end

      # Drop everything else, including repeated occurrences of a kept copyin
      emitted = {}
      nest.copyins.delete_if do |copyin|
        key = [copyin.tN.to_s, copyin.tD.to_s]
        drop = emitted[key] || !keepers[key].equal?(copyin)
        emitted[key] = true unless drop
        drop
      end
    end
  end
end
103
+
104
# Third set of copyin/copyout optimisations (inter-level).
#
# For every nest that has children, and only if +options[:mem_to_outer_loop]+
# is set, copies of the children are moved to the nest itself:
# * A copyout of a child is moved (and gets the id of the nest) unless it
#   depends on one of the outer loop variables of the nest, one of the
#   children has a copyin of the same array, or its synchronisation id is
#   lower than the highest copyout slot (2*id+1) among the children.
# * A copyin of the first child is moved unless it depends on one of the outer
#   loop variables of the nest, a later child has a copyin of the same array,
#   or a later child other than the last one has a copyout of the same array.
#
# The copies are iterated through a snapshot of the list, because moving a
# copy deletes it from the list it came from; iterating the list itself would
# skip the element following every moved copy. Returns +nests+.
def perform_copy_optimisations3(nests,options)
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?

    # Inter-level loop optimisations (move to outer loop)
    next unless options[:mem_to_outer_loop]

    # Move copyouts to outer loops
    max_id = children.map{ |c| 2*c.id+1 }.max
    children.each do |child|
      child.copyouts.dup.each do |copyout|
        blocked = nest.outer_loops.any?{ |l| copyout.depends_on?(l[:var]) }
        blocked ||= children.any?{ |other_child| other_child.copyins.map{ |c| c.tN }.include?(copyout.tN) }
        blocked ||= copyout.get_sync_id < max_id
        next if blocked
        copyout.id = nest.id
        nest.copyouts.push(copyout)
        child.copyouts.delete(copyout)
      end
    end

    # Move copyins to outer loops
    first_child = children.first
    later_children = children.drop(1)
    first_child.copyins.dup.each do |copyin|
      blocked = nest.outer_loops.any?{ |l| copyin.depends_on?(l[:var]) }
      blocked ||= later_children.any? do |child|
        child.copyins.map{ |c| c.tN }.include?(copyin.tN) ||
          (child.copyouts.map{ |c| c.tN }.include?(copyin.tN) && child != children.last)
      end
      next if blocked
      nest.copyins.push(copyin)
      first_child.copyins.delete(copyin)
    end
  end
end
@@ -0,0 +1,225 @@
1
+
2
module Adarwin

  # This class represents a loop nest. The end goal is to annotate the loop nest
  # with the corresponding species information. If the loop nest cannot be
  # parallelised (if there are dependences), the species information is not
  # printed.
  #
  # This class contains methods to perform among others the following:
  # * Find all array references in the loop nest
  # * Merge found array references into another array reference
  # * Translate array references into species
  # * Perform dependence tests to check for parallelism
  #
  class Nest
    attr_accessor :code, :species, :name, :verbose
    attr_accessor :fused, :removed
    attr_accessor :copyins, :copyouts
    attr_accessor :depth, :level, :id
    attr_accessor :reads, :writes
    attr_accessor :outer_loops

    # Method to initialise the loop nest. The loop nest is initialised with the
    # following variables:
    # * An identifier for the order/depth in which the nest appears (+level+)
    # * The loop nest body in AST form (+code+)
    # * A unique identifier for this loop nest (+id+)
    # * A human readable name for this loop nest (+name+)
    # * Whether or not verbose information should be printed (+verbose+)
    # * A value greater than zero for a fused loop nest (+fused+); the
    #   dependence test is skipped for such a nest
    def initialize(level, code, id, name, verbose, fused=0)
      @level = level
      @depth = level.length
      @code = code
      @id = id
      @name = name + "_k#{@id+1}"
      @verbose = verbose
      @fused = fused

      # The default values, which remain in place if there are dependences
      @species = ''
      @removed = false
      @copyins = []
      @copyouts = []

      # The inner loops (loops in the body) are all loops without the outer
      # loops.
      @all_loops = @code.get_all_loops()
      @outer_loops = @code.get_direct_loops()
      @inner_loops = @all_loops - @outer_loops

      # Characterise every read/write access in the loop body. The references
      # are given the nest identifier and all loop data.
      accesses = @code.clone.get_accesses()
      @references = accesses.map do |access|
        Reference.new(access,@id,@inner_loops,@outer_loops,@verbose)
      end

      # Perform the dependence test, except for a fused loop nest. Proceed
      # only if there are no dependences and there is something to describe.
      @has_dependences = (@fused > 0) ? false : has_dependences?
      return if @has_dependences || @references.empty?

      # Merge array reference characterisations into other array references
      merge_references()

      # Translate array reference characterisations into species and ARC
      translate_into_species()
      translate_into_arc()

      # Set the copyin/copyout data from the array references
      @copyins = @references.select{ |r| r.tA == 'read' }
      @copyouts = @references.select{ |r| r.tA == 'write' }
    end

    # Perform the algorithm to merge array reference characterisations into
    # merged array references. Two different references are merged if they
    # have the same name (+tN+), access direction (+tA+) and +tS+ value. After
    # every merge the algorithm starts over.
    # TODO: Complete this algorithm to match the scientific paper version.
    def merge_references
      @references.each do |ref1|
        @references.each do |ref2|

          # Perform the checks to see if merging is valid
          next unless ref1 != ref2
          next unless ref1.tN == ref2.tN && ref1.tA == ref2.tA && ref1.tS == ref2.tS

          # Merge the domain (ref2 into ref1)
          ref1.tD.each_with_index{ |domain,i| domain.merge(ref2.tD[i]) }

          # Merge the number of elements (ref2 into ref1)
          ref1.tE.each_with_index{ |elements,i| elements.merge(ref2.tE[i]) }

          # Delete ref2
          @references.delete(ref2)

          # Something has changed: re-run the whole algorithm again
          merge_references()
          return
        end
      end
    end

    # Method to translate the array reference characterisations into species.
    # The actual logic is performed within the Reference class. In this method,
    # only the combining of the separate parts is performed.
    def translate_into_species

      # Obtain the reads and writes
      @reads = @references.select{ |r| r.tA == 'read' }
      @writes = @references.select{ |r| r.tA == 'write' }

      # Use a 'void' access pattern if there is no read or no write, and the
      # species of the individual accesses otherwise.
      read_names = @reads.empty? ? ['0:0|void'] : @reads.map{ |r| r.to_species }
      write_names = @writes.empty? ? ['0:0|void'] : @writes.map{ |r| r.to_species }

      # Combine the unique descriptions into the species
      separator = ' '+WEDGE+' '
      species_in = read_names.uniq.join(separator)
      species_out = write_names.uniq.join(separator)
      @species = species_in+' '+ARROW+' '+species_out
    end

    # Method to translate the array reference characterisations into a string.
    def translate_into_arc
      @arc = @references.map{ |r| r.to_arc }.join(' , ')
    end

    # Perform the dependence test for the current loop nest. This method gathers
    # all pairs of array references to test and hands every pair to the
    # Dependence class. Returns true as soon as one of the tests reports a
    # dependence and false otherwise.
    def has_dependences?

      # Gather all the read/write and write/write pairs to test
      to_test = []
      @references.select{ |r| r.tA == 'write' }.each do |ref1|
        @references.each do |ref2|

          # Only if the array names are the same and they are not tested before
          next unless ref1.tN == ref2.tN && !to_test.include?([ref2,ref1])

          # Only if the array references are different (e.g. don't test
          # A[i][j+4] and A[i][j+4]).
          to_test << [ref1,ref2] if ref1.get_references != ref2.get_references
        end
      end

      # Test all pairs
      to_test.uniq.each do |first,second|
        return true if Dependence.new(first,second,@verbose).result
      end
      false
    end

    # Perform a check to see if the loop nest has species that are not just
    # formed from shared or full patterns. If so, there is no parallelism.
    # A removed nest, a nest with dependences and a nest without species never
    # have species.
    def has_species?
      return false if @removed || @has_dependences || @species == ''
      only_full = @reads ? @reads.all?{ |a| a.pattern == 'full' } : false
      only_shared = @writes ? @writes.all?{ |a| a.pattern == 'shared' } : false
      !(only_full && only_shared)
    end

    # Method to print the start pragma of a species.
    def print_species_start
      pragma(PRAGMA_SPECIES,' kernel '+@species)
    end

    # Method to print the end pragma of a species.
    def print_species_end
      pragma(PRAGMA_SPECIES,' endkernel '+@name)
    end

    # Method to print the start of an array reference characterisation (ARC).
    def print_arc_start
      pragma(PRAGMA_ARC,' kernel '+@arc)
    end

    # Method to print the end of an array reference characterisation (ARC).
    def print_arc_end
      pragma(PRAGMA_ARC,' endkernel '+@name)
    end

    # Method to print the copyin pragma. Every copyin is printed with the
    # number 2*id, based on its own id.
    def print_copyins
      copys = @copyins.map{ |a| a.to_copy(2*a.id) }
      pragma(PRAGMA_SPECIES,' copyin '+copys.join(' '+WEDGE+' '))
    end

    # Method to print the copyout pragma. Every copyout is printed with the
    # number 2*id+1, based on its own id.
    def print_copyouts
      copys = @copyouts.map{ |a| a.to_copy(2*a.id+1) }
      pragma(PRAGMA_SPECIES,' copyout '+copys.join(' '+WEDGE+' '))
    end

    # Method to check if the loop nest has copyins with a non-empty domain.
    def has_copyins?
      copyins.any?{ |r| !r.tD.empty? }
    end

    # Method to check if the loop nest has copyouts with a non-empty domain.
    def has_copyouts?
      copyouts.any?{ |r| !r.tD.empty? }
    end

    private

    # Surround a pragma keyword and its text with the pragma delimiters.
    def pragma(keyword,text)
      PRAGMA_DELIMITER_START+keyword+text+PRAGMA_DELIMITER_END
    end
  end

end