bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,174 @@
|
|
1
|
+
|
2
|
+
# Determine whether kernel fusion is legal (see algorithm in paper/thesis).
#
# Every reference of nest +a+ (writes first, then reads) is paired with every
# reference of nest +b+. A pair is relevant when both touch the same array
# (+tN+) and at least one of the two is a write. Fusion is rejected as soon as
# a relevant pair differs in +tD+, +tE+ or +tS+ (compared as strings).
#
# Returns +true+ when no relevant pair differs, +false+ otherwise. Progress
# messages are printed to standard output.
def fusion_is_legal?(a, b)
  (a.writes + a.reads).each do |ref_a|
    (b.writes + b.reads).each do |ref_b|

      # Only pairs on the same array with at least one write matter
      next unless ref_a.tN == ref_b.tN
      next unless ref_a.tA == 'write' || ref_b.tA == 'write'
      puts Adarwin::MESSAGE+"Evaluating #{ref_a.to_arc} and #{ref_b.to_arc} for fusion"

      # The pair is harmless when all three characteristics are identical
      identical = ref_a.tD.to_s == ref_b.tD.to_s &&
                  ref_a.tE.to_s == ref_b.tE.to_s &&
                  ref_a.tS.to_s == ref_b.tS.to_s
      next if identical

      puts Adarwin::MESSAGE+"Unable to fuse #{ref_a.to_arc} and #{ref_b.to_arc}"
      return false
    end
  end
  puts Adarwin::MESSAGE+"Applying fusion"
  true
end
|
18
|
+
|
19
|
+
|
20
|
+
# Perform the kernel fusion transformations.
#
# * +nests+ is the list of loop nests; nests created by fusion are appended
#   to it and the nests they replace are flagged through +removed+.
# * +settings+ selects the strategy: 1 guards the bodies with if-statements
#   when loop bounds differ, 2 emits a main loop nest plus an epilogue
#   (experimental). For any other value no code is generated, yet a legal
#   pair is still flagged as removed.
#
# Only nests for which +has_species?+ holds are considered, and each one is
# compared with the candidate that precedes it.
def kernel_fusion(nests, settings)

  # Select the nests that can take part in fusion
  fusable = nests.select { |n| n.has_species? }

  # Walk over the candidates, pairing each one with its predecessor
  predecessor = nil
  fusable.each do |candidate|

    # The nest that acts as predecessor in the next iteration
    follow_up = candidate

    if predecessor

      # Get the loop details
      outer_pred = predecessor.code.get_direct_loops
      outer_cand = candidate.code.get_direct_loops
      if outer_pred.size != outer_cand.size
        puts Adarwin::MESSAGE+"Unable to apply fusion, loop count does not match"
        # NOTE(review): skipping here leaves the predecessor untouched, so the
        # next candidate is compared with a nest that is not its direct
        # neighbour - confirm this is intended.
        next
      end

      # Only proceed if fusion is legal for this combination
      if fusion_is_legal?(predecessor, candidate)
        codelets = []
        loop_pairs = outer_pred.zip(outer_cand)

        # Get the bodies
        body_cand = get_body(outer_cand.size, candidate.code.clone)
        body_pred = get_body(outer_pred.size, predecessor.code.clone)

        # Fuse everything together: include if-statements for non-matching loop bounds
        if settings == 1

          # Create new loops spanning the union of both iteration ranges
          target_loops = []
          loop_pairs.each do |pred_loop, cand_loop|
            raise_error("Unequal step count #{pred_loop[:step]} versus #{cand_loop[:step]}") if pred_loop[:step] != cand_loop[:step]
            lowest = exact_min(pred_loop[:min], cand_loop[:min])
            highest = exact_max(pred_loop[:max], cand_loop[:max])
            fused_var = pred_loop[:var]+cand_loop[:var]
            target_loops.push({ :var => fused_var, :min => lowest, :max => highest, :step => pred_loop[:step] })

            # Replace all occurrences of the original loop variables by the fused one
            body_pred = body_pred.replace_variable(pred_loop[:var], fused_var)
            body_cand = body_cand.replace_variable(cand_loop[:var], fused_var)

            # Guard against iterations below the original lower bounds
            body_pred = create_if(fused_var, lowest, pred_loop[:min], body_pred, '>=')
            body_cand = create_if(fused_var, lowest, cand_loop[:min], body_cand, '>=')

            # Guard against iterations above the original upper bounds
            body_pred = create_if(fused_var, highest, pred_loop[:max], body_pred, '<=')
            body_cand = create_if(fused_var, highest, cand_loop[:max], body_cand, '<=')
          end

          # Generate the new code
          codelets.push(code_from_loops(target_loops, [body_pred, body_cand]))

        # Create an epilogue in case of mismatching loop bounds (experimental)
        elsif settings == 2

          # Rename the loop variables in both bodies
          loop_pairs.each do |pred_loop, cand_loop|
            raise_error("Unequal step count #{pred_loop[:step]} versus #{cand_loop[:step]}") if pred_loop[:step] != cand_loop[:step]
            body_pred = body_pred.replace_variable(pred_loop[:var], pred_loop[:var]+cand_loop[:var])
            body_cand = body_cand.replace_variable(cand_loop[:var], pred_loop[:var]+cand_loop[:var])
          end

          # Create the main loop nest (bounded by the smaller of the upper bounds)
          main_loops = loop_pairs.map do |pred_loop, cand_loop|
            lowest = exact_min(pred_loop[:min], cand_loop[:min])
            shared_top = exact_min(pred_loop[:max], cand_loop[:max])
            { :var => pred_loop[:var]+cand_loop[:var], :min => lowest, :max => shared_top, :step => pred_loop[:step] }
          end
          codelets.push(code_from_loops(main_loops, [body_pred, body_cand]))

          # Create the epilogue (from the smaller to the larger upper bound)
          # NOTE(review): the body picked below is the candidate's when the
          # predecessor owns the larger bound, and the epilogue starts at the
          # main loop's inclusive upper bound - both look suspicious, confirm.
          leftover = []
          tail_loops = loop_pairs.map do |pred_loop, cand_loop|
            shared_top = exact_min(pred_loop[:max], cand_loop[:max])
            highest = exact_max(pred_loop[:max], cand_loop[:max])
            tail_loop = { :var => pred_loop[:var]+cand_loop[:var], :min => shared_top, :max => highest, :step => pred_loop[:step] }
            if pred_loop[:max] != cand_loop[:max]
              leftover = (pred_loop[:max] == highest) ? [body_cand] : [body_pred]
            end
            tail_loop
          end
          codelets.push(code_from_loops(tail_loops, leftover))
        end

        # Add the newly created code to the original code
        codelets.each_with_index do |codelet, position|
          puts codelet
          predecessor.code.insert_prev(codelet)

          # Create a new nest; the last one created becomes the predecessor
          # for the next candidate
          fused_name = predecessor.name.gsub(/_k(\d+)/,'_fused')+position.to_s
          follow_up = Adarwin::Nest.new(predecessor.level, codelet, predecessor.id, fused_name, predecessor.verbose, 1)
          nests.push(follow_up)
        end

        # Set the other nests as to-be-removed
        predecessor.removed = true
        candidate.removed = true
      end
    end

    # Next nest
    predecessor = follow_up
  end
end
|
133
|
+
|
134
|
+
# Return the body of a loop nest.
#
# Descends +num_loops+ levels into +code+. At every level the first element is
# taken when it is a for-statement with a body; the current node must then be
# a for-statement with a body, whose statements become the next level. When
# that is not the case +raise_error+ is called ("Not a perfect nested loop").
def get_body(num_loops,code)
  remaining = num_loops
  node = code
  until remaining == 0
    head = node.first
    node = head if head.for_statement? && head.stmt
    unless node.for_statement? && node.stmt
      return raise_error("Not a perfect nested loop")
    end

    # Step into the statements of this loop
    node = node.stmt.stmts
    remaining -= 1
  end
  node
end
|
145
|
+
|
146
|
+
# Create an if-statement in front of a statement.
#
# When +loop_bound+ equals +reference_bound+ no guard is needed and +code+ is
# returned untouched. Otherwise +code+ is wrapped in
# "if(<loop_var> <condition> <loop_bound>) { ... }" and parsed into a
# statement.
def create_if(loop_var,reference_bound,loop_bound,code,condition)
  return code unless reference_bound != loop_bound
  guarded = "if(#{loop_var} #{condition} #{loop_bound}) { #{code.to_s} }"
  C::Statement.parse(guarded)
end
|
153
|
+
|
154
|
+
# Generate code from a combination of loops and statements (the body).
#
# * +loops+ is a list of hashes with the keys +:var+, +:min+, +:max+ and
#   +:step+, ordered from the outermost to the innermost loop.
# * +statements+ is the list of statements forming the innermost body.
#
# Every loop declares its variable as an +int+ and runs up to and including
# +:max+. The assembled text is parsed into a statement.
def code_from_loops(loops,statements)

  # Start of the loops
  headers = loops.map do |loop_datum|
    var = loop_datum[:var]
    increment = (loop_datum[:step] == '1') ? "#{var}++" : "#{var}=#{var}+#{loop_datum[:step]}"
    "for(int #{var}=#{loop_datum[:min]}; #{var}<=#{loop_datum[:max]}; #{increment}) {"
  end

  # Loop body
  body = statements.map { |statement| statement.to_s }

  # End of the loops
  closing = "}" * loops.size

  C::Statement.parse(headers.join + body.join + closing)
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
|
2
|
+
module Adarwin

  # This class represents an interval [a..b] including a and b. The class has
  # the following methods:
  # * Initialise the interval (+initialize+)
  # * Print the interval (+to_s+)
  # * Merge an interval with another interval (+merge+)
  # * Return the length of the interval (+length+)
  class Interval
    attr_accessor :a, :b

    # Initialise the interval. Both bounds are converted to strings and
    # simplified. The bounds are swapped only when the +compare+ method
    # reports that +a+ is greater than +b+; for every other outcome ('lt',
    # 'eq' or anything else) they are stored in the given order. +loops+ is
    # the loop information handed to +compare+.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def initialize(a,b,loops)
      @loops = loops
      a = simplify(a.to_s)
      b = simplify(b.to_s)
      if compare(a,b,@loops) == 'gt'
        @a, @b = b, a
      else
        @a, @b = a, b
      end
    end

    # Print the interval as a string (e.g. [4..9]).
    def to_s
      @a+RANGE_SEP+@b
    end

    # Merge this interval with another interval. The lower bound is kept only
    # when +compare+ reports it as smaller than the other lower bound; in all
    # other cases the other lower bound is taken. The upper bound is replaced
    # only when +compare+ reports it as smaller than the other upper bound.
    # Returns the resulting upper bound.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def merge(other_interval)
      @a = other_interval.a unless compare(@a,other_interval.a,@loops) == 'lt'
      @b = other_interval.b if compare(@b,other_interval.b,@loops) == 'lt'
      @b
    end

    # Method to compute the length of the interval. For example, the length of
    # [a..b] is equal to (b-a+1).
    def length
      simplify("(#{@b})-(#{@a})+1")
    end
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Recursive copy optimisations.
#
# Applies the first and second set of optimisations to +nests+, descends into
# the children of every nest (as returned by +get_children+) and finally
# applies the third, inter-level set to +nests+. +options+ is passed on
# unchanged to every step.
def recursive_copy_optimisations(nests,options)
  perform_copy_optimisations1(nests,options)
  perform_copy_optimisations2(nests,options)

  # Handle the deeper levels before the inter-level step of this level
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?
    recursive_copy_optimisations(children,options)
  end

  # NOTE(review): the inter-level step runs twice; presumably the second sweep
  # picks up moves that only became possible after the first - confirm.
  perform_copy_optimisations3(nests,options)
  perform_copy_optimisations3(nests,options)
end
|
14
|
+
|
15
|
+
# First set of copyin/copyout optimisations (recursive).
#
# Looks at every pair of consecutive nests. As soon as one change is made the
# method starts over on the whole list, so it only returns (with +nests+) once
# no rule applies any more. The rules, in order:
# * +:mem_remove_spurious+: drop a copyin of the current nest when the
#   previous nest has a copyout with the same +tN+ and +tD+ (out/in).
# * +:mem_remove_spurious+: drop a copyout of the previous nest when the
#   current nest has a copyout with the same +tN+ and +tD+ (out/out).
# * +:mem_copyin_to_front+: move a copyin of the current nest to the previous
#   nest when the previous nest has writes and none of them has the same +tN+.
def perform_copy_optimisations1(nests,options)
  nests.each_cons(2) do |previous, current|

    # Remove spurious copies (out/in)
    if options[:mem_remove_spurious]
      previous.copyouts.each do |copyout|
        redundant = current.copyins.find do |copyin|
          copyout.tN.to_s == copyin.tN.to_s && copyout.tD.to_s == copyin.tD.to_s
        end
        if redundant
          current.copyins.delete(redundant)
          return perform_copy_optimisations1(nests,options)
        end
      end
    end

    # Remove spurious copies (out/out)
    if options[:mem_remove_spurious]
      overwritten = previous.copyouts.find do |copyout|
        current.copyouts.any? do |other_copyout|
          copyout.tN.to_s == other_copyout.tN.to_s && copyout.tD.to_s == other_copyout.tD.to_s
        end
      end
      if overwritten
        previous.copyouts.delete(overwritten)
        return perform_copy_optimisations1(nests,options)
      end
    end

    # Move copyins to the front
    if options[:mem_copyin_to_front]
      movable = current.copyins.find do |copyin|
        previous.writes && previous.writes.none? { |w| w.tN == copyin.tN }
      end
      if movable
        previous.copyins.push(movable)
        current.copyins.delete(movable)
        return perform_copy_optimisations1(nests,options)
      end
    end
  end
  nests
end
|
63
|
+
|
64
|
+
# Second set of copyin/copyout optimisations (non-recursive).
#
# For every nest in +nests+:
# * +:mem_copyout_to_back+: the +id+ of each copyout is incremented once for
#   every later nest (higher +id+, same +depth+) that has writes none of which
#   has the same +tN+. The scan stops at the first later nest that either
#   writes the same +tN+ or has no writes at all.
# * +:mem_remove_spurious+: copyins with the same +tN+ and +tD+ (compared as
#   strings) are reduced to a single entry, namely the one with the lowest
#   +id+ (the earliest one when ids are equal). The order of the remaining
#   copyins is preserved and the list is updated in place.
#
# Returns +nests+.
def perform_copy_optimisations2(nests,options)
  nests.each do |nest|

    # Move copyouts to the back
    if options[:mem_copyout_to_back]
      nest.copyouts.each do |copyout|
        nests.each do |other_nest|
          next unless other_nest.id > nest.id && other_nest.depth == nest.depth
          break unless other_nest.writes && other_nest.writes.none? { |w| w.tN == copyout.tN }
          copyout.id += 1
        end
      end
    end

    # Remove spurious copies (double in). The survivors are selected first and
    # the list is rewritten afterwards, so nothing is deleted from the list
    # while it is being traversed.
    if options[:mem_remove_spurious]
      keep = {}
      nest.copyins.each_with_index do |copyin,index|
        key = [copyin.tN.to_s, copyin.tD.to_s]
        best = keep[key]
        keep[key] = index if best.nil? || copyin.id < nest.copyins[best].id
      end
      nest.copyins.replace(nest.copyins.values_at(*keep.values.sort))
    end

  end
end
|
103
|
+
|
104
|
+
# Third set of copyin/copyout optimisations (inter-level).
#
# For every nest that has children (as returned by +get_children+) and when
# +:mem_to_outer_loop+ is set, copies are moved from the children to the nest
# itself:
# * A copyout of any child is moved when it does not depend on one of the
#   nest's outer loop variables, no child has a copyin with the same +tN+ and
#   its sync id is not below the highest value of 2*id+1 over all children.
#   The +id+ of a moved copyout is set to the +id+ of the nest.
# * A copyin of the first child is moved when it does not depend on one of
#   the nest's outer loop variables, no later child has a copyin with the same
#   +tN+ and no later child other than the last one has a copyout with the
#   same +tN+.
#
# Every copyout and copyin is evaluated exactly once per call. Returns +nests+.
def perform_copy_optimisations3(nests,options)
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?

    # Inter-level loop optimisations (move to outer loop)
    next unless options[:mem_to_outer_loop]
    outer_vars = nest.outer_loops.map { |l| l[:var] }

    # Move copyouts to outer loops. +delete_if+ removes the moved entries
    # without skipping the element that follows a removed one.
    max_id = children.map { |c| 2*c.id+1 }.max
    children.each do |child|
      child.copyouts.delete_if do |copyout|
        to_outer_loop = outer_vars.none? { |var| copyout.depends_on?(var) } &&
                        children.none? { |other_child| other_child.copyins.any? { |c| c.tN == copyout.tN } } &&
                        !(copyout.get_sync_id < max_id)
        if to_outer_loop
          copyout.id = nest.id
          nest.copyouts.push(copyout)
        end
        to_outer_loop
      end
    end

    # Move copyins to outer loops
    later_children = children.drop(1)
    children.first.copyins.delete_if do |copyin|
      blocked = later_children.any? { |child|
        child.copyins.any? { |c| c.tN == copyin.tN } ||
          (child != children.last && child.copyouts.any? { |c| c.tN == copyin.tN })
      }
      to_outer_loop = outer_vars.none? { |var| copyin.depends_on?(var) } && !blocked
      nest.copyins.push(copyin) if to_outer_loop
      to_outer_loop
    end

  end
end
|
data/lib/adarwin/nest.rb
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
|
2
|
+
module Adarwin

  # This class represents a loop nest. The end goal is to annotate the loop nest
  # with the corresponding species information. If the loop nest cannot be
  # parallelised (if there are dependences), the species information is not
  # printed.
  #
  # This class contains methods to perform among others the following:
  # * Find all array references in the loop nest
  # * Merge found array references into another array reference
  # * Translate array references into species
  # * Perform dependence tests to check for parallelism
  #
  class Nest
    attr_accessor :code, :species, :name, :verbose
    attr_accessor :fused, :removed
    attr_accessor :copyins, :copyouts
    attr_accessor :depth, :level, :id
    attr_accessor :reads, :writes
    attr_accessor :outer_loops

    # Method to initialise the loop nest. The loop nest is initialised with the
    # following variables:
    # * An identifier for the order/depth in which the nest appears (+level+)
    # * The loop nest body in AST form (+code+)
    # * A unique identifier for this loop nest (+id+)
    # * A human readable name for this loop nest (+name+); the suffix '_k'
    #   followed by +id+ plus one is appended to it
    # * Whether or not verbose information should be printed (+verbose+)
    # * Whether this nest is the result of fusion (+fused+, larger than zero
    #   if so); the dependence test is skipped for fused nests
    def initialize(level, code, id, name, verbose, fused=0)
      @level = level
      @depth = level.length
      @code = code
      @id = id
      @name = name+'_k'+(@id+1).to_s
      @verbose = verbose

      # Defaults that stay in place when the nest has dependences
      @species = ''
      @fused = fused
      @removed = false
      @copyins = []
      @copyouts = []

      # The inner loops are all loops minus the direct (outer) loops
      @all_loops = @code.get_all_loops()
      @outer_loops = @code.get_direct_loops()
      @inner_loops = @all_loops - @outer_loops

      # Characterise every array access found in a copy of the loop body. The
      # references are given both the inner and the outer loop data.
      @references = @code.clone.get_accesses().map { |access|
        Reference.new(access,@id,@inner_loops,@outer_loops,@verbose)
      }

      # Fused loop nests are not tested for dependences
      if @fused > 0
        @has_dependences = false
      else
        @has_dependences = has_dependences?
      end

      # Proceed only without dependences and with at least one reference
      unless @has_dependences || @references.empty?

        # Merge array reference characterisations into other array references
        merge_references()

        # Translate array reference characterisations into species and ARC
        translate_into_species()
        translate_into_arc()

        # Set the copyin/copyout data from the array references
        @copyins = @references.select { |r| r.tA == 'read' }
        @copyouts = @references.select { |r| r.tA == 'write' }
      end
    end

    # Perform the algorithm to merge array reference characterisations into
    # merged array references. Two distinct references are merged when their
    # +tN+, +tA+ and +tS+ are equal: the +tD+ and +tE+ entries of the second
    # are merged position by position into those of the first, after which the
    # second is removed and the algorithm starts over.
    # TODO: Complete this algorithm to match the scientific paper version.
    def merge_references
      @references.each do |target|
        @references.each do |source|
          next unless target != source

          # Perform the checks to see if merging is valid
          next unless target.tN == source.tN && target.tA == source.tA && target.tS == source.tS

          # Merge the domain (source into target)
          target.tD.each_with_index { |interval,dim| interval.merge(source.tD[dim]) }

          # Merge the number of elements (source into target)
          target.tE.each_with_index { |interval,dim| interval.merge(source.tE[dim]) }

          # Delete the merged reference
          @references.delete(source)

          # Something has changed: re-run the whole algorithm again
          merge_references()
          return
        end
      end
    end

    # Method to translate the array reference characterisations into species.
    # The actual logic is performed within the Reference class. In this method,
    # only the combining of the separate parts is performed.
    def translate_into_species

      # Obtain the reads and writes
      @reads = @references.select { |r| r.tA == 'read' }
      @writes = @references.select { |r| r.tA == 'write' }

      # Use a 'void' access pattern for a side without any access, else take
      # the species of the individual accesses (duplicates are dropped).
      inputs = @reads.empty? ? ['0:0|void'] : @reads.map { |r| r.to_species }
      outputs = @writes.empty? ? ['0:0|void'] : @writes.map { |r| r.to_species }

      # Combine both sides into the species description
      separator = ' '+WEDGE+' '
      @species = inputs.uniq.join(separator)+' '+ARROW+' '+outputs.uniq.join(separator)
    end

    # Method to translate the array reference characterisations into a string.
    def translate_into_arc
      arcs = @references.map { |r| r.to_arc }
      @arc = arcs.join(' , ')
    end

    # Perform the dependence test for the current loop nest. This method gathers
    # all pairs of array references to test and calls the actual dependence
    # tests. Currently, the dependence tests are a combination of the GCD test
    # and the Banerjee test.
    def has_dependences?

      # Gather all the read/write and write/write pairs to test
      pairs = []
      @references.select { |r| r.tA == 'write' }.each do |writer|
        @references.each do |other|

          # Only if the array names are the same and they are not tested before
          next unless writer.tN == other.tN && !pairs.include?([other,writer])

          # Only if the array references are different (e.g. don't test
          # A[i][j+4] and A[i][j+4]).
          next unless writer.get_references != other.get_references
          pairs << [writer,other]
        end
      end

      # Test all pairs; a single positive result is sufficient
      pairs.uniq.each do |first,second|
        return true if Dependence.new(first,second,@verbose).result
      end
      false
    end

    # Perform a check to see if the loop nest has species that are not just
    # formed from shared or full patterns. If so, there is no parallelism.
    # Removed nests, nests with dependences and nests without a species
    # description never have species.
    def has_species?
      return false if @removed || @has_dependences || @species == ''
      only_full = @reads ? @reads.none? { |a| a.pattern != 'full' } : false
      only_shared = @writes ? @writes.none? { |a| a.pattern != 'shared' } : false
      !(only_full && only_shared)
    end

    # Method to print the start pragma of a species.
    def print_species_start
      prefix = PRAGMA_DELIMITER_START+PRAGMA_SPECIES
      prefix+' kernel '+@species+PRAGMA_DELIMITER_END
    end

    # Method to print the end pragma of a species.
    def print_species_end
      prefix = PRAGMA_DELIMITER_START+PRAGMA_SPECIES
      prefix+' endkernel '+@name+PRAGMA_DELIMITER_END
    end

    # Method to print the start of an array reference characterisation (ARC).
    def print_arc_start
      prefix = PRAGMA_DELIMITER_START+PRAGMA_ARC
      prefix+' kernel '+@arc+PRAGMA_DELIMITER_END
    end

    # Method to print the end of an array reference characterisation (ARC).
    def print_arc_end
      prefix = PRAGMA_DELIMITER_START+PRAGMA_ARC
      prefix+' endkernel '+@name+PRAGMA_DELIMITER_END
    end

    # Method to print the copyin pragma. Each copyin is printed with twice its
    # +id+ as argument to +to_copy+.
    def print_copyins
      copies = @copyins.map { |a| a.to_copy(2*a.id) }
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' copyin '+copies.join(' '+WEDGE+' ')+PRAGMA_DELIMITER_END
    end

    # Method to print the copyout pragma. Each copyout is printed with twice
    # its +id+ plus one as argument to +to_copy+.
    def print_copyouts
      copies = @copyouts.map { |a| a.to_copy(2*a.id+1) }
      PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' copyout '+copies.join(' '+WEDGE+' ')+PRAGMA_DELIMITER_END
    end

    # Method to check if the loop nest has copyins, i.e. at least one copyin
    # with a non-empty +tD+.
    def has_copyins?
      copyins.any? { |r| !r.tD.empty? }
    end

    # Method to check if the loop nest has copyouts, i.e. at least one copyout
    # with a non-empty +tD+.
    def has_copyouts?
      copyouts.any? { |r| !r.tD.empty? }
    end
  end

end
|