bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,174 @@
|
|
1
|
+
|
2
|
+
# Decide whether two loop nests may be fused (see algorithm in paper/thesis).
#
# Every reference of +a+ (writes first, then reads) is paired with every
# reference of +b+. A pair is only inspected when both references name the
# same array (+tN+) and at least one of them is a write. Such a pair blocks
# fusion as soon as the string forms of its +tD+, +tE+ or +tS+ fields differ.
#
# Progress messages are printed with +puts+. Returns +true+ when no pair
# blocks fusion, +false+ at the first blocking pair.
def fusion_is_legal?(a, b)
  (a.writes + a.reads).each do |ref_a|
    (b.writes + b.reads).each do |ref_b|

      # Only same-array pairs involving at least one write are relevant
      next unless ref_a.tN == ref_b.tN
      next unless ref_a.tA == 'write' || ref_b.tA == 'write'
      puts Adarwin::MESSAGE+"Evaluating #{ref_a.to_arc} and #{ref_b.to_arc} for fusion"

      # The three fields have to agree (compared as strings) to allow fusion
      compatible = ref_a.tD.to_s == ref_b.tD.to_s &&
                   ref_a.tE.to_s == ref_b.tE.to_s &&
                   ref_a.tS.to_s == ref_b.tS.to_s
      next if compatible

      puts Adarwin::MESSAGE+"Unable to fuse #{ref_a.to_arc} and #{ref_b.to_arc}"
      return false
    end
  end

  puts Adarwin::MESSAGE+"Applying fusion"
  true
end
|
18
|
+
|
19
|
+
|
20
|
+
# Perform the kernel fusion transformations.
#
# Walks over all nests that report +has_species?+ and tries to fuse each one
# with the nest remembered as +prev+. A pair is fused when both have the same
# number of direct loops and +fusion_is_legal?+ agrees. Fused code is inserted
# through +prev.code.insert_prev+, wrapped in a new +Adarwin::Nest+ that is
# appended to +nests+, and both original nests are flagged as removed.
#
# +settings+ selects the strategy:
# * 1: one fused nest spanning the union of the loop bounds; bodies are
#   guarded by if-statements where the bounds differ
# * 2: a main nest up to the smaller upper bound plus an epilogue nest
#   (experimental)
# Any other value produces no fused code, but the two nests are still flagged
# as removed. NOTE(review): that looks unintended - confirm with the callers.
#
# Returns the array of candidate nests.
def kernel_fusion(nests, settings)

  # Only nests with species are candidates for fusion
  fusable = nests.select { |candidate| candidate.has_species? }

  prev = nil
  fusable.each do |curr|

    # The nest that becomes +prev+ for the next iteration. When fusion
    # succeeds this is replaced by the last newly created fused nest.
    successor = curr

    if prev

      # Get the loop details. On a mismatch the iteration is abandoned and
      # +prev+ is left untouched.
      loops_prev = prev.code.get_direct_loops
      loops_curr = curr.code.get_direct_loops
      unless loops_prev.size == loops_curr.size
        puts Adarwin::MESSAGE+"Unable to apply fusion, loop count does not match"
        next
      end

      # Only proceed if fusion is legal for this combination
      if fusion_is_legal?(prev, curr)
        fused_code = []

        # Get the bodies (current nest first, then the previous one)
        body_curr = get_body(loops_curr.size, curr.code.clone)
        body_prev = get_body(loops_prev.size, prev.code.clone)
        loop_pairs = loops_prev.zip(loops_curr)

        # Fuse everything together: include if-statements for non-matching loop bounds
        if settings == 1

          # Create the new loops and rewrite both bodies along the way
          loops_target = loop_pairs.map do |loop_a, loop_b|
            raise_error("Unequal step count #{loop_a[:step]} versus #{loop_b[:step]}") if loop_a[:step] != loop_b[:step]
            lower = exact_min(loop_a[:min], loop_b[:min])
            upper = exact_max(loop_a[:max], loop_b[:max])
            fused_var = loop_a[:var] + loop_b[:var]

            # Replace all occurrences of the loop variables by the fused variable
            body_prev = body_prev.replace_variable(loop_a[:var], fused_var)
            body_curr = body_curr.replace_variable(loop_b[:var], fused_var)

            # Set minimum if-statement conditions
            body_prev = create_if(fused_var, lower, loop_a[:min], body_prev, '>=')
            body_curr = create_if(fused_var, lower, loop_b[:min], body_curr, '>=')

            # Set maximum if-statement conditions
            body_prev = create_if(fused_var, upper, loop_a[:max], body_prev, '<=')
            body_curr = create_if(fused_var, upper, loop_b[:max], body_curr, '<=')

            { :var => fused_var, :min => lower, :max => upper, :step => loop_a[:step] }
          end

          # Generate the new code
          fused_code.push(code_from_loops(loops_target, [body_prev, body_curr]))

        # Create a prologue in case of mismatching loop bounds (experimental)
        elsif settings == 2

          # Generate the loop body
          loop_pairs.each do |loop_a, loop_b|
            raise_error("Unequal step count #{loop_a[:step]} versus #{loop_b[:step]}") if loop_a[:step] != loop_b[:step]
            body_prev = body_prev.replace_variable(loop_a[:var], loop_a[:var] + loop_b[:var])
            body_curr = body_curr.replace_variable(loop_b[:var], loop_a[:var] + loop_b[:var])
          end

          # Create the main loop nest (smallest lower bound up to smallest upper bound)
          main_loops = loop_pairs.map do |loop_a, loop_b|
            lower = exact_min(loop_a[:min], loop_b[:min])
            shared_upper = exact_min(loop_a[:max], loop_b[:max])
            { :var => loop_a[:var] + loop_b[:var], :min => lower, :max => shared_upper, :step => loop_a[:step] }
          end
          fused_code.push(code_from_loops(main_loops, [body_prev, body_curr]))

          # Create the epilogue (smallest upper bound up to largest upper bound).
          # Its body is the body of whichever nest has the larger upper bound;
          # the last loop pair with differing bounds decides.
          epilogue_body = []
          epilogue_loops = loop_pairs.map do |loop_a, loop_b|
            shared_upper = exact_min(loop_a[:max], loop_b[:max])
            upper = exact_max(loop_a[:max], loop_b[:max])
            if loop_a[:max] != loop_b[:max]
              epilogue_body = (loop_a[:max] == upper) ? [body_curr] : [body_prev]
            end
            { :var => loop_a[:var] + loop_b[:var], :min => shared_upper, :max => upper, :step => loop_a[:step] }
          end
          fused_code.push(code_from_loops(epilogue_loops, epilogue_body))
        end

        # Add the newly created code to the original code
        fused_code.each_with_index do |codelet, index|
          puts codelet
          prev.code.insert_prev(codelet)

          # Create a new nest for the fused code
          successor = Adarwin::Nest.new(prev.level, codelet, prev.id, prev.name.gsub(/_k(\d+)/,'_fused')+index.to_s, prev.verbose, 1)
          nests.push(successor)
        end

        # Set the other nests as to-be-removed
        prev.removed = true
        curr.removed = true
      end
    end

    # Next nest
    prev = successor
  end
end
|
133
|
+
|
134
|
+
# Return the body of a perfectly nested loop nest.
#
# Strips +num_loops+ loop levels from +code+. At every level the first element
# of the current code is taken when it is a for-statement with a body;
# otherwise the current code itself has to be such a for-statement. The
# statements of that loop (+stmt.stmts+) become the code of the next level.
# Reports "Not a perfect nested loop" through +raise_error+ when a level is
# not a for-statement with a body.
def get_body(num_loops, code)
  remaining = num_loops
  body = code
  until remaining == 0

    # Step into the leading for-statement if there is one
    head = body.first
    body = head if head.for_statement? && head.stmt

    # At this point the code must be a loop with a body
    return raise_error("Not a perfect nested loop") unless body.for_statement? && body.stmt

    body = body.stmt.stmts
    remaining -= 1
  end
  body
end
|
145
|
+
|
146
|
+
# Wrap a statement in an if-statement when a loop bound deviates from the
# reference bound.
#
# When +reference_bound+ equals +loop_bound+ the +code+ is returned untouched.
# Otherwise a new statement of the form
# "if(<loop_var> <condition> <loop_bound>) { <code> }" is parsed with
# +C::Statement.parse+ and returned.
def create_if(loop_var, reference_bound, loop_bound, code, condition)
  return code if reference_bound == loop_bound
  C::Statement.parse("if(#{loop_var} #{condition} #{loop_bound}) { #{code.to_s} }")
end
|
153
|
+
|
154
|
+
# Generate code from a combination of loops and statements (the body).
#
# +loops+ is a list of hashes with the keys :var, :min, :max and :step,
# ordered from outermost to innermost. Every loop becomes a C for-statement
# that declares its variable as +int+ and runs from :min up to and including
# :max. A :step equal to the string '1' yields "var++", anything else
# "var=var+step". The +statements+ are concatenated as the innermost body.
# The resulting source text is parsed with +C::Statement.parse+.
def code_from_loops(loops, statements)

  # Start of the loops
  headers = loops.map do |loop_datum|
    var = loop_datum[:var]
    advance = (loop_datum[:step] == '1') ? "#{var}++" : "#{var}=#{var}+#{loop_datum[:step]}"
    "for(int #{var}=#{loop_datum[:min]}; #{var}<=#{loop_datum[:max]}; #{advance}) {"
  end

  # Loop body
  body = statements.map { |statement| statement.to_s }

  # End of the loops: one closing brace per loop
  closing = '}' * loops.size

  C::Statement.parse(headers.join + body.join + closing)
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
|
2
|
+
module Adarwin

  # This class represents an interval [a..b] including a and b. Both bounds
  # are stored as (simplified) strings. The class has the following methods:
  # * Initialise the interval (+initialize+)
  # * Print the interval (+to_s+)
  # * Merge an interval with another interval (+merge+)
  # * Return the length of the interval (+length+)
  class Interval
    attr_accessor :a, :b

    # Initialise the interval. Both bounds are converted to strings and passed
    # through +simplify+. The +compare+ method (which uses the loop
    # information in +loops+) then decides which one is the upper bound: the
    # bounds are swapped when +compare+ answers 'gt'. For 'lt', 'eq' and any
    # other answer the given order is kept.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def initialize(a,b,loops)
      @loops = loops
      a = simplify(a.to_s)
      b = simplify(b.to_s)
      case compare(a,b,@loops)
      when 'lt', 'eq'
        @a = a
        @b = b
      when 'gt'
        @a = b
        @b = a
      else
        # Unknown outcome: keep the bounds in the order they were given
        @a = a
        @b = b
      end
    end

    # Print the interval as a string: lower bound, +RANGE_SEP+, upper bound.
    def to_s
      @a+RANGE_SEP+@b
    end

    # Merge this interval with another interval, such that this interval
    # covers both. This is based on a comparison made by the +compare+ method,
    # which is an approximation based on loop information. If the comparison
    # is inconclusive, the lower bound of the other interval and the upper
    # bound of this interval are used.
    # FIXME: Uses the +compare+ method which might be based on a guess
    def merge(other_interval)
      @a = case compare(@a,other_interval.a,@loops)
        when 'gt', 'eq' then other_interval.a
        when 'lt' then @a
        else other_interval.a
      end
      @b = case compare(@b,other_interval.b,@loops)
        when 'gt', 'eq' then @b
        when 'lt' then other_interval.b
        else @b
      end
    end

    # Method to compute the length of the interval. For example, the length of
    # [a..b] is equal to (b-a+1). The expression is passed through +simplify+.
    def length
      simplify("(#{@b})-(#{@a})+1")
    end
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Recursive copy optimisations.
#
# Applies the first and second set of copy optimisations to +nests+, then
# descends into the children of every nest (as reported by +get_children+),
# and finally applies the third (inter-level) set to +nests+ twice in a row.
# NOTE(review): the repeated third pass presumably picks up copies that a
# single pass leaves behind - confirm before removing it.
def recursive_copy_optimisations(nests,options)
  perform_copy_optimisations1(nests,options)
  perform_copy_optimisations2(nests,options)

  # Handle the deeper levels before the inter-level optimisations
  nests.each do |nest|
    descendants = get_children(nest)
    next if descendants.empty?
    recursive_copy_optimisations(descendants,options)
  end

  perform_copy_optimisations3(nests,options)
  perform_copy_optimisations3(nests,options)
end
|
14
|
+
|
15
|
+
# First set of copyin/copyout optimisations (recursive).
#
# Looks at every pair of consecutive nests. As soon as one change is made the
# whole method restarts from the beginning, so that no list is iterated
# further after it has been modified. The following changes are made:
# * options[:mem_remove_spurious]: drop a copyin of the current nest that
#   matches (same +tN+ and +tD+ as strings) a copyout of the previous nest
# * options[:mem_remove_spurious]: drop a copyout of the previous nest that
#   matches a copyout of the current nest
# * options[:mem_copyin_to_front]: move a copyin of the current nest to the
#   previous nest if the previous nest does not write that array
#
# Returns +nests+.
def perform_copy_optimisations1(nests,options)
  nests.each_cons(2) do |previous, current|

    if options[:mem_remove_spurious]

      # Remove spurious copies (out/in)
      previous.copyouts.each do |copyout|
        current.copyins.each do |copyin|
          next unless copyout.tN.to_s == copyin.tN.to_s && copyout.tD.to_s == copyin.tD.to_s
          current.copyins.delete(copyin)
          return perform_copy_optimisations1(nests,options)
        end
      end

      # Remove spurious copies (out/out)
      previous.copyouts.each do |copyout|
        current.copyouts.each do |other_copyout|
          next unless copyout.tN.to_s == other_copyout.tN.to_s && copyout.tD.to_s == other_copyout.tD.to_s
          previous.copyouts.delete(copyout)
          return perform_copy_optimisations1(nests,options)
        end
      end
    end

    # Move copyins to the front
    if options[:mem_copyin_to_front]
      current.copyins.each do |copyin|
        next unless previous.writes && !previous.writes.map{ |w| w.tN }.include?(copyin.tN)
        previous.copyins.push(copyin)
        current.copyins.delete(copyin)
        return perform_copy_optimisations1(nests,options)
      end
    end
  end
  nests
end
|
63
|
+
|
64
|
+
# Second set of copyin/copyout optimisations (non-recursive).
#
# For every nest:
# * options[:mem_copyout_to_back]: the +id+ of each copyout is incremented
#   once for every nest with a higher id at the same depth that does not
#   write the copied array. The scan stops at the first such nest that does
#   write it (or that has no +writes+).
# * options[:mem_remove_spurious]: of two copyins with the same +tN+ and +tD+
#   (as strings), the one with the higher +id+ is deleted.
#
# NOTE: the duplicate removal deletes from the very array it iterates over.
# That is kept as-is on purpose: the iteration ends right after the deletion,
# which is what keeps one of the two duplicates alive.
def perform_copy_optimisations2(nests,options)
  nests.each do |nest|

    # Move copyouts to the back
    if options[:mem_copyout_to_back]
      nest.copyouts.each do |copyout|
        nests.each do |later|
          next unless later.id > nest.id && later.depth == nest.depth
          break unless later.writes && !later.writes.map{ |w| w.tN }.include?(copyout.tN)
          copyout.id += 1
        end
      end
    end

    # Remove spurious copies (double in)
    if options[:mem_remove_spurious]
      copyins = nest.copyins
      copyins.each_with_index do |copyin,index|
        copyins.each_with_index do |other_copyin,other_index|
          next if index == other_index
          next unless copyin.tN.to_s == other_copyin.tN.to_s && copyin.tD.to_s == other_copyin.tD.to_s
          copyins.delete((copyin.id > other_copyin.id) ? copyin : other_copyin)
        end
      end
    end
  end
end
|
103
|
+
|
104
|
+
# Third set of copyin/copyout optimisations (inter-level).
#
# For every nest that has children (as reported by +get_children+) and when
# options[:mem_to_outer_loop] is set, copies are moved from the children to
# the nest itself:
# * A copyout of any child is moved (and gets the id of the nest) if it does
#   not depend on any outer-loop variable of the nest, if no child copies the
#   same array in, and if its synchronisation id (+get_sync_id+) is not
#   smaller than the largest value of 2*id+1 over all children.
# * A copyin of the first child is moved if it does not depend on any
#   outer-loop variable of the nest, if no later child copies the same array
#   in, and if no later child other than the last one copies it out.
#
# The copy lists are iterated through a snapshot (+dup+): moving a copy
# deletes it from the list it came from, and deleting from an array while
# iterating over that same array makes +each+ skip the next element.
#
# Returns +nests+.
def perform_copy_optimisations3(nests,options)
  nests.each do |nest|
    children = get_children(nest)
    next if children.empty?

    # Inter-level loop optimisations (move to outer loop)
    next unless options[:mem_to_outer_loop]

    # Move copyouts to outer loops
    max_id = children.map{ |c| 2*c.id+1 }.max
    children.each do |child|
      child.copyouts.dup.each do |copyout|
        to_outer_loop = nest.outer_loops.none?{ |l| copyout.depends_on?(l[:var]) }
        to_outer_loop &&= children.none?{ |other_child| other_child.copyins.map{ |c| c.tN }.include?(copyout.tN) }
        to_outer_loop &&= copyout.get_sync_id >= max_id
        next unless to_outer_loop
        copyout.id = nest.id
        nest.copyouts.push(copyout)
        child.copyouts.delete(copyout)
      end
    end

    # Move copyins to outer loops
    first_child = children.first
    later_children = children.drop(1)
    first_child.copyins.dup.each do |copyin|
      to_outer_loop = nest.outer_loops.none?{ |l| copyin.depends_on?(l[:var]) }
      to_outer_loop &&= later_children.none? do |child|
        child.copyins.map{ |c| c.tN }.include?(copyin.tN) ||
          (child.copyouts.map{ |c| c.tN }.include?(copyin.tN) && child != children.last)
      end
      next unless to_outer_loop
      nest.copyins.push(copyin)
      first_child.copyins.delete(copyin)
    end
  end
end
|
data/lib/adarwin/nest.rb
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
|
2
|
+
module Adarwin

  # This class represents a loop nest. The end goal is to annotate the loop nest
  # with the corresponding species information. If the loop nest cannot be
  # parallelised (if there are dependences), the species information is not
  # printed.
  #
  # This class contains methods to perform among others the following:
  # * Find all array references in the loop nest
  # * Merge found array references into another array reference
  # * Translate array references into species
  # * Perform dependence tests to check for parallelism
  #
  class Nest
    attr_accessor :code, :species, :name, :verbose
    attr_accessor :fused, :removed
    attr_accessor :copyins, :copyouts
    attr_accessor :depth, :level, :id
    attr_accessor :reads, :writes
    attr_accessor :outer_loops

    # Method to initialise the loop nest. The loop nest is initialised with the
    # following variables:
    # * An identifier for the order/depth in which the nest appears (+level+);
    #   its length is stored as the depth of the nest
    # * The loop nest body in AST form (+code+)
    # * A unique identifier for this loop nest (+id+)
    # * A human readable name for this loop nest (+name+), which is extended
    #   with '_k' and the identifier plus one
    # * Whether or not verbose information should be printed (+verbose+)
    # * A number larger than zero if this nest is the result of fusion
    #   (+fused+); the dependence test is skipped for such nests
    def initialize(level, code, id, name, verbose, fused=0)
      @level = level
      @depth = level.length
      @code = code
      @id = id
      @name = name + "_k#{@id + 1}"
      @verbose = verbose

      # Set the default values in case there are dependences
      @species = ''
      @fused = fused
      @removed = false
      @copyins = []
      @copyouts = []

      # Get all loops from the loop body and subtract the outer loops from all
      # loops to obtain the set of inner loops (loops in the body).
      @all_loops = @code.get_all_loops
      @outer_loops = @code.get_direct_loops
      @inner_loops = @all_loops - @outer_loops

      # Process the read/write nodes in the loop body to obtain the array
      # reference characterisations. The references also need to be aware of all
      # loop data and of any if-statements in the loop body.
      accesses = @code.clone.get_accesses
      @references = accesses.map do |access|
        Reference.new(access, @id, @inner_loops, @outer_loops, @verbose)
      end

      # Perform the dependence test, except for fused loop nests. Proceed only
      # if there are no dependences and there is something to characterise.
      @has_dependences = (@fused > 0) ? false : has_dependences?
      return if @has_dependences || @references.empty?

      # Merge array reference characterisations into other array references
      merge_references

      # Translate array reference characterisations into species and ARC
      translate_into_species
      translate_into_arc

      # Set the copyin/copyout data from the array references
      @copyins = @references.select { |reference| reference.tA == 'read' }
      @copyouts = @references.select { |reference| reference.tA == 'write' }
    end

    # Perform the algorithm to merge array reference characterisations into
    # merged array references. This method is a copy of the merging algorithm
    # as found in the scientific paper. Two different references are merged if
    # their name (+tN+), access direction (+tA+) and step (+tS+) are equal.
    # TODO: Complete this algorithm to match the scientific paper version.
    def merge_references
      @references.each do |target|
        @references.each do |source|
          next if target == source

          # Perform the checks to see if merging is valid
          next unless target.tN == source.tN && target.tA == source.tA && target.tS == source.tS

          # Merge the domain (source into target)
          target.tD.each_with_index { |interval, dim| interval.merge(source.tD[dim]) }

          # Merge the number of elements (source into target)
          target.tE.each_with_index { |interval, dim| interval.merge(source.tE[dim]) }

          # Delete the merged reference
          @references.delete(source)

          # Something has changed: re-run the whole algorithm again
          merge_references
          return
        end
      end
    end

    # Method to translate the array reference characterisations into species.
    # The actual logic is performed within the Reference class. In this method,
    # only the combining of the separate parts is performed.
    def translate_into_species

      # Obtain the reads and writes
      @reads = @references.select { |reference| reference.tA == 'read' }
      @writes = @references.select { |reference| reference.tA == 'write' }

      # Create a 'void' access pattern in case there is no read or no write.
      # Else, set the species for the individual accesses.
      void = '0:0|void'
      inputs = @reads.empty? ? [void] : @reads.map { |reference| reference.to_species }
      outputs = @writes.empty? ? [void] : @writes.map { |reference| reference.to_species }

      # Combine the unique descriptions into the species
      separator = ' '+WEDGE+' '
      @species = inputs.uniq.join(separator)+' '+ARROW+' '+outputs.uniq.join(separator)
    end

    # Method to translate the array reference characterisations into a string.
    def translate_into_arc
      @arc = @references.map { |reference| reference.to_arc }.join(' , ')
    end

    # Perform the dependence test for the current loop nest. This method gathers
    # all pairs of array references to test and hands every pair to the
    # Dependence class (described in the original documentation as a combination
    # of the GCD test and the Banerjee test). Returns true at the first pair for
    # which a dependence is reported, false otherwise.
    def has_dependences?

      # Gather all the read/write and write/write pairs to test
      pairs = []
      @references.select { |reference| reference.tA == 'write' }.each do |write|
        @references.each do |other|

          # Only if the array names are the same and they are not tested before
          next unless write.tN == other.tN && !pairs.include?([other, write])

          # Only if the array references are different (e.g. don't test
          # A[i][j+4] and A[i][j+4]).
          pairs << [write, other] if write.get_references != other.get_references
        end
      end

      # Test all unique pairs
      pairs.uniq.any? { |first, second| Dependence.new(first, second, @verbose).result }
    end

    # Perform a check to see if the loop nest has species. This is not the case
    # for removed nests, for nests with dependences and for nests without a
    # species description. It is also not the case if all reads have the 'full'
    # pattern and all writes have the 'shared' pattern: in that case there is no
    # parallelism.
    def has_species?
      return false if @removed || @has_dependences || @species == ''
      only_full = @reads ? @reads.all? { |access| access.pattern == 'full' } : false
      only_shared = @writes ? @writes.all? { |access| access.pattern == 'shared' } : false
      !(only_full && only_shared)
    end

    # Method to print the start pragma of a species.
    def print_species_start
      pragma_line(PRAGMA_SPECIES, ' kernel ', @species)
    end

    # Method to print the end pragma of a species.
    def print_species_end
      pragma_line(PRAGMA_SPECIES, ' endkernel ', @name)
    end

    # Method to print the start of an array reference characterisation (ARC).
    def print_arc_start
      pragma_line(PRAGMA_ARC, ' kernel ', @arc)
    end

    # Method to print the end of an array reference characterisation (ARC).
    def print_arc_end
      pragma_line(PRAGMA_ARC, ' endkernel ', @name)
    end

    # Method to print the copyin pragma. Copyins are printed with twice their
    # id as argument to +to_copy+.
    def print_copyins
      copies = @copyins.map { |copy| copy.to_copy(2*copy.id) }
      pragma_line(PRAGMA_SPECIES, ' copyin ', copies.join(' '+WEDGE+' '))
    end

    # Method to print the copyout pragma. Copyouts are printed with twice their
    # id plus one as argument to +to_copy+.
    def print_copyouts
      copies = @copyouts.map { |copy| copy.to_copy(2*copy.id+1) }
      pragma_line(PRAGMA_SPECIES, ' copyout ', copies.join(' '+WEDGE+' '))
    end

    # Method to check if the loop nest has copyins: at least one copyin with a
    # non-empty domain (+tD+).
    def has_copyins?
      copyins.any? { |reference| !reference.tD.empty? }
    end

    # Method to check if the loop nest has copyouts: at least one copyout with a
    # non-empty domain (+tD+).
    def has_copyouts?
      copyouts.any? { |reference| !reference.tD.empty? }
    end

    private

    # Build a pragma string: the start delimiter, the pragma kind, the keyword
    # (including its surrounding spaces), the payload and the end delimiter.
    def pragma_line(kind, keyword, payload)
      PRAGMA_DELIMITER_START + kind + keyword + payload + PRAGMA_DELIMITER_END
    end
  end

end
|