bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -0,0 +1,76 @@
|
|
1
|
+
|
2
|
+
module Adarwin
|
3
|
+
|
4
|
+
# This is the C99 pre-processor for Adarwin. It has the following tasks:
|
5
|
+
# * Extract the SCoP part from the code (the region of interest)
|
6
|
+
# * Extract the header code (defines, includes, etc.)
|
7
|
+
# * Output the original code without pre-processor directives
|
8
|
+
# * Output the original code minus the SCoP (SCoP to be filled in later)
|
9
|
+
class Preprocessor < Common
|
10
|
+
attr_reader :source_code, :header_code, :parsed_code, :scop_code, :target_code
|
11
|
+
|
12
|
+
# Regular-expression fragment matching optional leading whitespace (tabs, spaces).
|
13
|
+
WHITESPACE = '\s*'
|
14
|
+
|
15
|
+
# This is the method which initializes the preprocessor. Initialization
|
16
|
+
# requires the target source code to process, which is then stored in the instance
|
17
|
+
# variable +@source_code+.
|
18
|
+
def initialize(source_code)
|
19
|
+
@source_code = source_code
|
20
|
+
@header_code = ''
|
21
|
+
@parsed_code = ''
|
22
|
+
@target_code = ''
|
23
|
+
@scop_code = ''
|
24
|
+
end
|
25
|
+
|
26
|
+
# This is the method to perform the actual preprocessing. This method takes
|
27
|
+
# care of all the pre-processor tasks. The output is stored in the
|
28
|
+
# attributes +header_code+, +parsed_code+, +target_code+, and +scop_code+.
|
29
|
+
# FIXME: What about multi-line statements? For example, a multi-line comment
|
30
|
+
# could have a commented-out SCoP or define or include.
|
31
|
+
def process
|
32
|
+
scop = false
|
33
|
+
scop_in_code = false
|
34
|
+
|
35
|
+
# Process the file line by line
|
36
|
+
@source_code.each_line.with_index do |line,index|
|
37
|
+
if line =~ /^#{WHITESPACE}#/
|
38
|
+
|
39
|
+
# Keep 'include' statements as header code
|
40
|
+
if line =~ /^#{WHITESPACE}#include/
|
41
|
+
@header_code += line
|
42
|
+
@target_code += line
|
43
|
+
|
44
|
+
# Process 'define' statements
|
45
|
+
elsif line =~ /^#{WHITESPACE}#define/
|
46
|
+
@header_code += line
|
47
|
+
@target_code += line
|
48
|
+
|
49
|
+
# Found the start of a SCoP
|
50
|
+
elsif line =~ /^#{WHITESPACE}#{SCOP_START}/
|
51
|
+
scop = true
|
52
|
+
scop_in_code = true
|
53
|
+
@parsed_code += '{'+NL
|
54
|
+
|
55
|
+
# Found the end of a SCoP
|
56
|
+
elsif line =~ /^#{WHITESPACE}#{SCOP_END}/
|
57
|
+
scop = false
|
58
|
+
@parsed_code += '}'+NL
|
59
|
+
end
|
60
|
+
|
61
|
+
# Nothing special in the code going on here
|
62
|
+
else
|
63
|
+
@scop_code += line if scop
|
64
|
+
@parsed_code += line
|
65
|
+
@target_code += line
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
# Exit if there is no SCoP found
|
70
|
+
if !scop_in_code
|
71
|
+
raise_error('No "#pragma scop" found in the source code')
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
@@ -0,0 +1,261 @@
|
|
1
|
+
|
2
|
+
module Adarwin
|
3
|
+
|
4
|
+
# This class represents an array reference characterisation. This reference is
|
5
|
+
# constructed as a 5-tuple (tN,tA,tD,tE,tS) with the following information:
|
6
|
+
# * tN: The name of the reference.
|
7
|
+
# * tA: The access direction (read or write).
|
8
|
+
# * tD: The full domain accessed.
|
9
|
+
# * tE: The number of elements accessed each iteration (the size).
|
10
|
+
# * tS: The step of accesses among iterations.
|
11
|
+
# To be able to compute the 5-tuple, the reference also stores information
|
12
|
+
# about the loops and conditional statements to which the original array
|
13
|
+
# reference is subjected.
|
14
|
+
#
|
15
|
+
# This class contains methods to perform among others the following:
|
16
|
+
# * Initialise the class and set the 5-tuple (N,A,D,E,S)
|
17
|
+
# * Retrieve information on array indices
|
18
|
+
# * Print in different forms (species, ARC, copy/sync pragmas)
|
19
|
+
class Reference
|
20
|
+
attr_accessor :tN, :tA, :tD, :tE, :tS
|
21
|
+
attr_accessor :bounds, :indices, :pattern, :id
|
22
|
+
attr_accessor :all_loops
|
23
|
+
|
24
|
+
# This method initialises the array reference class. It takes details of the
|
25
|
+
# reference itself and details of the loop nest it belongs to. The method
|
26
|
+
# performs among others the following:
|
27
|
+
# * It initialises the 5-tuple (N,A,D,E,S)
|
28
|
+
# * It constructs the sets of loops (all,inner,outer) for this reference
|
29
|
+
# * It computes the bounds based on loop data and on if-statements
|
30
|
+
# * It computes the domain (D), number of elements (E), and step (S)
|
31
|
+
def initialize(reference,id,inner_loops,outer_loops,verbose)
|
32
|
+
@id = id
|
33
|
+
|
34
|
+
# Initialise the 5-tuple (already fill in N and A)
|
35
|
+
@tN = reference[:name]
|
36
|
+
@tA = reference[:type]
|
37
|
+
@tD = []
|
38
|
+
@tE = []
|
39
|
+
@tS = []
|
40
|
+
|
41
|
+
# Set the inner loops as the loop nest's inner loop intersected with all
|
42
|
+
# loops found for this statement. Beware of the difference between loops
|
43
|
+
# of a loop nest and loops of a statement.
|
44
|
+
@all_loops = reference[:loop_data]
|
45
|
+
@inner_loops = inner_loops & @all_loops
|
46
|
+
@outer_loops = outer_loops
|
47
|
+
|
48
|
+
# Set the indices of the array reference (e.g. 2*i+4). The size of this
|
49
|
+
# array is equal to the number of dimensions of the array.
|
50
|
+
@indices = reference[:indices]
|
51
|
+
|
52
|
+
# Set the if-statements for the reference. Process them together with the
|
53
|
+
# loop start/end conditions to obtain a final set of conditions/bounds.
|
54
|
+
@bounds = []
|
55
|
+
loop_vars = @all_loops.map{ |l| l[:var]}
|
56
|
+
@all_loops.each do |loop_data|
|
57
|
+
conditions = [loop_data[:min],loop_data[:max]]
|
58
|
+
reference[:if_statements].each do |if_statement|
|
59
|
+
condition_if = if_statement.map{ |c| solve(c,loop_data[:var],loop_vars) }
|
60
|
+
conditions = [
|
61
|
+
max(conditions[0],condition_if[0]),
|
62
|
+
min(conditions[1],condition_if[1])
|
63
|
+
]
|
64
|
+
end
|
65
|
+
@bounds << { :var => loop_data[:var], :min => conditions[0], :max => conditions[1] }
|
66
|
+
end
|
67
|
+
|
68
|
+
# Compute the domain (D) based on the bounds. The bounds are derived from
|
69
|
+
# the if-statements and for-loops.
|
70
|
+
@tD = @indices.map do |i|
|
71
|
+
index_to_interval(i,@bounds)
|
72
|
+
end
|
73
|
+
|
74
|
+
# Compute the number of elements (E) accessed every iteration (the size).
|
75
|
+
# TODO: Clean-up this method.
|
76
|
+
@tE = @indices.map do |i|
|
77
|
+
#if !dependent?(i,@all_loops)
|
78
|
+
# puts "independent"
|
79
|
+
# index_to_interval(i,@inner_loops)
|
80
|
+
#else
|
81
|
+
#puts "dependent"
|
82
|
+
get_base_offset(i)
|
83
|
+
#end
|
84
|
+
end
|
85
|
+
|
86
|
+
# Compute the step taken. There are 3 cases considered; the index is: 1)
|
87
|
+
# dependent on the outer loops, 2) dependent on the inner loops, or 3)
|
88
|
+
# independent of any loops.
|
89
|
+
@tS = @indices.map do |i|
|
90
|
+
if dependent?(i,@inner_loops)
|
91
|
+
index_to_interval(i,@inner_loops).length
|
92
|
+
elsif dependent?(i,@outer_loops)
|
93
|
+
get_step(i,@outer_loops)
|
94
|
+
else
|
95
|
+
'0'
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
# If the step and the domain are equal in size, the step can also be set
|
100
|
+
# to zero to reflect accessing the full array.
|
101
|
+
@tS.each_with_index do |tS,index|
|
102
|
+
if (tS == @tD[index].length) || (@tD[index].length == '1')
|
103
|
+
@tS[index] = '0'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
# Print the result
|
108
|
+
puts MESSAGE+"Found: #{to_arc}" if verbose
|
109
|
+
end
|
110
|
+
|
111
|
+
# This method replaces loop variables for a given set of loops with 0. This
|
112
|
+
# basically gives us the offset of array references with respect to the loop
|
113
|
+
# variable. For example, A[2*i+4] and A[i+j+3] will give us [4,j+3] with
|
114
|
+
# respect to an i-loop.
|
115
|
+
def get_base_offset(index)
|
116
|
+
index = index.clone
|
117
|
+
@outer_loops.each do |for_loop|
|
118
|
+
search = C::Variable.parse(for_loop[:var])
|
119
|
+
replace = C::Expression.parse('0')
|
120
|
+
index = index.search_and_replace_node(search,replace)
|
121
|
+
end
|
122
|
+
return index_to_interval(index,@inner_loops)
|
123
|
+
end
|
124
|
+
|
125
|
+
# Method to fill in the ranges for an array reference. This is based on
|
126
|
+
# information of the loop nests. The output is an interval.
|
127
|
+
def index_to_interval(index,loops)
|
128
|
+
access_min = find_extreme(:min,index,loops)
|
129
|
+
access_max = find_extreme(:max,index,loops)
|
130
|
+
return Interval.new(access_min,access_max,@all_loops)
|
131
|
+
end
|
132
|
+
|
133
|
+
# Substitute loop data with the upper-bound or lower-bound of a loop to find
|
134
|
+
# the minimum/maximum of an array reference. The body is executed twice,
|
135
|
+
# because a loop bound can be based on another loop variable.
|
136
|
+
def find_extreme(position,index,loops)
|
137
|
+
index = index.clone
|
138
|
+
2.times do
|
139
|
+
loops.each do |for_loop|
|
140
|
+
search = C::Variable.parse(for_loop[:var])
|
141
|
+
replace = C::Expression.parse(for_loop[position])
|
142
|
+
index = index.search_and_replace_node(search,replace)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
return simplify(index.to_s.gsub(';','').gsub(' ','').gsub("\t",''))
|
146
|
+
end
|
147
|
+
|
148
|
+
# Method to check whether an index is dependent on a given set of loops.
|
149
|
+
# For example, A[i+3] is independent of j, but dependent on i.
|
150
|
+
def dependent?(index,loops)
|
151
|
+
index.preorder do |node|
|
152
|
+
if node.variable?
|
153
|
+
loops.each do |for_loop|
|
154
|
+
return true if (node.name == for_loop[:var])
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
return false
|
159
|
+
end
|
160
|
+
|
161
|
+
# Method to retrieve the step for a given array index and loops. The method
|
162
|
+
# returns the difference between two subsequent iterations: one with the
|
163
|
+
# loop variable at 0 and one after the first increment.
|
164
|
+
def get_step(index,loops)
|
165
|
+
|
166
|
+
# Replace the loop indices with 0
|
167
|
+
index1 = index.clone
|
168
|
+
loops.each do |for_loop|
|
169
|
+
search = C::Variable.parse(for_loop[:var])
|
170
|
+
replace = C::Expression.parse('0')
|
171
|
+
index1 = index1.search_and_replace_node(search,replace)
|
172
|
+
end
|
173
|
+
|
174
|
+
# Replace the loop indices with the loop step
|
175
|
+
index2 = index.clone
|
176
|
+
loops.each do |for_loop|
|
177
|
+
search = C::Variable.parse(for_loop[:var])
|
178
|
+
replace = C::Expression.parse(for_loop[:step])
|
179
|
+
index2 = index2.search_and_replace_node(search,replace)
|
180
|
+
end
|
181
|
+
|
182
|
+
# Return the difference
|
183
|
+
return abs(simplify("(#{index2})-(#{index1})"))
|
184
|
+
end
|
185
|
+
|
186
|
+
# Method to output the result as algorithmic species. This reflects the
|
187
|
+
# algorithm as presented in the scientific paper.
|
188
|
+
def to_species
|
189
|
+
if @tS.reject{ |s| s == "0"}.empty?
|
190
|
+
if (@tA == 'read') # Full (steps length 0 and read)
|
191
|
+
@pattern = 'full'
|
192
|
+
else # Shared (steps length 0 and write)
|
193
|
+
@pattern = 'shared'
|
194
|
+
end
|
195
|
+
elsif @tE.reject{ |s| s.length == "1"}.empty? # Element (sizes length 1)
|
196
|
+
@pattern = 'element'
|
197
|
+
elsif step_smaller_than_num_elements? # Neighbourhood (tS < tE)
|
198
|
+
@pattern = 'neighbourhood('+@tE.join(DIM_SEP)+')'
|
199
|
+
else # Chunk (tS >= tE)
|
200
|
+
@pattern = 'chunk('+@tE.join(DIM_SEP)+')'
|
201
|
+
end
|
202
|
+
|
203
|
+
# Fill in the name and the domain and return the result
|
204
|
+
return @tN+'['+@tD.join(DIM_SEP)+']'+PIPE+@pattern
|
205
|
+
end
|
206
|
+
|
207
|
+
# Method to output the result as an array reference characterisation (ARC).
|
208
|
+
def to_arc
|
209
|
+
return "(#{tN},#{tA},#{tD},#{tE},#{tS})".gsub('"','').gsub(' ','')
|
210
|
+
end
|
211
|
+
|
212
|
+
# Method to output a copyin/copyout statement. This indicates the name (N),
|
213
|
+
# the domain (D), and a unique identifier.
|
214
|
+
def to_copy(id)
|
215
|
+
@tN+'['+@tD.join(DIM_SEP)+']'+'|'+id.to_s
|
216
|
+
end
|
217
|
+
|
218
|
+
# Method to print the unique identifier of the loop nest in terms of
|
219
|
+
# synchronisation statements to be printed. This is a per-reference id
|
220
|
+
# instead of a per-loop id, because it depends on the type of access (read
|
221
|
+
# or write).
|
222
|
+
def get_sync_id
|
223
|
+
(@tA == 'write') ? 2*@id+1 : 2*@id
|
224
|
+
end
|
225
|
+
|
226
|
+
# Helper method for the +to_species+ method. This method compares the step
|
227
|
+
# with the number of elements accessed to determine which one is smaller.
|
228
|
+
# FIXME: This is based on the +compare+ method which might take a guess.
|
229
|
+
def step_smaller_than_num_elements?
|
230
|
+
@tS.each_with_index do |step,index|
|
231
|
+
if step != '0'
|
232
|
+
comparison = compare(step,@tE[index].length,@all_loops)
|
233
|
+
if (comparison == 'lt')
|
234
|
+
return true
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
238
|
+
return false
|
239
|
+
end
|
240
|
+
|
241
|
+
# Method to print out a human readable form of the array references (e.g.
|
242
|
+
# [4*i+6][j]). This is basically what the +puts+ method also does.
|
243
|
+
def get_references
|
244
|
+
return @indices.to_ary.map{ |i| i.to_s }
|
245
|
+
end
|
246
|
+
|
247
|
+
# Method to find out if the reference is dependent on a variable. It is
|
248
|
+
# used by the copy optimisations.
|
249
|
+
def depends_on?(var)
|
250
|
+
@indices.each do |index|
|
251
|
+
index.preorder do |node|
|
252
|
+
if node.variable?
|
253
|
+
return true if (node.name == var)
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
return false
|
258
|
+
end
|
259
|
+
|
260
|
+
end
|
261
|
+
end
|
data/lib/bones.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
|
2
|
-
#
|
3
|
-
require '
|
4
|
-
|
5
|
-
# Bones uses the 'trollop' gem to parse command line options.
|
6
|
-
require 'rubygems'
|
7
|
-
require 'trollop'
|
2
|
+
# Include the common part between Bones and Aset
|
3
|
+
require 'common.rb'
|
8
4
|
|
9
5
|
# We define a custom error class for code generation related
|
10
6
|
# errors (any error raised by Bones).
|
@@ -16,20 +12,9 @@ def raise_error(message) #:nodoc:
|
|
16
12
|
end
|
17
13
|
|
18
14
|
# Extending the Ruby standard string class to support some
|
19
|
-
#
|
20
|
-
# and two methods related to comma removal.
|
15
|
+
# additional methods: two methods related to comma removal.
|
21
16
|
class String #:nodoc:
|
22
17
|
|
23
|
-
# Extend the Ruby string class to be able to chain 'gsub!'
|
24
|
-
#-commands. This code is taken from the web.
|
25
|
-
meth = 'gsub!'
|
26
|
-
orig_meth = "orig_#{meth}"
|
27
|
-
alias_method orig_meth, meth
|
28
|
-
define_method(meth) do |*args|
|
29
|
-
self.send(orig_meth, *args)
|
30
|
-
self
|
31
|
-
end
|
32
|
-
|
33
18
|
# Replace double comma's in a string with a single comma.
|
34
19
|
# This method is useful for function-argument lists.
|
35
20
|
def remove_double_commas
|
@@ -213,43 +198,6 @@ module Bones
|
|
213
198
|
return code
|
214
199
|
end
|
215
200
|
|
216
|
-
# Helper method to evaluate mathematical expressions, possibly containing
|
217
|
-
# symbols. This method is only used for readability, without it the code
|
218
|
-
# is functionally correct, but expressions might be larger than needed.
|
219
|
-
# This method is only tested on integers.
|
220
|
-
def simplify(expr)
|
221
|
-
raise_error('Invalid expression to simplify') if !expr
|
222
|
-
done = false
|
223
|
-
while !done do
|
224
|
-
old_expr = expr
|
225
|
-
case expr
|
226
|
-
when /^\(([^\(\)]*)\)$/ then expr = $1 # Remove outer brackets
|
227
|
-
when /(.*)\((-?\w*)\)(.*)/ then expr = $1+$2+$3 # Remove brackets with one constant or variable inside
|
228
|
-
when /(.*)\(\(([^\(\)]*)\)\)(.*)/ then expr = $1+'('+$2+')'+$3 # Substitute double brackets into single brackets
|
229
|
-
when /(.*)(\-\d+)\*(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants (starting with a '-')
|
230
|
-
when /(.*)(\-\d+)\+(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants (starting with a '-')
|
231
|
-
when /(.*)(\-\d+)\-(\d+)\b(.*)/ then expr = $1+'+'+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants (starting with a '-')
|
232
|
-
when /(.*)\b(\d+)\*(\d+)\b(.*)/ then expr = $1+(($2.to_i)*($3.to_i)).to_s+$4 # Perform multiplications on constants
|
233
|
-
when /(.*)\b(\d+)\+(\d+)\b(.*)/ then expr = $1+(($2.to_i)+($3.to_i)).to_s+$4 # Perform additions on constants
|
234
|
-
when /(.*)\b(\d+)\-(\d+)\b(.*)/ then expr = $1+(($2.to_i)-($3.to_i)).to_s+$4 # Perform subtractions on constants
|
235
|
-
when /(.*)\b(\w+)\-(\2)\b(.*)/ then expr = $1+'0'+$4 # Perform subtractions of variables to zero (e.g. 'a-a=0')
|
236
|
-
when /(.*)\/1\b(.*)/ then expr = $1+$2 # Remove divisions by 1
|
237
|
-
when /(.*)(\+0\b|\b0\+)(.*)/ then expr = $1+$3 # Remove additions with 0
|
238
|
-
when /(.*[\+\(])\(([^\(\)\*\/\%]+)\)([\+\-\)].*)/ then expr = $1+$2+$3 # Remove brackets that are not needed (e.g. '(a+b)+c')
|
239
|
-
end
|
240
|
-
expr.gsub!(/\s/,'') # Remove whitespaces
|
241
|
-
expr.gsub!(/\-\-/,'+') # Substitute double minusses for a plus
|
242
|
-
expr.gsub!(/\+\-/,'-') # Substitute plus-minus for a minus
|
243
|
-
expr.gsub!(/(^|\()\+/,'') # Remove plus signs at the start of a line or after an opening bracket
|
244
|
-
if expr =~ /(.*)\b(\d+)\/(\d+)\b(.*)/ # Perform divisions on constants...
|
245
|
-
division = ($2.to_i)/($3.to_i) # ...but first check whether the result will be correct (integer division)
|
246
|
-
expr = $1+division.to_s+$4 if division*$3.to_i == $2.to_i
|
247
|
-
end
|
248
|
-
done = true if old_expr == expr
|
249
|
-
end
|
250
|
-
return expr
|
251
|
-
end
|
252
|
-
|
253
201
|
end
|
254
202
|
|
255
203
|
end
|
@@ -261,6 +209,7 @@ require 'bones/species.rb'
|
|
261
209
|
require 'bones/algorithm.rb'
|
262
210
|
require 'bones/variablelist.rb'
|
263
211
|
require 'bones/variable.rb'
|
212
|
+
require 'bones/copy.rb'
|
264
213
|
require 'bones/preprocessor.rb'
|
265
214
|
require 'bones/engine.rb'
|
266
215
|
|
data/lib/bones/algorithm.rb
CHANGED
@@ -10,7 +10,7 @@ module Bones
|
|
10
10
|
# and lists of input/output array variables.
|
11
11
|
class Algorithm < Common
|
12
12
|
attr_reader :name, :species, :code, :lists, :arrays, :id, :function_name
|
13
|
-
attr_accessor :hash, :merge_factor
|
13
|
+
attr_accessor :hash, :merge_factor, :register_caching_enabled
|
14
14
|
|
15
15
|
# Constant to set the name of the algorithm's accelerated version
|
16
16
|
ACCELERATED = '_accelerated'
|
@@ -31,14 +31,25 @@ module Bones
|
|
31
31
|
@original_name = @name+ORIGINAL
|
32
32
|
@accelerated_name = @name+ACCELERATED
|
33
33
|
@species = species
|
34
|
-
|
34
|
+
begin
|
35
|
+
@code = C::Statement.parse(code).preprocess
|
36
|
+
rescue
|
37
|
+
@code = C::Statement.parse('{'+code+'}').preprocess
|
38
|
+
end
|
35
39
|
@hash = {}
|
36
40
|
@lists = {:host_name => [],:host_definition => [], :argument_name => [], :argument_definition => [], :golden_name => []}
|
37
41
|
@arrays = Variablelist.new()
|
38
42
|
@constants = Variablelist.new()
|
39
|
-
@merge_factor =
|
43
|
+
@merge_factor = nil
|
44
|
+
@register_caching_enabled = 1
|
40
45
|
@function_code = ''
|
41
46
|
@function_name = ''
|
47
|
+
|
48
|
+
# Set the initial hash
|
49
|
+
@hash = {:algorithm_id => @id,
|
50
|
+
:algorithm_name => @name,
|
51
|
+
:algorithm_basename => @basename,
|
52
|
+
:algorithm_filename => @filename}
|
42
53
|
end
|
43
54
|
|
44
55
|
# This method sets the code and name for the function in
|
@@ -119,15 +130,17 @@ module Bones
|
|
119
130
|
new_code.transform_flatten(array)
|
120
131
|
end
|
121
132
|
|
122
|
-
# Perform array substitution (conditionally do this)
|
123
|
-
@
|
124
|
-
|
125
|
-
if
|
126
|
-
|
127
|
-
|
128
|
-
|
133
|
+
# Perform array substitution a.k.a. register caching (conditionally do this)
|
134
|
+
if @register_caching_enabled == 1
|
135
|
+
@arrays.outputs.each do |array|
|
136
|
+
if array.species.element?
|
137
|
+
if @arrays.inputs.include?(array)
|
138
|
+
new_code.transform_substitution(array,true)
|
139
|
+
else
|
140
|
+
new_code.transform_substitution(array,false)
|
141
|
+
end
|
142
|
+
extra_indent = INDENT
|
129
143
|
end
|
130
|
-
extra_indent = INDENT
|
131
144
|
end
|
132
145
|
end
|
133
146
|
|
@@ -138,23 +151,35 @@ module Bones
|
|
138
151
|
|
139
152
|
# Perform thread-merging (experimental)
|
140
153
|
# TODO: Solve the problem related to constants (e.g chunk/example1.c)
|
141
|
-
if @merge_factor ==
|
142
|
-
@
|
154
|
+
if @merge_factor == nil
|
155
|
+
if transformation[0,1] == '4' && @hash[:parallelism].to_i >= 1024*1024
|
156
|
+
@merge_factor = 4
|
157
|
+
else
|
158
|
+
@merge_factor = 1
|
159
|
+
end
|
143
160
|
end
|
144
161
|
if @merge_factor > 1
|
145
|
-
puts
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
162
|
+
puts @hash[:parallelism]
|
163
|
+
if new_code.has_conditional_statements?
|
164
|
+
puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of conditional statements in kernel body.'
|
165
|
+
# TODO: Fix this temporary hack for multiple loops with mismatching bounds
|
166
|
+
elsif ((@hash[:parallelism].to_i % @merge_factor) != 0) || (@hash[:parallelism].to_i == 4192256)
|
167
|
+
puts MESSAGE+'Not coarsening ('+@merge_factor.to_s+'x) because of mismatching amount of parallelism ('+@hash[:parallelism]+').'
|
168
|
+
else
|
169
|
+
puts MESSAGE+'Coarsening threads by a factor '+@merge_factor.to_s+'.'
|
170
|
+
|
171
|
+
# Update the hash
|
172
|
+
@hash[:ids] = @hash[:ids].split(NL).map { |line|
|
173
|
+
C::parse(line).transform_merge_threads(@merge_factor,[GLOBAL_ID]+@constants.map{ |c| c.name }).to_s.split(NL).each_with_index.map do |id,index|
|
174
|
+
id.gsub(/\b#{GLOBAL_ID}\b/,"(#{GLOBAL_ID}+gridDim.x*blockDim.x*#{index})")
|
175
|
+
end
|
176
|
+
}.join(NL+INDENT*2)
|
177
|
+
@hash[:parallelism] = (@hash[:parallelism].to_i / @merge_factor).to_s
|
178
|
+
|
179
|
+
# Transform the code
|
180
|
+
excludes = (@constants+@arrays).map { |c| c.name }
|
181
|
+
new_code.transform_merge_threads(@merge_factor,excludes)
|
182
|
+
end
|
158
183
|
end
|
159
184
|
|
160
185
|
# Obtain the complexity in terms of operations for the resulting code
|
@@ -215,12 +240,8 @@ module Bones
|
|
215
240
|
# kernel_argument_list
|
216
241
|
#
|
217
242
|
def populate_hash
|
218
|
-
@hash =
|
219
|
-
|
220
|
-
:algorithm_basename => @basename,
|
221
|
-
:algorithm_filename => @filename,
|
222
|
-
:argument_name => @lists[:argument_name],
|
223
|
-
:argument_definition => @lists[:argument_definition]}
|
243
|
+
@hash[:argument_name] = @lists[:argument_name]
|
244
|
+
@hash[:argument_definition] = @lists[:argument_definition]
|
224
245
|
|
225
246
|
# Obtain the necessary data for the hash per array
|
226
247
|
parallelisms = []
|
@@ -278,11 +299,15 @@ module Bones
|
|
278
299
|
|
279
300
|
# Generate the index expressions
|
280
301
|
divider = (array.species.chunk?) ? '/'+sum(array.species.parameters[index]) : ''
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
302
|
+
dimensions_hash = (index == dimensions.length-1) ? '1' : dimensions.drop(index+1).map { |d| sum(d) }.join('*')
|
303
|
+
dimensions_hash = simplify(dimensions_hash)
|
304
|
+
dimensions_division = (dimensions_hash == '1') ? '' : '/('+dimensions_hash+')'
|
305
|
+
minihash = {:dimensions1 => "#{GLOBAL_ID}#{dimensions_division}",
|
306
|
+
:dimensions2 => "#{LOCAL_ID }#{dimensions_division}",
|
307
|
+
:modulo => (index_reverse != dimensions.length-1) ? '%('+simplify(sum(dimension)+divider)+')' : '',
|
308
|
+
:offset => simplify(from(dimension))}
|
309
|
+
expr_global = search_and_replace(minihash,"((<dimensions1>)<modulo>)+<offset>")
|
310
|
+
expr_local = search_and_replace(minihash,"((<dimensions2>)<modulo>)+<offset>")
|
286
311
|
|
287
312
|
# Selectively push the ID definitions to the result array
|
288
313
|
from = array.species.from_at(index)
|
@@ -342,6 +367,7 @@ module Bones
|
|
342
367
|
def update_hash(loop_variable)
|
343
368
|
names = @hash[:argument_name].split(', ')
|
344
369
|
definitions = @hash[:argument_definition].split(', ')
|
370
|
+
# TODO: The following two lines give problems with correlation-k4
|
345
371
|
names.delete(loop_variable.to_s)
|
346
372
|
definitions.each { |definition| definitions.delete(definition) if definition =~ /\b#{loop_variable}\b/ }
|
347
373
|
@hash[:argument_name] = names.join(', ')
|
@@ -387,10 +413,12 @@ module Bones
|
|
387
413
|
array_names = arrays.map { |a| a.name }.join('","')
|
388
414
|
raise_error(direction.capitalize+'put array count mismatch (expected '+species.length.to_s+', found '+arrays.length.to_s+' ["'+array_names+'"])')
|
389
415
|
end
|
390
|
-
|
416
|
+
|
391
417
|
# Set the species for the arrays (distinguish between arrays with and without a name)
|
392
418
|
species.each do |structure|
|
393
|
-
|
419
|
+
|
420
|
+
# Loop over all found arrays and match it with a species
|
421
|
+
array = nil
|
394
422
|
arrays.each do |free_array|
|
395
423
|
if !free_array.species
|
396
424
|
if structure.has_arrayname?
|
@@ -404,8 +432,17 @@ module Bones
|
|
404
432
|
end
|
405
433
|
end
|
406
434
|
end
|
435
|
+
|
436
|
+
# Still haven't found anything, assign the species to an array of equal name
|
437
|
+
if !array
|
438
|
+
arrays.each do |free_array|
|
439
|
+
array = free_array if structure.name == free_array.name
|
440
|
+
end
|
441
|
+
end
|
442
|
+
|
443
|
+
# Process the assignment
|
407
444
|
array.species = structure
|
408
|
-
#
|
445
|
+
raise_error("Species of '#{array.species.name}' is mismatched with array '#{array.name}'") if array.species.name != array.name
|
409
446
|
|
410
447
|
# Check if the array size was set, if not, it will be set to the species' size
|
411
448
|
if array.size.empty?
|