bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........neighbourhood/example05.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...07-May-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
|
|
18
|
+
// This is 'example05', an unrolled one-sided neighbourhood
|
|
19
|
+
int main(void) {
  int i;
  int N = 256;

  // Declare input/output arrays. A holds N+1 elements because the kernel
  // below reads A[i+1] for i up to N-1, i.e. it touches A[N].
  int A[N+1];
  int B[N];

  // Set the input data
  for(i=0;i<N;i++) {
    A[i] = i;
    B[i] = i+5;
  }
  // Initialise the extra element that is read by the last kernel iteration
  A[N] = N;

  // Perform the computation
  #pragma species kernel A[2:N]|neighbourhood(0:1) -> B[2:N-1]|element
  for (i=2; i<N; i++) {
    B[i] = A[i] + A[i+1];
  }
  #pragma species endkernel example05

  // Clean-up and exit the function
  fflush(stdout);
  return 0;
}
|
|
44
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/lib/adarwin.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
# Include the common part between Bones and A-Darwin
|
|
3
|
+
require 'common.rb'
|
|
4
|
+
|
|
5
|
+
# We define a custom error class for code generation related
|
|
6
|
+
# errors (any error raised).
|
|
7
|
+
# Exception type signalling that code generation had to be aborted.
class CodeGenError < StandardError #:nodoc:
end

# Prints +message+, prefixed with the A-Darwin error tag, to standard output
# and then aborts by raising a CodeGenError.
def raise_error(message) #:nodoc:
  error_line = Adarwin::ERROR+message
  puts error_line
  raise CodeGenError.new('Error encountered, stopping execution of A-Darwin')
end
|
|
13
|
+
|
|
14
|
+
# The module keeps all the classes and constants
|
|
15
|
+
# together. It contains the classes:
|
|
16
|
+
# * Engine: The main component of the tool, providing the high-level flow.
|
|
17
|
+
# * Preprocessor: C-preprocessor, extracting defines/includes from source code.
|
|
18
|
+
# * Nest:
|
|
19
|
+
# * Interval:
|
|
20
|
+
# * Dependence:
|
|
21
|
+
# * Reference:
|
|
22
|
+
#
|
|
23
|
+
# The module also contains a list of inter-class constants.
|
|
24
|
+
module Adarwin

  # All string constants below are frozen so that they cannot be modified
  # in place by accident; they are only meant to be read or concatenated.

  # A string given as a start of an informative message.
  MESSAGE = '[A-Darwin] ### Info : '.freeze
  # A string given as a start of a warning message.
  WARNING = '[A-Darwin] ### Warning: '.freeze
  # A string given as a start of an error message.
  ERROR = '[A-Darwin] ### Error : '.freeze

  # Start of the scop
  SCOP_START = '#pragma scop'.freeze
  # End of the scop
  SCOP_END = '#pragma endscop'.freeze

  # Species pragma
  PRAGMA_SPECIES = '#pragma species'.freeze

  # Array reference characterisation (ARC) pragma
  PRAGMA_ARC = '#pragma ARC'.freeze

  # Delimiters used to wrap a pragma into a string, because pragmas are
  # unsupported by CAST.
  PRAGMA_DELIMITER_START = '"PRAGMA '.freeze
  PRAGMA_DELIMITER_END = ' PRAGMA"'.freeze

  # This class is created to be a parent class of all classes.
  class Common
  end

end
|
|
53
|
+
|
|
54
|
+
# This list of require's makes sure all classes are included.
|
|
55
|
+
require 'adarwin/interval.rb'
|
|
56
|
+
require 'adarwin/dependences.rb'
|
|
57
|
+
require 'adarwin/preprocessor.rb'
|
|
58
|
+
require 'adarwin/memorycopies.rb'
|
|
59
|
+
require 'adarwin/fusion.rb'
|
|
60
|
+
require 'adarwin/engine.rb'
|
|
61
|
+
require 'adarwin/reference.rb'
|
|
62
|
+
require 'adarwin/nest.rb'
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
module Adarwin
|
|
2
|
+
|
|
3
|
+
# This class represents the dependence tests. The dependence tests are not
|
|
4
|
+
# objects as such, the use of a class might therefore be a bit out of place.
|
|
5
|
+
# Instead, the class rather holds all methods related to dependence tests.
|
|
6
|
+
#
|
|
7
|
+
# For an M-dimensional access, the problem of dependence testing is reduced to
|
|
8
|
+
# that of determining whether a system of M linear equations of the form
|
|
9
|
+
# >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
|
|
10
|
+
# has a simultaneous integer solution satisfying the loop/if bounds given as
|
|
11
|
+
# >>> min_k <= I_k <= max_k
|
|
12
|
+
#
|
|
13
|
+
# Currently, the following conservative tests are implemented:
|
|
14
|
+
# * The GCD (greatest common divisor) test
|
|
15
|
+
# * The Banerjee test
|
|
16
|
+
#
|
|
17
|
+
# In case the accesses are multi-dimensional, we perform a subscript-by-
|
|
18
|
+
# subscript checking. In other words, we test each dimension separately
|
|
19
|
+
# using the two tests. If we find a possible dependence in one dimension, we
|
|
20
|
+
# conclude that there is a dependence.
|
|
21
|
+
class Dependence
|
|
22
|
+
attr_accessor :result
|
|
23
|
+
|
|
24
|
+
# Method to initialise the dependence tests. This method actually already
|
|
25
|
+
# computes all the dependence tests and stores the result in an instance
|
|
26
|
+
# variable. It takes as input the pair of accesses it needs to check for
|
|
27
|
+
# dependences.
|
|
28
|
+
def initialize(access1,access2,verbose)
  @verbose = verbose
  bounds = [access1.bounds,access2.bounds]

  # Only the dimensions present in both references are compared.
  shared_dimensions = [access1.indices.size,access2.indices.size].min

  # Produce one verdict per dimension: true means 'possible dependence'.
  verdicts = (0...shared_dimensions).map do |position|
    ref1 = access1.indices[position]
    ref2 = access2.indices[position]
    loop_vars = [access1,access2].map { |a| a.all_loops.map { |l| l[:var] } }

    # Identical references are reported as 'no dependence' right away.
    next false if ref1 == ref2

    # TODO: Include the step in the dependence tests

    # Get all variables, a linear equation, and the corresponding conditions
    all_vars, equation, conditions = get_linear_equation(ref1,ref2,bounds,loop_vars)

    # Without any loop-variable coefficient there is nothing to test.
    next false if equation[:ak].empty?

    # The GCD test runs first; Banerjee is only consulted when the GCD test
    # cannot rule out a dependence.
    if gcd_test(all_vars,equation) == false
      false
    else
      ban_test(all_vars,equation,conditions)
    end
  end

  # A possible dependence in any single dimension marks the pair dependent.
  @result = verdicts.include?(true)
end
|
|
81
|
+
|
|
82
|
+
# This method implements the GCD test. The test is based on the computation
|
|
83
|
+
# of the greatest common divisor, giving it its name. The GCD test is based
|
|
84
|
+
# on the fact that a linear equation in the form of
|
|
85
|
+
# >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
|
|
86
|
+
# has an integer solution if and only if the greatest common divisor of a_1,
|
|
87
|
+
# a_2,...,a_n is a divisor of a_0. The GCD test checks for this
|
|
88
|
+
# divisibility by performing the division and checking if the result is
|
|
89
|
+
# integer.
|
|
90
|
+
#
|
|
91
|
+
# This method returns true if there is an integer solution, not necessarily
|
|
92
|
+
# within the loop bounds. Thus, if the method returns true, there might be a
|
|
93
|
+
# dependence. If the method returns false, there is definitely no dependence.
|
|
94
|
+
#
|
|
95
|
+
# TODO: If the result (+division+) is symbolic, can we conclude anything?
|
|
96
|
+
def gcd_test(all_vars,equation)

  # a0 is the constant term a_0; coefficients are a_1,a_2,...,a_n.
  a0 = equation[:a0]
  coefficients = equation[:ak]

  # Greatest common divisor of the coefficients, and a_0 divided by it. The
  # division is forced to floating point so that integrality can be checked.
  divisor = gcd(coefficients)
  quotient = a0/divisor.to_f

  # NOTE(review): a zero divisor is reported as 'no dependence' whatever the
  # value of a_0 is - confirm that this is intended.
  gcd_result =
    if divisor == 0
      false
    elsif quotient.class != Float
      # Non-Float (e.g. symbolic) quotient: nothing can be concluded, so a
      # dependence is conservatively assumed.
      true
    else
      quotient.to_i.to_f == quotient
    end

  # Display and return the result
  puts MESSAGE+"GCD-test '#{gcd_result}' ---> (#{a0})/(#{divisor}) = #{quotient}, gcd(#{coefficients})" if @verbose
  gcd_result
end
|
|
120
|
+
|
|
121
|
+
# This method implements the Banerjee test. This test takes loop bounds into
|
|
122
|
+
# consideration. The test is based on a linear equation in the form of
|
|
123
|
+
# >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
|
|
124
|
+
# and loop bounds in the form of
|
|
125
|
+
# >>> min_k <= I_k <= max_k
|
|
126
|
+
#
|
|
127
|
+
# The test proceeds as follows. First, the values a_k+ and a_k- are
|
|
128
|
+
# computed. Also, the bounds min_k and max_k are calculated from the loop
|
|
129
|
+
# conditions. Following, the test computes the extreme values 'low' and
|
|
130
|
+
# 'high'. Finally, the test computes whether the following holds:
|
|
131
|
+
# >>> low <= a_0 <= high
|
|
132
|
+
# If this holds, there might be a dependence (method returns true). If this
|
|
133
|
+
# does not hold, there is definitely no dependence (method returns false).
|
|
134
|
+
def ban_test(all_vars,equation,conditions)

  # For every coefficient a_k collect its positive part (a_k+), its negative
  # part (a_k-), and the bounds min_k/max_k of the corresponding variable.
  terms = equation[:ak].each_with_index.map do |coefficient,position|
    {
      :ak_plus => (coefficient >= 0) ? coefficient : 0,
      :ak_min => (coefficient <= 0) ? -coefficient : 0,
      :min_k => conditions[position][:min],
      :max_k => conditions[position][:max]
    }
  end

  # Accumulate the extreme values 'low' and 'high' symbolically (as strings
  # handed to +simplify+).
  low, high = "0", "0"
  terms.each do |t|
    low_part = simplify("\n(#{t[:ak_plus]}) * (#{t[:min_k]}) -\n(#{t[:ak_min]}) * (#{t[:max_k]})\n")
    low = simplify("(#{low}) + (#{low_part})")
    high_part = simplify("\n(#{t[:ak_plus]}) * (#{t[:max_k]}) -\n(#{t[:ak_min]}) * (#{t[:min_k]})\n")
    high = simplify("(#{high}) + (#{high_part})")
  end

  # Check low <= a_0 <= high in two halves, each expressed through +max+.
  # FIXME: This method uses the +max+ which might make a guess.
  a0 = equation[:a0]
  lower_ok = (a0.to_s == max(low,a0.to_s))
  upper_ok = (high == max(a0.to_s,high))
  ban_result = (lower_ok && upper_ok)

  # Display and return the results
  puts MESSAGE+"Banerjee '#{ban_result}' ---> (#{lower_ok},#{upper_ok}), '(#{low} <= #{a0} <= #{high})'" if @verbose
  ban_result
end
|
|
176
|
+
|
|
177
|
+
# This method retrieves a linear equation from a pair of accesses. Accesses
|
|
178
|
+
# are transformed into a linear equation of the form
|
|
179
|
+
# >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
|
|
180
|
+
# Additionally, this method returns a list of all variables and a list of
|
|
181
|
+
# loop bounds corresponding to the linear equation's variables.
|
|
182
|
+
def get_linear_equation(access1,access2,bounds,all_loop_vars)
  equation = { :a0 => 0, :ak => [] }
  all_vars = []
  conditions = []
  # The expressions eval'ed below refer to this local by the name +hash+, so
  # it (and the other locals of this method) must keep their names.
  hash = {}

  # Handle both accesses in turn; the terms of the second one are negated.
  [access1,access2].each_with_index do |access,index|
    access = simplify(access.to_s)

    # Collect the variables and rewrite each occurrence in the expression
    # into a lookup in +hash+.
    vars = get_vars(access).uniq
    loop_vars = get_loop_vars(vars,all_loop_vars[index])
    all_vars = (all_vars + vars).uniq
    vars.each { |var_name| access = access.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]") }

    # Make sure every variable has an entry (created through +var+ of the
    # 'symbolic' library) and let it initially evaluate to itself.
    vars.each do |var_name|
      hash[var_name.to_sym] ||= var(:name => var_name)
      hash[var_name.to_sym].value = hash[var_name.to_sym]
    end

    # Constant term (a_0): evaluate with all loop variables set to zero.
    loop_vars.each { |var_name| hash[var_name.to_sym].value = 0 }
    base = eval(access).value
    val = (index == 0) ? base : -base
    equation[:a0] += val

    # Coefficients (a_1 ... a_n): set one loop variable to one at a time and
    # take the difference with the constant term.
    loop_vars.each do |var_name|
      hash[var_name.to_sym].value = 1
      val = eval(access).value - base
      val = -val unless index == 0
      equation[:ak] << val
      hash[var_name.to_sym].value = 0
    end

    # Loop bound conditions, in the same order as the coefficients.
    loop_vars.each do |var_name|
      conditions << bounds[index].find { |c| c[:var] == var_name }
    end
  end
  [all_vars, equation, conditions]
end
|
|
237
|
+
|
|
238
|
+
# Implementation of a GCD method with any number of arguments. Relies on
|
|
239
|
+
# Ruby's default GCD method. In contrast to the normal gcd method, this
|
|
240
|
+
# method does not act on a number, but instead takes an array of numbers as
|
|
241
|
+
# an input.
|
|
242
|
+
def gcd(args)
  # Fold the list pairwise with Integer#gcd. Without a seed value +inject+
  # starts from the first element, yields that element unchanged for a
  # one-element list, and yields nil for an empty list.
  args.inject { |running,number| running.gcd(number) }
end
|
|
249
|
+
|
|
250
|
+
# Method to obtain all variables in an array reference that are also loop
|
|
251
|
+
# variables.
|
|
252
|
+
def get_loop_vars(vars,all_loop_vars)
  # Array intersection: keeps the (unique) entries of +vars+ that also occur
  # in +all_loop_vars+, in the order in which they appear in +vars+.
  vars & all_loop_vars
end
|
|
255
|
+
|
|
256
|
+
# Method to combine an array of integers in the form of a subtraction. For
|
|
257
|
+
# example, given the input [a,b,c,d], the output will be (a-b-c-d).
|
|
258
|
+
# TODO: Remove this method
|
|
259
|
+
#def merge_subtract(args)
|
|
260
|
+
# val = args.first
|
|
261
|
+
# args.drop(1).each do |argument|
|
|
262
|
+
# val = val - argument
|
|
263
|
+
# end
|
|
264
|
+
# return val
|
|
265
|
+
#end
|
|
266
|
+
|
|
267
|
+
end
|
|
268
|
+
end
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
|
|
2
|
+
module Adarwin
|
|
3
|
+
|
|
4
|
+
# This is the main 'engine' for the A-darwin algorithmic species extraction
|
|
5
|
+
# tool. It contains methods to parse the command-line arguments, to run the
|
|
6
|
+
# pre-processor, to insert the annotations, and to pretty print the final
|
|
7
|
+
# output.
|
|
8
|
+
# TODO: Add a syntax check by a normal compiler first (e.g. gcc)
|
|
9
|
+
class Engine < Common
|
|
10
|
+
|
|
11
|
+
# Initializes the engine and processes the command line arguments. This
|
|
12
|
+
# method uses the 'trollop' gem to parse the arguments and to create a
|
|
13
|
+
# nicely formatted help menu. This method additionally initializes a result-
|
|
14
|
+
# hash and reads the contents of the source file from disk.
|
|
15
|
+
#
|
|
16
|
+
# ==== Command-line usage:
|
|
17
|
+
# adarwin --application <input> [OPTIONS]
|
|
18
|
+
#
|
|
19
|
+
# ==== Options:
|
|
20
|
+
# --application, -a <s>: Input application file
|
|
21
|
+
# --no-memory-annotations, -m: Disable the printing of memory annotations
|
|
22
|
+
# --mem-remove-spurious, -r: Memcopy optimisation: remove spurious copies
|
|
23
|
+
# --mem-copyin-to-front, -f: Memcopy optimisation: move copyins to front
|
|
24
|
+
# --mem-copyout-to-back, -b: Memcopy optimisation: move copyouts to back
|
|
25
|
+
# --mem-to-outer-loop, -l: Memcopy optimisation: move copies to outer loops
|
|
26
|
+
# --only-alg-number, -o <i>: Only generate code for the x-th species (99 -> all) (default: 99)
|
|
27
|
+
# --version, -v: Print version and exit
|
|
28
|
+
# --help, -h: Show this message
|
|
29
|
+
#
|
|
30
|
+
def initialize
  # Result hash, filled in while processing.
  @result = {:original_code => [],
             :species_code => []}

  # Parse the command line options using the 'trollop' gem.
  @options = Trollop::options do
    version 'A-darwin, part of Bones version '+File.read(ADARWIN_DIR+'/VERSION').strip+' (c) 2013 Cedric Nugteren, Eindhoven University of Technology'
    banner NL+'A-darwin is an algorithmic species extraction tool. ' +
           'For more information, see the README.rdoc file or visit the Bones/A-darwin website at http://parse.ele.tue.nl/bones/.' + NL + NL +
           'Usage:' + NL +
           ' adarwin --application <input> [OPTIONS]' + NL +
           'using the following flags:'
    opt :application, 'Input application file', :short => 'a', :type => String
    opt :no_memory_annotations, 'Disable the printing of memory annotations', :short => 'm', :default => false
    opt :mem_remove_spurious, 'Memcopy optimisation: remove spurious copies', :short => 'r', :default => false
    opt :mem_copyin_to_front, 'Memcopy optimisation: move copyins to front', :short => 'f', :default => false
    opt :mem_copyout_to_back, 'Memcopy optimisation: move copyouts to back', :short => 'b', :default => false
    opt :mem_to_outer_loop, 'Memcopy optimisation: move copies to outer loops', :short => 'l', :default => false
    opt :fusion, 'Type of kernel fusion to perform (0 -> disable)', :short => 'k', :type => Integer, :default => 0
    opt :print_arc, 'Print array reference characterisations (ARC) instead of species', :short => 'c', :default => false
    opt :silent, 'Become silent (no message printing)', :short => 's', :default => false
    opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
  end

  # Validate the input file argument. File.exist? is used because the
  # deprecated alias File.exists? is no longer available in Ruby 3.2+.
  Trollop::die 'no input file supplied (use: --application)' if !@options[:application_given]
  Trollop::die 'input file "'+@options[:application]+'" does not exist' if !File.exist?(@options[:application])

  # The name is the file name up to its first dot, without directories.
  @options[:name] = @options[:application].split('/').last.split('.').first
  # Printing ARCs switches the memory annotations off.
  @options[:no_memory_annotations] = true if @options[:print_arc]

  # Obtain the source code from file
  @source = File.read(@options[:application])
  @basename = File.basename(@options[:application],'.c')
end
|
|
62
|
+
|
|
63
|
+
# Method to process a file and to output target code. This method calls all
|
|
64
|
+
# the other methods, it is the main engine.
|
|
65
|
+
#
|
|
66
|
+
# ==== Tasks:
|
|
67
|
+
# * Run the preprocessor to obtain algorithm information.
|
|
68
|
+
# * Use the 'CAST' gem to parse the source into an AST.
|
|
69
|
+
# * Call the code generator to perform the real work and produce output.
|
|
70
|
+
# Main engine: runs every processing step on @source and stores the outcome.
#
# Reads @source and @options, (re)initialises @nests and @id, and fills
# @result[:header_code] and @result[:species_code]. @result itself must already
# exist when this method is called. The annotated SCoP is printed to standard
# output unless @options[:silent] is set. Takes no arguments; the value of the
# final assignment (the species code string) is returned.
def process

  # Run the preprocessor
  preprocessor = Adarwin::Preprocessor.new(@source)
  preprocessor.process
  @result[:header_code] = preprocessor.header_code

  # Set-up the CAST gem to include certain types
  # FIXME: What about other (user-defined?) types?
  parser = C::Parser.new
  parser.type_names << 'FILE'
  parser.type_names << 'size_t'

  # Parse the original source code (using CAST). The resulting AST is not used
  # any further in this method.
  # NOTE(review): the call is kept because it presumably rejects unparsable
  # input early - confirm before removing it.
  parser.parse(preprocessor.parsed_code)

  # Create an AST of the SCoP (using CAST)
  scop_ast = C::Block.parse('{'+preprocessor.scop_code+'}')

  # Process the scop to identify the loop nests of interest and to find the
  # corresponding species. This is the method performing most of the work.
  @nests = []
  @id = 0
  populate_nests(scop_ast)

  # Remove inner-loop (nested) species. This removes all species that are
  # found within another species. For completeness, this might be desired in
  # some cases.
  # TODO: Make this an option
  @nests.each do |nest|
    remove_inner_species(get_children(nest)) if nest.has_species?
  end
  @nests.delete_if { |n| n.removed }

  # Iterate over the nests/statements to optimize the copies. Currently,
  # this will only look at loop nests with a depth of 1. Re-call the memory
  # copy optimisations method every time a change is made.
  # TODO: Investigate what the depth should be.
  basenests = @nests.select { |n| n.depth == 1 }
  recursive_copy_optimisations(basenests, @options)

  # Kernel fusion is enabled (1,2,3,4) or disabled (0). When enabled, test if
  # fusion is legal and perform the actual transformation.
  kernel_fusion(@nests, @options[:fusion]) if @options[:fusion] > 0

  # Delete the to-be-removed code (because of fusion)
  @nests.each do |nest|
    scop_ast.remove_once(nest.code) if nest.removed
  end
  @nests.delete_if { |n| n.removed }

  # Insert the species and memory copy annotations into the original code.
  # Don't do this if the user specified that he is not interested in the
  # memory copy annotations.
  insert_copies(scop_ast) unless @options[:no_memory_annotations]
  insert_species(scop_ast)

  # Create the modified SCoP and remove the quotes from the pragmas
  # FIXME: This is a hack for now, this has conflicts with strings in code
  modified_scop = INDENT+SCOP_START+NL+scop_ast.to_s+NL+INDENT+SCOP_END+NL
  modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_START,'')
  modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_END,'')

  # Print the result SCoP
  puts modified_scop unless @options[:silent]

  # Store the result. The block form of gsub is used on purpose: a replacement
  # *string* would have its backslash sequences (e.g. the '\0' of a C character
  # literal, or '\\') interpreted as back-references, corrupting the code.
  @result[:species_code] = preprocessor.target_code.gsub(preprocessor.scop_code) { modified_scop }
end
|
|
146
|
+
|
|
147
|
+
# This method writes the output code to a file.
|
|
148
|
+
# Writes @result[:species_code] to '<stem>_species.c', placed in the same
# directory as the input file named by @options[:application]. The stem is the
# input's file name up to its first dot. Only the file name is inspected, so
# dots in directory components (e.g. './dir/foo.c' or 'bench.v1/foo.c') no
# longer truncate the path.
def write_output

  # Populate the species file
  # TODO: The filename is fixed, make this an optional argument
  input = @options[:application]
  stem = File.basename(input).split('.').first.to_s
  species_file = File.join(File.dirname(input), stem+'_species'+'.c')
  File.open(species_file,'w') do |target|
    target.puts @result[:species_code]
  end
end
|
|
156
|
+
|
|
157
|
+
# This method populates the Nest datastructure (recursively). It is the main
|
|
158
|
+
# method to process the loop nests and find the species information. It is
|
|
159
|
+
# called recursively.
|
|
160
|
+
# Registers the loop nests found below +ast+ in @nests, one depth level at a
# time, and then descends into the body of every statement.
#
# ast::   node to inspect; nothing happens unless it answers true to block?
# level:: list of statement indices leading from the root to +ast+
#
# @id is incremented once per visited statement, whether or not a Nest was
# created for it, so the value of @options[:only_alg_number] is matched
# against statement positions (99 selects everything).
def populate_nests(ast,level=[])
  return unless ast.block?

  # First pass: create the loop nests of the current depth level. This has to
  # finish before descending, so that ids are handed out level by level.
  ast.stmts.each_with_index do |statement, position|
    path = level + [position]
    wanted = @options[:only_alg_number].to_i

    # Push the loop nest, but only if it is not disabled by options and if it
    # is an actual loop nest
    if (wanted == 99 || wanted == (@id+1)) && statement.for_statement?
      @nests.push(Nest.new(path, statement, @id, @basename, !@options[:silent]))
    end
    @id += 1
  end

  # Second pass: proceed to the next depth level.
  # TODO: Make it an option to only investigate the outer most level(s).
  ast.stmts.each_with_index do |statement, position|
    path = level + [position]
    populate_nests(statement.stmt, path) if statement.stmt
  end
end
|
|
190
|
+
|
|
191
|
+
# This method removes all species in the current loop nest (called
|
|
192
|
+
# recursively). It assumes these species should be removed.
|
|
193
|
+
# Strips the species information from each of the given nests and marks them
# as removed, then does the same for whatever get_children reports for each
# nest.
#
# nests:: the nests to clear
def remove_inner_species(nests)
  nests.each do |inner|

    # Wipe everything that was found for this nest
    inner.copyins = []
    inner.copyouts = []
    inner.species = ''
    inner.removed = true

    # Continue with the nests one level further down
    descendants = get_children(inner)
    remove_inner_species(descendants) if descendants
  end
end
|
|
203
|
+
|
|
204
|
+
# Method to obtain the children of a nest
|
|
205
|
+
# Returns the direct children of +parent+: every nest in @nests that is exactly
# one level deeper and whose level, minus its last entry, equals the level of
# +parent+. The result is a new array in @nests order, empty if there are no
# children.
#
# parent:: object responding to depth and level
def get_children(parent)
  child_depth = parent.depth+1
  @nests.select do |nest|
    nest.depth == child_depth && parent.level == nest.level[0...-1]
  end
end
|
|
216
|
+
|
|
217
|
+
# This method iterates over the loop nests and inserts the species into the
|
|
218
|
+
# original AST. It also inserts the synchronisation barriers when needed, and
|
|
219
|
+
# only if the user is interested in the memory copy annotations.
|
|
220
|
+
# Places the species annotations (or the ARC annotations when
# @options[:print_arc] is set) directly around the code of every nest in
# @nests. Synchronisation barriers are added as well, for those sync ids that
# some copyin/copyout refers to, unless @options[:no_memory_annotations] is set.
# A nest uses id 2*nest.id for the barrier in front of it and 2*nest.id+1 for
# the barrier behind it.
#
# scop_ast:: not read by this method; all insertions go through nest.code
def insert_species(scop_ast)
  with_memory = !@options[:no_memory_annotations]

  # Builds the (quoted) pragma for a synchronisation barrier
  barrier = lambda do |sync_id|
    C::StringLiteral.parse(PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' sync '+sync_id.to_s+PRAGMA_DELIMITER_END)
  end

  # Find out where the synchronisation statements are needed
  sync_needed = @nests.map do |nest|
    [nest.copyins.map { |copy| copy.get_sync_id },
     nest.copyouts.map { |copy| copy.get_sync_id }]
  end.flatten.uniq

  # Insert the annotations into the code
  @nests.each do |nest|
    pre_sync = 2*nest.id
    post_sync = pre_sync+1

    # Pre-kernel synchronisation barrier
    nest.code.insert_prev(barrier.call(pre_sync)) if with_memory && sync_needed.include?(pre_sync)

    # Pre-kernel species (start of species); ends up between barrier and code
    if nest.has_species?
      opening = if @options[:print_arc]
        nest.print_arc_start
      else
        nest.print_species_start
      end
      nest.code.insert_prev(C::StringLiteral.parse(opening))
    end

    # Post-kernel synchronisation barrier; it goes behind the copyout pragma
    # if one directly follows the code
    if with_memory && sync_needed.include?(post_sync)
      follower = nest.code.next
      anchor = nest.code
      anchor = follower if follower && follower.string? && follower.val =~ /pragma species copyout/
      anchor.insert_next(barrier.call(post_sync))
    end

    # Post-kernel species (end of species); always directly behind the code
    if nest.has_species?
      closing = if @options[:print_arc]
        nest.print_arc_end
      else
        nest.print_species_end
      end
      nest.code.insert_next(C::StringLiteral.parse(closing))
    end
  end
end
|
|
260
|
+
|
|
261
|
+
# Iterate over the loop nests and insert the memory copy annotations into
|
|
262
|
+
# the original AST.
|
|
263
|
+
# Adds the memory copy annotations to the code of every nest in @nests: the
# copyins go directly in front of the nest's code, the copyouts directly
# behind it. Nests without copyins/copyouts are left untouched.
#
# scop_ast:: not read by this method; all insertions go through nest.code
def insert_copies(scop_ast)
  @nests.each do |current|
    if current.has_copyins?
      copyin_pragma = C::StringLiteral.parse(current.print_copyins)
      current.code.insert_prev(copyin_pragma)
    end
    if current.has_copyouts?
      copyout_pragma = C::StringLiteral.parse(current.print_copyouts)
      current.code.insert_next(copyout_pragma)
    end
  end
end
|
|
273
|
+
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
end
|
|
277
|
+
|