bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
|
File without changes
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
// == File information
|
|
11
11
|
// Filename...........element/example2.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...06-May-2013
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
@@ -31,7 +31,7 @@ int main(void) {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
// Perform the computation
|
|
34
|
-
#pragma species kernel
|
|
34
|
+
#pragma species kernel 0:3,0:7|element -> 0:3,0:7|element
|
|
35
35
|
for(i=0;i<4;i++) {
|
|
36
36
|
for(j=0;j<8;j++) {
|
|
37
37
|
B[i][j] = A[i][7-j];
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........element/example13.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...07-May-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
|
|
18
|
+
// This is 'example13', an example with multiple loop nests and various if-statements
|
|
19
|
+
int main(void) {
|
|
20
|
+
int i,j;
|
|
21
|
+
int N = 256;
|
|
22
|
+
|
|
23
|
+
// Declare input/output arrays
|
|
24
|
+
int A[N];
|
|
25
|
+
int B[N];
|
|
26
|
+
int C[N];
|
|
27
|
+
int D[N][N];
|
|
28
|
+
int E[N][N];
|
|
29
|
+
|
|
30
|
+
// Set the input data
|
|
31
|
+
for(i=0;i<N;i++) {
|
|
32
|
+
A[i] = i;
|
|
33
|
+
B[i] = i+5;
|
|
34
|
+
C[i] = i+9;
|
|
35
|
+
for(j=0;j<N;j++) {
|
|
36
|
+
D[i][j] = i*j+3;
|
|
37
|
+
E[i][j] = i*j+9;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Perform the computation
|
|
42
|
+
#pragma species kernel C[0:N-1]|element -> B[11:N-1]|element ^ A[0:5]|element
|
|
43
|
+
for (i=0; i<N; i++) {
|
|
44
|
+
if (i > 10) {
|
|
45
|
+
B[i] = C[i];
|
|
46
|
+
}
|
|
47
|
+
if (i < 6) {
|
|
48
|
+
A[i] = C[i];
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
#pragma species endkernel example13_k1
|
|
52
|
+
#pragma species kernel A[50:N-1]|element -> B[50:N-1]|element
|
|
53
|
+
for (i=0; i<N-9; i++) {
|
|
54
|
+
if (i+10 > 50) {
|
|
55
|
+
B[i+9] = A[i+9];
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
#pragma species endkernel example13_k2
|
|
59
|
+
#pragma species kernel E[5:N-1,0:N-1]|element -> D[5:N-1,0:N-1]|element
|
|
60
|
+
for (i=0; i<N; i++) {
|
|
61
|
+
for (j=0; j<N; j++) {
|
|
62
|
+
if (i > 4) {
|
|
63
|
+
D[i][j] = E[i][j];
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
#pragma species endkernel example13_k3
|
|
68
|
+
|
|
69
|
+
// Clean-up and exit the function
|
|
70
|
+
fflush(stdout);
|
|
71
|
+
return 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........fusion/example01.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...09-July-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
#define N 512
|
|
18
|
+
#define M 2048
|
|
19
|
+
|
|
20
|
+
// This is 'example01', a basic example of an opportunity for scalar kernel fusion.
|
|
21
|
+
int main(void) {
|
|
22
|
+
int i,j;
|
|
23
|
+
|
|
24
|
+
// Declare input/output arrays
|
|
25
|
+
int A[N][M];
|
|
26
|
+
int B[N][M];
|
|
27
|
+
int C[N][M];
|
|
28
|
+
|
|
29
|
+
// Set the input data
|
|
30
|
+
for(i=0;i<N;i++) {
|
|
31
|
+
for(j=0;j<M;j++) {
|
|
32
|
+
A[i][j] = i+j;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Perform the computation
|
|
37
|
+
#pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element
|
|
38
|
+
for(i=0;i<N;i++) {
|
|
39
|
+
for(j=0;j<M;j++) {
|
|
40
|
+
B[i][j] = 2*A[i][j];
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
#pragma species endkernel example01-part1
|
|
44
|
+
#pragma species kernel B[0:N-1,0:M-1]|element -> C[0:N-1,0:M-1]|element
|
|
45
|
+
for(i=0;i<N;i++) {
|
|
46
|
+
for(j=0;j<M;j++) {
|
|
47
|
+
C[i][j] = 8*B[i][j];
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
#pragma species endkernel example01-part2
|
|
51
|
+
|
|
52
|
+
/*
|
|
53
|
+
#pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element ^ C[0:N-1,0:M-1]|element
|
|
54
|
+
for(i=0;i<N;i++) {
|
|
55
|
+
for(j=0;j<M;j++) {
|
|
56
|
+
B[i][j] = 2*A[i][j];
|
|
57
|
+
C[i][j] = 8*B[i][j];
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
#pragma species endkernel example01-fused
|
|
61
|
+
*/
|
|
62
|
+
|
|
63
|
+
// Clean-up and exit the function
|
|
64
|
+
fflush(stdout);
|
|
65
|
+
C[8][9] = C[8][9];
|
|
66
|
+
return 0;
|
|
67
|
+
}
|
|
68
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........fusion/example02.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...09-July-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
#define N 2048
|
|
18
|
+
#define M 512
|
|
19
|
+
// Condition: M must be smaller than N
|
|
20
|
+
|
|
21
|
+
// This is 'example02', an example of scalar kernel fusion with mismatching bounds but independent loop bodies.
|
|
22
|
+
int main(void) {
|
|
23
|
+
int i,j;
|
|
24
|
+
|
|
25
|
+
// Declare input/output arrays
|
|
26
|
+
int A[N][M];
|
|
27
|
+
int B[N][M];
|
|
28
|
+
int C[N][M];
|
|
29
|
+
|
|
30
|
+
// Set the input data
|
|
31
|
+
for(i=0;i<N;i++) {
|
|
32
|
+
for(j=0;j<M;j++) {
|
|
33
|
+
A[i][j] = i+j;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Perform the computation
|
|
38
|
+
#pragma species kernel A[0:N-1,10:M-1]|element -> B[0:N-1,10:M-1]|element
|
|
39
|
+
for(i=0;i<N;i++) {
|
|
40
|
+
for(j=10;j<M;j++) {
|
|
41
|
+
B[i][j] = A[i][j] + 3;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
#pragma species endkernel example02-part1
|
|
45
|
+
#pragma species kernel A[0:M-1,0:M-1]|element -> C[0:M-1,0:M-1]|element
|
|
46
|
+
for(i=0;i<M;i++) {
|
|
47
|
+
for(j=0;j<M;j++) {
|
|
48
|
+
C[i][j] = -9*A[i][j];
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
#pragma species endkernel example02-part2
|
|
52
|
+
|
|
53
|
+
/*
|
|
54
|
+
#pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element ^ C[0:N-1,0:M-1]|element
|
|
55
|
+
for(i=0;i<MAX(N,M);i++) {
|
|
56
|
+
for(j=0;j<M;j++) {
|
|
57
|
+
if (j >= 10 && i < N) {
|
|
58
|
+
B[i][j] = A[i][j] + 3;
|
|
59
|
+
}
|
|
60
|
+
if (i < M) {
|
|
61
|
+
C[i][j] = -9*A[i][j];
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
#pragma species endkernel example02-fused
|
|
66
|
+
*/
|
|
67
|
+
|
|
68
|
+
// Clean-up and exit the function
|
|
69
|
+
fflush(stdout);
|
|
70
|
+
C[8][9] = C[8][9];
|
|
71
|
+
return 0;
|
|
72
|
+
}
|
|
73
|
+
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........fusion/example03.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...02-Oct-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
|
|
18
|
+
// This is 'example03', with code similar to PolyBench's "2mm" benchmark. This is an example where fusion is only legal w.r.t. the i-loop
|
|
19
|
+
int main(void) {
|
|
20
|
+
int i,j,k;
|
|
21
|
+
|
|
22
|
+
// Declare arrays on the stack
|
|
23
|
+
float A[2048][2048];
|
|
24
|
+
float B[2048][2048];
|
|
25
|
+
float C[2048][2048];
|
|
26
|
+
float D[2048][2048];
|
|
27
|
+
float tmp[2048][2048];
|
|
28
|
+
|
|
29
|
+
// Set the constants
|
|
30
|
+
int alpha = 32412;
|
|
31
|
+
int beta = 2123;
|
|
32
|
+
|
|
33
|
+
// Set the input data
|
|
34
|
+
for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { A[i][j] = ((float) i*j) / 2048; } }
|
|
35
|
+
for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { B[i][j] = ((float) i*(j+1)) / 2048; } }
|
|
36
|
+
for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { C[i][j] = ((float) i*(j+3)) / 2048; } }
|
|
37
|
+
for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { D[i][j] = ((float) i*(j+2)) / 2048; } }
|
|
38
|
+
|
|
39
|
+
// Perform the computation (D := alpha*A*B*C + beta*D)
|
|
40
|
+
#pragma species copyin A[0:2047,0:2047]|0 ^ B[0:2047,0:2047]|0 ^ D[0:2047,0:2047]|1 ^ C[0:2047,0:2047]|1
|
|
41
|
+
#pragma species sync 0
|
|
42
|
+
#pragma species kernel A[0:2047,0:2047]|chunk(0:0,0:2047) ^ B[0:2047,0:2047]|chunk(0:2047,0:0) -> tmp[0:2047,0:2047]|element
|
|
43
|
+
for (i=0; i<2048; i++) {
|
|
44
|
+
for (j=0; j<2048; j++) {
|
|
45
|
+
tmp[i][j] = 0;
|
|
46
|
+
for (k=0; k<2048; k++) {
|
|
47
|
+
tmp[i][j] += alpha * A[i][k] * B[k][j];
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
#pragma species endkernel example03-part1
|
|
52
|
+
#pragma species copyout tmp[0:2047,0:2047]|2
|
|
53
|
+
#pragma species sync 1
|
|
54
|
+
#pragma species kernel D[0:2047,0:2047]|element ^ tmp[0:2047,0:2047]|chunk(0:0,0:2047) ^ C[0:2047,0:2047]|chunk(0:2047,0:0) -> D[0:2047,0:2047]|element
|
|
55
|
+
for (i=0; i<2048; i++) {
|
|
56
|
+
for (j=0; j<2048; j++) {
|
|
57
|
+
D[i][j] *= beta;
|
|
58
|
+
for (k=0; k<2048; k++) {
|
|
59
|
+
D[i][j] += tmp[i][k] * C[k][j];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
#pragma species endkernel example03-part2
|
|
64
|
+
#pragma species copyout D[0:2047,0:2047]|2
|
|
65
|
+
#pragma species sync 2
|
|
66
|
+
|
|
67
|
+
// Clean-up and exit the function
|
|
68
|
+
fflush(stdout);
|
|
69
|
+
D[8][9] = D[8][9];
|
|
70
|
+
return 0;
|
|
71
|
+
}
|
|
72
|
+
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........fusion/example04.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...02-Oct-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
|
|
18
|
+
// This is 'example04', with code similar to PolyBench's "atax" benchmark
|
|
19
|
+
int main(void) {
|
|
20
|
+
int i,j;
|
|
21
|
+
|
|
22
|
+
// Declare arrays on the stack
|
|
23
|
+
float A[4096][4096];
|
|
24
|
+
float x[4096];
|
|
25
|
+
float y[4096];
|
|
26
|
+
float tmp[4096];
|
|
27
|
+
|
|
28
|
+
// Set the input data
|
|
29
|
+
for (i=0; i<4096; i++) {
|
|
30
|
+
x[i] = i*3.14159;
|
|
31
|
+
}
|
|
32
|
+
for (i=0; i<4096; i++) {
|
|
33
|
+
for (j=0; j<4096; j++) {
|
|
34
|
+
A[i][j] = ((float) i*(j+1)) / 4096;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Perform the computation (y := A'Ax)
|
|
39
|
+
#pragma species kernel 0:4095,0:4095|chunk(0:0,0:4095) ^ 0:4095|full -> 0:4095|element
|
|
40
|
+
for (i=0; i<4096; i++) {
|
|
41
|
+
tmp[i] = 0;
|
|
42
|
+
for (j=0; j<4096; j++) {
|
|
43
|
+
tmp[i] = tmp[i] + A[i][j] * x[j];
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
#pragma species endkernel atax-part1
|
|
47
|
+
#pragma species kernel 0:4095,0:4095|chunk(0:4095,0:0) ^ 0:4095|full -> 0:4095|element
|
|
48
|
+
for (j=0; j<4096; j++) {
|
|
49
|
+
y[j] = 0;
|
|
50
|
+
for (i=0; i<4096; i++) {
|
|
51
|
+
y[j] = y[j] + A[i][j] * tmp[i];
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
#pragma species endkernel atax-part2
|
|
55
|
+
|
|
56
|
+
// Clean-up and exit the function
|
|
57
|
+
fflush(stdout);
|
|
58
|
+
y[9] = y[9];
|
|
59
|
+
return 0;
|
|
60
|
+
}
|
|
61
|
+
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// example is meant to illustrate the use of Bones. For more information on Bones
|
|
4
|
+
// use the contact information below.
|
|
5
|
+
//
|
|
6
|
+
// == More information on Bones
|
|
7
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
8
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
|
+
//
|
|
10
|
+
// == File information
|
|
11
|
+
// Filename...........fusion/example05.c
|
|
12
|
+
// Author.............Cedric Nugteren
|
|
13
|
+
// Last modified on...08-October-2013
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
#include <stdio.h>
|
|
17
|
+
|
|
18
|
+
// This is 'example05', like example02 but with constant values.
|
|
19
|
+
int main(void) {
|
|
20
|
+
int i,j;
|
|
21
|
+
|
|
22
|
+
// Declare input/output arrays
|
|
23
|
+
int A[2048][1024];
|
|
24
|
+
int B[2048][1024];
|
|
25
|
+
int C[2048][1024];
|
|
26
|
+
|
|
27
|
+
// Set the input data
|
|
28
|
+
for(i=0;i<2048;i++) {
|
|
29
|
+
for(j=0;j<1024;j++) {
|
|
30
|
+
A[i][j] = i+j;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Perform the computation
|
|
35
|
+
#pragma species kernel A[0:2047,0:1023]|element -> B[0:2047,0:1023]|element
|
|
36
|
+
for(i=0;i<2048;i++) {
|
|
37
|
+
for(j=0;j<1024;j++) {
|
|
38
|
+
B[i][j] = A[i][j] + 3;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
#pragma species endkernel example05-part1
|
|
42
|
+
#pragma species kernel A[0:2047,0:979]|element -> C[0:2047,0:979]|element
|
|
43
|
+
for(i=0;i<2048;i++) {
|
|
44
|
+
for(j=0;j<980;j++) {
|
|
45
|
+
C[i][j] = 9*A[i][j];
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
#pragma species endkernel example05-part2
|
|
49
|
+
|
|
50
|
+
// Clean-up and exit the function
|
|
51
|
+
fflush(stdout);
|
|
52
|
+
C[8][9] = C[8][9];
|
|
53
|
+
return 0;
|
|
54
|
+
}
|
|
55
|
+
|