bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -34,12 +34,13 @@ int main(void) {
|
|
34
34
|
float G[NI][NL];
|
35
35
|
|
36
36
|
// Set the input data
|
37
|
-
for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
37
|
+
/* for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
38
38
|
for (i=0; i<NK; i++) { for (j=0; j<NJ; j++) { B[i][j] = ((float) i*(j+1)) / NJ; } }
|
39
39
|
for (i=0; i<NL; i++) { for (j=0; j<NJ; j++) { C[i][j] = ((float) i*(j+3)) / NL; } }
|
40
40
|
for (i=0; i<NI; i++) { for (j=0; j<NL; j++) { D[i][j] = ((float) i*(j+2)) / NK; } }
|
41
|
-
|
41
|
+
*/
|
42
42
|
// Perform the computation (G := E*F, with E := A*B and F := C*D)
|
43
|
+
#pragma scop
|
43
44
|
#pragma species kernel 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
44
45
|
// E := A*B
|
45
46
|
for (i=0; i<NI; i++) {
|
@@ -73,9 +74,11 @@ int main(void) {
|
|
73
74
|
}
|
74
75
|
}
|
75
76
|
#pragma species endkernel 3mm-part3
|
77
|
+
#pragma endscop
|
76
78
|
|
77
79
|
// Clean-up and exit the function
|
78
80
|
fflush(stdout);
|
81
|
+
G[8][9] = G[8][9];
|
79
82
|
return 0;
|
80
83
|
}
|
81
84
|
|
@@ -30,15 +30,16 @@ int main(void) {
|
|
30
30
|
float B[N][N];
|
31
31
|
|
32
32
|
// Set the input data
|
33
|
-
for (i=0; i<N; i++) {
|
33
|
+
/* for (i=0; i<N; i++) {
|
34
34
|
for (j=0; j<N; j++) {
|
35
35
|
X[i][j] = ((float) i*(j+1) + 1) / N;
|
36
36
|
A[i][j] = ((float) i*(j+2) + 2) / N;
|
37
37
|
B[i][j] = ((float) i*(j+3) + 3) / N;
|
38
38
|
}
|
39
39
|
}
|
40
|
-
|
40
|
+
*/
|
41
41
|
// Perform the computation
|
42
|
+
#pragma scop
|
42
43
|
for (t=0; t<TSTEPS; t++) {
|
43
44
|
for (i1=0; i1<N; i1++) {
|
44
45
|
for (i2=1; i2<N; i2++) {
|
@@ -46,7 +47,7 @@ int main(void) {
|
|
46
47
|
B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / B[i1][i2-1];
|
47
48
|
}
|
48
49
|
}
|
49
|
-
#pragma species kernel 0:N-1,N-1:N-1|element
|
50
|
+
#pragma species kernel 0:N-1,N-1:N-1|element ^ 0:N-1,N-1:N-1|element -> 0:N-1,N-1:N-1|element
|
50
51
|
for (i1=0; i1<N; i1++) {
|
51
52
|
X[i1][N-1] = X[i1][N-1] / B[i1][N-1];
|
52
53
|
}
|
@@ -73,9 +74,11 @@ int main(void) {
|
|
73
74
|
}
|
74
75
|
}
|
75
76
|
}
|
77
|
+
#pragma endscop
|
76
78
|
|
77
79
|
// Clean-up and exit the function
|
78
80
|
fflush(stdout);
|
81
|
+
X[8][9] = X[8][9];
|
79
82
|
return 0;
|
80
83
|
}
|
81
84
|
|
@@ -31,7 +31,7 @@ int main(void) {
|
|
31
31
|
float tmp[NX];
|
32
32
|
|
33
33
|
// Set the input data
|
34
|
-
for (i=0; i<NY; i++) {
|
34
|
+
/* for (i=0; i<NY; i++) {
|
35
35
|
x[i] = i*3.14159;
|
36
36
|
}
|
37
37
|
for (i=0; i<NX; i++) {
|
@@ -39,8 +39,9 @@ int main(void) {
|
|
39
39
|
A[i][j] = ((float) i*(j+1)) / NX;
|
40
40
|
}
|
41
41
|
}
|
42
|
-
|
42
|
+
*/
|
43
43
|
// Perform the computation (y := A'Ax)
|
44
|
+
#pragma scop
|
44
45
|
#pragma species kernel 0:NX-1,0:NY-1|chunk(0:0,0:NY-1) ^ 0:NY-1|full -> 0:NX-1|element
|
45
46
|
for (i=0; i<NX; i++) {
|
46
47
|
tmp[i] = 0;
|
@@ -57,9 +58,11 @@ int main(void) {
|
|
57
58
|
}
|
58
59
|
}
|
59
60
|
#pragma species endkernel atax-part2
|
61
|
+
#pragma endscop
|
60
62
|
|
61
63
|
// Clean-up and exit the function
|
62
64
|
fflush(stdout);
|
65
|
+
y[9] = y[9];
|
63
66
|
return 0;
|
64
67
|
}
|
65
68
|
|
@@ -32,7 +32,7 @@ int main(void) {
|
|
32
32
|
float s[NY];
|
33
33
|
|
34
34
|
// Set the input data
|
35
|
-
for (i=0; i<NY; i++) {
|
35
|
+
/* for (i=0; i<NY; i++) {
|
36
36
|
p[i] = i*3.14159;
|
37
37
|
}
|
38
38
|
for (i=0; i<NX; i++) {
|
@@ -41,8 +41,9 @@ int main(void) {
|
|
41
41
|
A[i][j] = ((float) i*(j+1)) / NX;
|
42
42
|
}
|
43
43
|
}
|
44
|
-
|
44
|
+
*/
|
45
45
|
// Perform the computation
|
46
|
+
#pragma scop
|
46
47
|
#pragma species kernel 0:NX-1|full ^ 0:NX-1,0:NY-1|chunk(0:NX-1,0:0) -> 0:NY-1|element
|
47
48
|
for (j=0; j<NY; j++) {
|
48
49
|
s[j] = 0;
|
@@ -59,9 +60,11 @@ int main(void) {
|
|
59
60
|
}
|
60
61
|
}
|
61
62
|
#pragma species endkernel bicg-part2
|
63
|
+
#pragma endscop
|
62
64
|
|
63
65
|
// Clean-up and exit the function
|
64
66
|
fflush(stdout);
|
67
|
+
q[9] = q[9];
|
65
68
|
return 0;
|
66
69
|
}
|
67
70
|
|
@@ -38,6 +38,7 @@ int main(void) {
|
|
38
38
|
}
|
39
39
|
|
40
40
|
// Perform the computation
|
41
|
+
#pragma scop
|
41
42
|
for (i=0; i<N; i++) {
|
42
43
|
x[0] = A[i][i];
|
43
44
|
#pragma species kernel i:i,0:i-1|element -> 0:0|shared
|
@@ -57,8 +58,10 @@ int main(void) {
|
|
57
58
|
}
|
58
59
|
#pragma species endkernel cholesky-part2
|
59
60
|
}
|
61
|
+
#pragma endscop
|
60
62
|
|
61
63
|
// Clean-up and exit the function
|
62
64
|
fflush(stdout);
|
65
|
+
A[8][9] = A[8][9];
|
63
66
|
return 0;
|
64
67
|
}
|
@@ -25,9 +25,9 @@
|
|
25
25
|
|
26
26
|
// Select a dataset size
|
27
27
|
//#define MINI_DATASET
|
28
|
-
|
28
|
+
//#define SMALL_DATASET
|
29
29
|
//#define STANDARD_DATASET
|
30
|
-
|
30
|
+
#define LARGE_DATASET
|
31
31
|
//#define EXTRALARGE_DATASET
|
32
32
|
|
33
33
|
// Defines used per benchmark:
|
@@ -15,7 +15,7 @@
|
|
15
15
|
// == File information
|
16
16
|
// Filename...........benchmark/correlation.c
|
17
17
|
// Author.............Cedric Nugteren
|
18
|
-
// Last modified on...
|
18
|
+
// Last modified on...07-Feb-2013
|
19
19
|
//
|
20
20
|
|
21
21
|
#include "common.h"
|
@@ -36,13 +36,15 @@ int main(void) {
|
|
36
36
|
float eps = 0.1;
|
37
37
|
|
38
38
|
// Set the input data
|
39
|
-
for (i=0; i<N; i++) {
|
39
|
+
/* for (i=0; i<N; i++) {
|
40
40
|
for (j=0; j<M; j++) {
|
41
41
|
data[i][j] = ((float) i*j) / M;
|
42
42
|
}
|
43
43
|
}
|
44
|
-
|
44
|
+
*/
|
45
45
|
// Perform the computation
|
46
|
+
#pragma scop
|
47
|
+
|
46
48
|
// Determine the mean of the column vectors of the input data matrix
|
47
49
|
#pragma species kernel 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
48
50
|
for (j=0; j<M; j++) {
|
@@ -53,8 +55,9 @@ int main(void) {
|
|
53
55
|
mean[j] /= float_n;
|
54
56
|
}
|
55
57
|
#pragma species endkernel correlation-part1
|
56
|
-
|
58
|
+
|
57
59
|
// Determine the standard deviations of the column vectors of the input data matrix
|
60
|
+
#pragma species kernel 0:M-1|element ^ 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
58
61
|
for (j=0; j<M; j++) {
|
59
62
|
stddev[j] = 0.0;
|
60
63
|
meanj = mean[j];
|
@@ -66,8 +69,9 @@ int main(void) {
|
|
66
69
|
stddev[j] = stddev[j] <= eps ? 1.0 : stddev[j];
|
67
70
|
}
|
68
71
|
#pragma species endkernel correlation-part2
|
69
|
-
|
72
|
+
|
70
73
|
// Center and reduce the column vectors
|
74
|
+
#pragma species kernel 0:N-1,0:M-1|element ^ 0:M-1|element ^ 0:M-1|element -> 0:N-1,0:M-1|element
|
71
75
|
for (i=0; i<N; i++) {
|
72
76
|
for (j=0; j<M; j++) {
|
73
77
|
data[i][j] -= mean[j];
|
@@ -75,10 +79,14 @@ int main(void) {
|
|
75
79
|
}
|
76
80
|
}
|
77
81
|
#pragma species endkernel correlation-part3
|
82
|
+
|
83
|
+
// End of the computation
|
84
|
+
#pragma endscop
|
85
|
+
|
78
86
|
// Calculate the MxM correlation matrix
|
79
87
|
for (j1=0; j1<M-1; j1++) {
|
80
88
|
symmat[j1][j1] = 1.0;
|
81
|
-
|
89
|
+
//#pragma species kernel 0:N-1,j1:j1|full ^ 0:N-1,j1+1:M-1|chunk(0:N-1,0:0) -> j1+1:M-1,j1:j1|element ^ j1:j1,j1+1:M-1|element
|
82
90
|
for (j2=j1+1; j2<M; j2++) {
|
83
91
|
symmat[j1][j2] = 0.0;
|
84
92
|
for (i = 0; i<N; i++) {
|
@@ -86,12 +94,13 @@ int main(void) {
|
|
86
94
|
}
|
87
95
|
symmat[j2][j1] = symmat[j1][j2];
|
88
96
|
}
|
89
|
-
|
97
|
+
//#pragma species endkernel correlation-part4
|
90
98
|
}
|
91
99
|
symmat[M-1][M-1] = 1.0;
|
92
100
|
|
93
101
|
// Clean-up and exit the function
|
94
102
|
fflush(stdout);
|
103
|
+
symmat[8][9] = symmat[8][9];
|
95
104
|
return 0;
|
96
105
|
}
|
97
106
|
|
@@ -33,13 +33,14 @@ int main(void) {
|
|
33
33
|
float float_n = 1.2;
|
34
34
|
|
35
35
|
// Set the input data
|
36
|
-
|
36
|
+
/* for (i=0; i<N; i++) {
|
37
37
|
for (j=0; j<M; j++) {
|
38
38
|
data[i][j] = ((float) i*j) / M;
|
39
39
|
}
|
40
40
|
}
|
41
|
-
|
41
|
+
*/
|
42
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
43
44
|
#pragma species kernel 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
44
45
|
// Determine the mean of the column vectors of the input data matrix
|
45
46
|
for (j=0; j<M; j++) {
|
@@ -58,6 +59,9 @@ int main(void) {
|
|
58
59
|
}
|
59
60
|
}
|
60
61
|
#pragma species endkernel covariance-part2
|
62
|
+
|
63
|
+
#pragma endscop
|
64
|
+
|
61
65
|
// Calculate the MxM covariance matrix
|
62
66
|
for (j1=0; j1<M; j1++) {
|
63
67
|
#pragma species kernel 0:N-1,j1:j1|full ^ 0:N-1,j1:M-1|chunk(0:N-1,0:0) -> j1:M-1,j1:j1|element ^ j1:j1,j1:M-1|element
|
@@ -73,5 +77,6 @@ int main(void) {
|
|
73
77
|
|
74
78
|
// Clean-up and exit the function
|
75
79
|
fflush(stdout);
|
80
|
+
symmat[8][9] = symmat[8][9];
|
76
81
|
return 0;
|
77
82
|
}
|
@@ -30,10 +30,11 @@ int main(void) {
|
|
30
30
|
float C4[NP][NP];
|
31
31
|
|
32
32
|
// Set the input data
|
33
|
-
for (i=0; i<NR; i++) { for (j=0; j<NQ; j++) { for (k=0; k<NP; k++) { A[i][j][k] = ((float) i*j + k) / NP; } } }
|
33
|
+
/* for (i=0; i<NR; i++) { for (j=0; j<NQ; j++) { for (k=0; k<NP; k++) { A[i][j][k] = ((float) i*j + k) / NP; } } }
|
34
34
|
for (i=0; i<NP; i++) { for (j=0; j<NP; j++) { C4[i][j] = ((float) i*j) / NP; } }
|
35
|
-
|
35
|
+
*/
|
36
36
|
// Perform the computation
|
37
|
+
#pragma scop
|
37
38
|
#pragma species kernel 0:NR-1,0:NQ-1,0:NP-1|chunk(0:0,0:0,0:NP-1) ^ 0:NP-1,0:NP-1|chunk(0:NP-1,0:0) -> 0:NR-1,0:NQ-1,0:NP-1|element
|
38
39
|
for (r=0; r<NR; r++) {
|
39
40
|
for (q=0; q<NQ; q++) {
|
@@ -55,9 +56,11 @@ int main(void) {
|
|
55
56
|
}
|
56
57
|
}
|
57
58
|
#pragma species endkernel doitgen-part2
|
59
|
+
#pragma endscop
|
58
60
|
|
59
61
|
// Clean-up and exit the function
|
60
62
|
fflush(stdout);
|
63
|
+
A[8][9][3] = A[8][9][3];
|
61
64
|
return 0;
|
62
65
|
}
|
63
66
|
|
@@ -45,6 +45,7 @@ int main(void) {
|
|
45
45
|
}
|
46
46
|
|
47
47
|
// Perform the computation
|
48
|
+
#pragma scop
|
48
49
|
y[0][0] = r[0];
|
49
50
|
beta[0] = 1;
|
50
51
|
alpha[0] = r[0];
|
@@ -68,9 +69,11 @@ int main(void) {
|
|
68
69
|
out[i] = y[i][NX-1];
|
69
70
|
}
|
70
71
|
#pragma species endkernel durbin-part2
|
72
|
+
#pragma endscop
|
71
73
|
|
72
74
|
// Clean-up and exit the function
|
73
75
|
fflush(stdout);
|
76
|
+
out[9] = out[9];
|
74
77
|
return 0;
|
75
78
|
}
|
76
79
|
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
43
44
|
for (iter=0; iter<ITER; iter++) {
|
44
45
|
#pragma species kernel 0:0|void -> 0:LENGTH-1,0:LENGTH-1|element
|
45
46
|
for (i=0; i<=LENGTH-1; i++) {
|
@@ -59,9 +60,11 @@ int main(void) {
|
|
59
60
|
}
|
60
61
|
out += c[0][LENGTH-1];
|
61
62
|
}
|
63
|
+
#pragma endscop
|
62
64
|
|
63
65
|
// Clean-up and exit the function
|
64
66
|
fflush(stdout);
|
67
|
+
c[8][9] = c[8][9];
|
65
68
|
return 0;
|
66
69
|
}
|
67
70
|
|
@@ -72,6 +72,7 @@ int main(void) {
|
|
72
72
|
}
|
73
73
|
|
74
74
|
// Perform the computation
|
75
|
+
#pragma scop
|
75
76
|
for (iz=0; iz<CZ; iz++) {
|
76
77
|
for (iy=0; iy<CYM; iy++) {
|
77
78
|
czm_iz = czm[iz];
|
@@ -106,9 +107,11 @@ int main(void) {
|
|
106
107
|
Bza[iz][CYM][CXM] = tmp;
|
107
108
|
}
|
108
109
|
}
|
110
|
+
#pragma endscop
|
109
111
|
|
110
112
|
// Clean-up and exit the function
|
111
113
|
fflush(stdout);
|
114
|
+
Hz[8][9][3] = Hz[8][9][3];
|
112
115
|
return 0;
|
113
116
|
}
|
114
117
|
|
@@ -30,15 +30,16 @@ int main(void) {
|
|
30
30
|
float hz[NI][NJ];
|
31
31
|
|
32
32
|
// Set the input data
|
33
|
-
for (i=0; i<NI; i++) {
|
33
|
+
/* for (i=0; i<NI; i++) {
|
34
34
|
for (j=0; j<NJ; j++) {
|
35
35
|
ex[i][j] = ((float) i*(j+1)) / NI;
|
36
36
|
ey[i][j] = ((float) i*(j+2)) / NJ;
|
37
37
|
hz[i][j] = ((float) i*(j+3)) / NI;
|
38
38
|
}
|
39
39
|
}
|
40
|
-
|
40
|
+
*/
|
41
41
|
// Perform the computation
|
42
|
+
#pragma scop
|
42
43
|
for (t=0; t<TSTEPS; t++) {
|
43
44
|
#pragma species kernel 0:0|void -> 0:0,0:NJ-1|element
|
44
45
|
for (j=0; j<NJ; j++) {
|
@@ -67,8 +68,10 @@ int main(void) {
|
|
67
68
|
}
|
68
69
|
#pragma species endkernel fdtd-2d-part4
|
69
70
|
}
|
71
|
+
#pragma endscop
|
70
72
|
|
71
73
|
// Clean-up and exit the function
|
72
74
|
fflush(stdout);
|
75
|
+
hz[8][9] = hz[8][9];
|
73
76
|
return 0;
|
74
77
|
}
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
// Perform the computation
|
38
|
+
#pragma scop
|
38
39
|
for (k=0; k<N; k++) {
|
39
40
|
for (i=0; i<N; i++) {
|
40
41
|
for (j=0; j<N; j++) {
|
@@ -42,9 +43,11 @@ int main(void) {
|
|
42
43
|
}
|
43
44
|
}
|
44
45
|
}
|
46
|
+
#pragma endscop
|
45
47
|
|
46
48
|
// Clean-up and exit the function
|
47
49
|
fflush(stdout);
|
50
|
+
path[8][9] = path[8][9];
|
48
51
|
return 0;
|
49
52
|
}
|
50
53
|
|
@@ -34,7 +34,7 @@ int main(void) {
|
|
34
34
|
int beta = 2123;
|
35
35
|
|
36
36
|
// Set the input data
|
37
|
-
for (i=0; i<NI; i++) {
|
37
|
+
/* for (i=0; i<NI; i++) {
|
38
38
|
for (j=0; j<NK; j++) {
|
39
39
|
A[i][j] = ((float) i*j) / NI;
|
40
40
|
}
|
@@ -49,8 +49,9 @@ int main(void) {
|
|
49
49
|
C[i][j] = ((float) i*j) / NI;
|
50
50
|
}
|
51
51
|
}
|
52
|
-
|
52
|
+
*/
|
53
53
|
// Perform the computation (C := alpha*A*B + beta*C)
|
54
|
+
#pragma scop
|
54
55
|
#pragma species kernel 0:NI-1,0:NJ-1|element ^ 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
55
56
|
for (i=0; i<NI; i++) {
|
56
57
|
for (j=0; j<NJ; j++) {
|
@@ -61,9 +62,11 @@ int main(void) {
|
|
61
62
|
}
|
62
63
|
}
|
63
64
|
#pragma species endkernel gemm
|
65
|
+
#pragma endscop
|
64
66
|
|
65
67
|
// Clean-up and exit the function
|
66
68
|
fflush(stdout);
|
69
|
+
C[8][9] = C[8][9];
|
67
70
|
return 0;
|
68
71
|
}
|
69
72
|
|