bones-compiler 1.1.0 → 1.3.1
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
|
@@ -34,12 +34,13 @@ int main(void) {
|
|
|
34
34
|
float G[NI][NL];
|
|
35
35
|
|
|
36
36
|
// Set the input data
|
|
37
|
-
for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
|
37
|
+
/* for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
|
38
38
|
for (i=0; i<NK; i++) { for (j=0; j<NJ; j++) { B[i][j] = ((float) i*(j+1)) / NJ; } }
|
|
39
39
|
for (i=0; i<NL; i++) { for (j=0; j<NJ; j++) { C[i][j] = ((float) i*(j+3)) / NL; } }
|
|
40
40
|
for (i=0; i<NI; i++) { for (j=0; j<NL; j++) { D[i][j] = ((float) i*(j+2)) / NK; } }
|
|
41
|
-
|
|
41
|
+
*/
|
|
42
42
|
// Perform the computation (G := E*F, with E := A*B and F := C*D)
|
|
43
|
+
#pragma scop
|
|
43
44
|
#pragma species kernel 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
|
44
45
|
// E := A*B
|
|
45
46
|
for (i=0; i<NI; i++) {
|
|
@@ -73,9 +74,11 @@ int main(void) {
|
|
|
73
74
|
}
|
|
74
75
|
}
|
|
75
76
|
#pragma species endkernel 3mm-part3
|
|
77
|
+
#pragma endscop
|
|
76
78
|
|
|
77
79
|
// Clean-up and exit the function
|
|
78
80
|
fflush(stdout);
|
|
81
|
+
G[8][9] = G[8][9];
|
|
79
82
|
return 0;
|
|
80
83
|
}
|
|
81
84
|
|
|
@@ -30,15 +30,16 @@ int main(void) {
|
|
|
30
30
|
float B[N][N];
|
|
31
31
|
|
|
32
32
|
// Set the input data
|
|
33
|
-
for (i=0; i<N; i++) {
|
|
33
|
+
/* for (i=0; i<N; i++) {
|
|
34
34
|
for (j=0; j<N; j++) {
|
|
35
35
|
X[i][j] = ((float) i*(j+1) + 1) / N;
|
|
36
36
|
A[i][j] = ((float) i*(j+2) + 2) / N;
|
|
37
37
|
B[i][j] = ((float) i*(j+3) + 3) / N;
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
|
-
|
|
40
|
+
*/
|
|
41
41
|
// Perform the computation
|
|
42
|
+
#pragma scop
|
|
42
43
|
for (t=0; t<TSTEPS; t++) {
|
|
43
44
|
for (i1=0; i1<N; i1++) {
|
|
44
45
|
for (i2=1; i2<N; i2++) {
|
|
@@ -46,7 +47,7 @@ int main(void) {
|
|
|
46
47
|
B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / B[i1][i2-1];
|
|
47
48
|
}
|
|
48
49
|
}
|
|
49
|
-
#pragma species kernel 0:N-1,N-1:N-1|element
|
|
50
|
+
#pragma species kernel 0:N-1,N-1:N-1|element ^ 0:N-1,N-1:N-1|element -> 0:N-1,N-1:N-1|element
|
|
50
51
|
for (i1=0; i1<N; i1++) {
|
|
51
52
|
X[i1][N-1] = X[i1][N-1] / B[i1][N-1];
|
|
52
53
|
}
|
|
@@ -73,9 +74,11 @@ int main(void) {
|
|
|
73
74
|
}
|
|
74
75
|
}
|
|
75
76
|
}
|
|
77
|
+
#pragma endscop
|
|
76
78
|
|
|
77
79
|
// Clean-up and exit the function
|
|
78
80
|
fflush(stdout);
|
|
81
|
+
X[8][9] = X[8][9];
|
|
79
82
|
return 0;
|
|
80
83
|
}
|
|
81
84
|
|
|
@@ -31,7 +31,7 @@ int main(void) {
|
|
|
31
31
|
float tmp[NX];
|
|
32
32
|
|
|
33
33
|
// Set the input data
|
|
34
|
-
for (i=0; i<NY; i++) {
|
|
34
|
+
/* for (i=0; i<NY; i++) {
|
|
35
35
|
x[i] = i*3.14159;
|
|
36
36
|
}
|
|
37
37
|
for (i=0; i<NX; i++) {
|
|
@@ -39,8 +39,9 @@ int main(void) {
|
|
|
39
39
|
A[i][j] = ((float) i*(j+1)) / NX;
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
|
-
|
|
42
|
+
*/
|
|
43
43
|
// Perform the computation (y := A'Ax)
|
|
44
|
+
#pragma scop
|
|
44
45
|
#pragma species kernel 0:NX-1,0:NY-1|chunk(0:0,0:NY-1) ^ 0:NY-1|full -> 0:NX-1|element
|
|
45
46
|
for (i=0; i<NX; i++) {
|
|
46
47
|
tmp[i] = 0;
|
|
@@ -57,9 +58,11 @@ int main(void) {
|
|
|
57
58
|
}
|
|
58
59
|
}
|
|
59
60
|
#pragma species endkernel atax-part2
|
|
61
|
+
#pragma endscop
|
|
60
62
|
|
|
61
63
|
// Clean-up and exit the function
|
|
62
64
|
fflush(stdout);
|
|
65
|
+
y[9] = y[9];
|
|
63
66
|
return 0;
|
|
64
67
|
}
|
|
65
68
|
|
|
@@ -32,7 +32,7 @@ int main(void) {
|
|
|
32
32
|
float s[NY];
|
|
33
33
|
|
|
34
34
|
// Set the input data
|
|
35
|
-
for (i=0; i<NY; i++) {
|
|
35
|
+
/* for (i=0; i<NY; i++) {
|
|
36
36
|
p[i] = i*3.14159;
|
|
37
37
|
}
|
|
38
38
|
for (i=0; i<NX; i++) {
|
|
@@ -41,8 +41,9 @@ int main(void) {
|
|
|
41
41
|
A[i][j] = ((float) i*(j+1)) / NX;
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
-
|
|
44
|
+
*/
|
|
45
45
|
// Perform the computation
|
|
46
|
+
#pragma scop
|
|
46
47
|
#pragma species kernel 0:NX-1|full ^ 0:NX-1,0:NY-1|chunk(0:NX-1,0:0) -> 0:NY-1|element
|
|
47
48
|
for (j=0; j<NY; j++) {
|
|
48
49
|
s[j] = 0;
|
|
@@ -59,9 +60,11 @@ int main(void) {
|
|
|
59
60
|
}
|
|
60
61
|
}
|
|
61
62
|
#pragma species endkernel bicg-part2
|
|
63
|
+
#pragma endscop
|
|
62
64
|
|
|
63
65
|
// Clean-up and exit the function
|
|
64
66
|
fflush(stdout);
|
|
67
|
+
q[9] = q[9];
|
|
65
68
|
return 0;
|
|
66
69
|
}
|
|
67
70
|
|
|
@@ -38,6 +38,7 @@ int main(void) {
|
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
// Perform the computation
|
|
41
|
+
#pragma scop
|
|
41
42
|
for (i=0; i<N; i++) {
|
|
42
43
|
x[0] = A[i][i];
|
|
43
44
|
#pragma species kernel i:i,0:i-1|element -> 0:0|shared
|
|
@@ -57,8 +58,10 @@ int main(void) {
|
|
|
57
58
|
}
|
|
58
59
|
#pragma species endkernel cholesky-part2
|
|
59
60
|
}
|
|
61
|
+
#pragma endscop
|
|
60
62
|
|
|
61
63
|
// Clean-up and exit the function
|
|
62
64
|
fflush(stdout);
|
|
65
|
+
A[8][9] = A[8][9];
|
|
63
66
|
return 0;
|
|
64
67
|
}
|
|
@@ -25,9 +25,9 @@
|
|
|
25
25
|
|
|
26
26
|
// Select a dataset size
|
|
27
27
|
//#define MINI_DATASET
|
|
28
|
-
|
|
28
|
+
//#define SMALL_DATASET
|
|
29
29
|
//#define STANDARD_DATASET
|
|
30
|
-
|
|
30
|
+
#define LARGE_DATASET
|
|
31
31
|
//#define EXTRALARGE_DATASET
|
|
32
32
|
|
|
33
33
|
// Defines used per benchmark:
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
// == File information
|
|
16
16
|
// Filename...........benchmark/correlation.c
|
|
17
17
|
// Author.............Cedric Nugteren
|
|
18
|
-
// Last modified on...
|
|
18
|
+
// Last modified on...07-Feb-2013
|
|
19
19
|
//
|
|
20
20
|
|
|
21
21
|
#include "common.h"
|
|
@@ -36,13 +36,15 @@ int main(void) {
|
|
|
36
36
|
float eps = 0.1;
|
|
37
37
|
|
|
38
38
|
// Set the input data
|
|
39
|
-
for (i=0; i<N; i++) {
|
|
39
|
+
/* for (i=0; i<N; i++) {
|
|
40
40
|
for (j=0; j<M; j++) {
|
|
41
41
|
data[i][j] = ((float) i*j) / M;
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
-
|
|
44
|
+
*/
|
|
45
45
|
// Perform the computation
|
|
46
|
+
#pragma scop
|
|
47
|
+
|
|
46
48
|
// Determine the mean of the column vectors of the input data matrix
|
|
47
49
|
#pragma species kernel 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
|
48
50
|
for (j=0; j<M; j++) {
|
|
@@ -53,8 +55,9 @@ int main(void) {
|
|
|
53
55
|
mean[j] /= float_n;
|
|
54
56
|
}
|
|
55
57
|
#pragma species endkernel correlation-part1
|
|
56
|
-
|
|
58
|
+
|
|
57
59
|
// Determine the standard deviations of the column vectors of the input data matrix
|
|
60
|
+
#pragma species kernel 0:M-1|element ^ 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
|
58
61
|
for (j=0; j<M; j++) {
|
|
59
62
|
stddev[j] = 0.0;
|
|
60
63
|
meanj = mean[j];
|
|
@@ -66,8 +69,9 @@ int main(void) {
|
|
|
66
69
|
stddev[j] = stddev[j] <= eps ? 1.0 : stddev[j];
|
|
67
70
|
}
|
|
68
71
|
#pragma species endkernel correlation-part2
|
|
69
|
-
|
|
72
|
+
|
|
70
73
|
// Center and reduce the column vectors
|
|
74
|
+
#pragma species kernel 0:N-1,0:M-1|element ^ 0:M-1|element ^ 0:M-1|element -> 0:N-1,0:M-1|element
|
|
71
75
|
for (i=0; i<N; i++) {
|
|
72
76
|
for (j=0; j<M; j++) {
|
|
73
77
|
data[i][j] -= mean[j];
|
|
@@ -75,10 +79,14 @@ int main(void) {
|
|
|
75
79
|
}
|
|
76
80
|
}
|
|
77
81
|
#pragma species endkernel correlation-part3
|
|
82
|
+
|
|
83
|
+
// End of the computation
|
|
84
|
+
#pragma endscop
|
|
85
|
+
|
|
78
86
|
// Calculate the MxM correlation matrix
|
|
79
87
|
for (j1=0; j1<M-1; j1++) {
|
|
80
88
|
symmat[j1][j1] = 1.0;
|
|
81
|
-
|
|
89
|
+
//#pragma species kernel 0:N-1,j1:j1|full ^ 0:N-1,j1+1:M-1|chunk(0:N-1,0:0) -> j1+1:M-1,j1:j1|element ^ j1:j1,j1+1:M-1|element
|
|
82
90
|
for (j2=j1+1; j2<M; j2++) {
|
|
83
91
|
symmat[j1][j2] = 0.0;
|
|
84
92
|
for (i = 0; i<N; i++) {
|
|
@@ -86,12 +94,13 @@ int main(void) {
|
|
|
86
94
|
}
|
|
87
95
|
symmat[j2][j1] = symmat[j1][j2];
|
|
88
96
|
}
|
|
89
|
-
|
|
97
|
+
//#pragma species endkernel correlation-part4
|
|
90
98
|
}
|
|
91
99
|
symmat[M-1][M-1] = 1.0;
|
|
92
100
|
|
|
93
101
|
// Clean-up and exit the function
|
|
94
102
|
fflush(stdout);
|
|
103
|
+
symmat[8][9] = symmat[8][9];
|
|
95
104
|
return 0;
|
|
96
105
|
}
|
|
97
106
|
|
|
@@ -33,13 +33,14 @@ int main(void) {
|
|
|
33
33
|
float float_n = 1.2;
|
|
34
34
|
|
|
35
35
|
// Set the input data
|
|
36
|
-
|
|
36
|
+
/* for (i=0; i<N; i++) {
|
|
37
37
|
for (j=0; j<M; j++) {
|
|
38
38
|
data[i][j] = ((float) i*j) / M;
|
|
39
39
|
}
|
|
40
40
|
}
|
|
41
|
-
|
|
41
|
+
*/
|
|
42
42
|
// Perform the computation
|
|
43
|
+
#pragma scop
|
|
43
44
|
#pragma species kernel 0:N-1,0:M-1|chunk(0:N-1,0:0) -> 0:M-1|element
|
|
44
45
|
// Determine the mean of the column vectors of the input data matrix
|
|
45
46
|
for (j=0; j<M; j++) {
|
|
@@ -58,6 +59,9 @@ int main(void) {
|
|
|
58
59
|
}
|
|
59
60
|
}
|
|
60
61
|
#pragma species endkernel covariance-part2
|
|
62
|
+
|
|
63
|
+
#pragma endscop
|
|
64
|
+
|
|
61
65
|
// Calculate the MxM covariance matrix
|
|
62
66
|
for (j1=0; j1<M; j1++) {
|
|
63
67
|
#pragma species kernel 0:N-1,j1:j1|full ^ 0:N-1,j1:M-1|chunk(0:N-1,0:0) -> j1:M-1,j1:j1|element ^ j1:j1,j1:M-1|element
|
|
@@ -73,5 +77,6 @@ int main(void) {
|
|
|
73
77
|
|
|
74
78
|
// Clean-up and exit the function
|
|
75
79
|
fflush(stdout);
|
|
80
|
+
symmat[8][9] = symmat[8][9];
|
|
76
81
|
return 0;
|
|
77
82
|
}
|
|
@@ -30,10 +30,11 @@ int main(void) {
|
|
|
30
30
|
float C4[NP][NP];
|
|
31
31
|
|
|
32
32
|
// Set the input data
|
|
33
|
-
for (i=0; i<NR; i++) { for (j=0; j<NQ; j++) { for (k=0; k<NP; k++) { A[i][j][k] = ((float) i*j + k) / NP; } } }
|
|
33
|
+
/* for (i=0; i<NR; i++) { for (j=0; j<NQ; j++) { for (k=0; k<NP; k++) { A[i][j][k] = ((float) i*j + k) / NP; } } }
|
|
34
34
|
for (i=0; i<NP; i++) { for (j=0; j<NP; j++) { C4[i][j] = ((float) i*j) / NP; } }
|
|
35
|
-
|
|
35
|
+
*/
|
|
36
36
|
// Perform the computation
|
|
37
|
+
#pragma scop
|
|
37
38
|
#pragma species kernel 0:NR-1,0:NQ-1,0:NP-1|chunk(0:0,0:0,0:NP-1) ^ 0:NP-1,0:NP-1|chunk(0:NP-1,0:0) -> 0:NR-1,0:NQ-1,0:NP-1|element
|
|
38
39
|
for (r=0; r<NR; r++) {
|
|
39
40
|
for (q=0; q<NQ; q++) {
|
|
@@ -55,9 +56,11 @@ int main(void) {
|
|
|
55
56
|
}
|
|
56
57
|
}
|
|
57
58
|
#pragma species endkernel doitgen-part2
|
|
59
|
+
#pragma endscop
|
|
58
60
|
|
|
59
61
|
// Clean-up and exit the function
|
|
60
62
|
fflush(stdout);
|
|
63
|
+
A[8][9][3] = A[8][9][3];
|
|
61
64
|
return 0;
|
|
62
65
|
}
|
|
63
66
|
|
|
@@ -45,6 +45,7 @@ int main(void) {
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
// Perform the computation
|
|
48
|
+
#pragma scop
|
|
48
49
|
y[0][0] = r[0];
|
|
49
50
|
beta[0] = 1;
|
|
50
51
|
alpha[0] = r[0];
|
|
@@ -68,9 +69,11 @@ int main(void) {
|
|
|
68
69
|
out[i] = y[i][NX-1];
|
|
69
70
|
}
|
|
70
71
|
#pragma species endkernel durbin-part2
|
|
72
|
+
#pragma endscop
|
|
71
73
|
|
|
72
74
|
// Clean-up and exit the function
|
|
73
75
|
fflush(stdout);
|
|
76
|
+
out[9] = out[9];
|
|
74
77
|
return 0;
|
|
75
78
|
}
|
|
76
79
|
|
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
// Perform the computation
|
|
43
|
+
#pragma scop
|
|
43
44
|
for (iter=0; iter<ITER; iter++) {
|
|
44
45
|
#pragma species kernel 0:0|void -> 0:LENGTH-1,0:LENGTH-1|element
|
|
45
46
|
for (i=0; i<=LENGTH-1; i++) {
|
|
@@ -59,9 +60,11 @@ int main(void) {
|
|
|
59
60
|
}
|
|
60
61
|
out += c[0][LENGTH-1];
|
|
61
62
|
}
|
|
63
|
+
#pragma endscop
|
|
62
64
|
|
|
63
65
|
// Clean-up and exit the function
|
|
64
66
|
fflush(stdout);
|
|
67
|
+
c[8][9] = c[8][9];
|
|
65
68
|
return 0;
|
|
66
69
|
}
|
|
67
70
|
|
|
@@ -72,6 +72,7 @@ int main(void) {
|
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
// Perform the computation
|
|
75
|
+
#pragma scop
|
|
75
76
|
for (iz=0; iz<CZ; iz++) {
|
|
76
77
|
for (iy=0; iy<CYM; iy++) {
|
|
77
78
|
czm_iz = czm[iz];
|
|
@@ -106,9 +107,11 @@ int main(void) {
|
|
|
106
107
|
Bza[iz][CYM][CXM] = tmp;
|
|
107
108
|
}
|
|
108
109
|
}
|
|
110
|
+
#pragma endscop
|
|
109
111
|
|
|
110
112
|
// Clean-up and exit the function
|
|
111
113
|
fflush(stdout);
|
|
114
|
+
Hz[8][9][3] = Hz[8][9][3];
|
|
112
115
|
return 0;
|
|
113
116
|
}
|
|
114
117
|
|
|
@@ -30,15 +30,16 @@ int main(void) {
|
|
|
30
30
|
float hz[NI][NJ];
|
|
31
31
|
|
|
32
32
|
// Set the input data
|
|
33
|
-
for (i=0; i<NI; i++) {
|
|
33
|
+
/* for (i=0; i<NI; i++) {
|
|
34
34
|
for (j=0; j<NJ; j++) {
|
|
35
35
|
ex[i][j] = ((float) i*(j+1)) / NI;
|
|
36
36
|
ey[i][j] = ((float) i*(j+2)) / NJ;
|
|
37
37
|
hz[i][j] = ((float) i*(j+3)) / NI;
|
|
38
38
|
}
|
|
39
39
|
}
|
|
40
|
-
|
|
40
|
+
*/
|
|
41
41
|
// Perform the computation
|
|
42
|
+
#pragma scop
|
|
42
43
|
for (t=0; t<TSTEPS; t++) {
|
|
43
44
|
#pragma species kernel 0:0|void -> 0:0,0:NJ-1|element
|
|
44
45
|
for (j=0; j<NJ; j++) {
|
|
@@ -67,8 +68,10 @@ int main(void) {
|
|
|
67
68
|
}
|
|
68
69
|
#pragma species endkernel fdtd-2d-part4
|
|
69
70
|
}
|
|
71
|
+
#pragma endscop
|
|
70
72
|
|
|
71
73
|
// Clean-up and exit the function
|
|
72
74
|
fflush(stdout);
|
|
75
|
+
hz[8][9] = hz[8][9];
|
|
73
76
|
return 0;
|
|
74
77
|
}
|
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
// Perform the computation
|
|
38
|
+
#pragma scop
|
|
38
39
|
for (k=0; k<N; k++) {
|
|
39
40
|
for (i=0; i<N; i++) {
|
|
40
41
|
for (j=0; j<N; j++) {
|
|
@@ -42,9 +43,11 @@ int main(void) {
|
|
|
42
43
|
}
|
|
43
44
|
}
|
|
44
45
|
}
|
|
46
|
+
#pragma endscop
|
|
45
47
|
|
|
46
48
|
// Clean-up and exit the function
|
|
47
49
|
fflush(stdout);
|
|
50
|
+
path[8][9] = path[8][9];
|
|
48
51
|
return 0;
|
|
49
52
|
}
|
|
50
53
|
|
|
@@ -34,7 +34,7 @@ int main(void) {
|
|
|
34
34
|
int beta = 2123;
|
|
35
35
|
|
|
36
36
|
// Set the input data
|
|
37
|
-
for (i=0; i<NI; i++) {
|
|
37
|
+
/* for (i=0; i<NI; i++) {
|
|
38
38
|
for (j=0; j<NK; j++) {
|
|
39
39
|
A[i][j] = ((float) i*j) / NI;
|
|
40
40
|
}
|
|
@@ -49,8 +49,9 @@ int main(void) {
|
|
|
49
49
|
C[i][j] = ((float) i*j) / NI;
|
|
50
50
|
}
|
|
51
51
|
}
|
|
52
|
-
|
|
52
|
+
*/
|
|
53
53
|
// Perform the computation (C := alpha*A*B + beta*C)
|
|
54
|
+
#pragma scop
|
|
54
55
|
#pragma species kernel 0:NI-1,0:NJ-1|element ^ 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
|
55
56
|
for (i=0; i<NI; i++) {
|
|
56
57
|
for (j=0; j<NJ; j++) {
|
|
@@ -61,9 +62,11 @@ int main(void) {
|
|
|
61
62
|
}
|
|
62
63
|
}
|
|
63
64
|
#pragma species endkernel gemm
|
|
65
|
+
#pragma endscop
|
|
64
66
|
|
|
65
67
|
// Clean-up and exit the function
|
|
66
68
|
fflush(stdout);
|
|
69
|
+
C[8][9] = C[8][9];
|
|
67
70
|
return 0;
|
|
68
71
|
}
|
|
69
72
|
|