bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -40,7 +40,7 @@ int main(void) {
|
|
40
40
|
int beta = 12313;
|
41
41
|
|
42
42
|
// Set the input data
|
43
|
-
for (i=0; i<NX; i++) {
|
43
|
+
/* for (i=0; i<NX; i++) {
|
44
44
|
u1[i] = i;
|
45
45
|
u2[i] = (i+1)/NX/2.0;
|
46
46
|
v1[i] = (i+1)/NX/4.0;
|
@@ -53,8 +53,9 @@ int main(void) {
|
|
53
53
|
A[i][j] = ((float) i*j) / NX;
|
54
54
|
}
|
55
55
|
}
|
56
|
-
|
56
|
+
*/
|
57
57
|
// Perform the computation
|
58
|
+
#pragma scop
|
58
59
|
#pragma species kernel 0:NX-1,0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1,0:NX-1|element
|
59
60
|
for (i=0; i<NX; i++) {
|
60
61
|
for (j=0; j<NX; j++) {
|
@@ -81,9 +82,11 @@ int main(void) {
|
|
81
82
|
}
|
82
83
|
}
|
83
84
|
#pragma species endkernel gemver-part4
|
85
|
+
#pragma endscop
|
84
86
|
|
85
87
|
// Clean-up and exit the function
|
86
88
|
fflush(stdout);
|
89
|
+
w[9] = w[9];
|
87
90
|
return 0;
|
88
91
|
}
|
89
92
|
|
@@ -36,15 +36,16 @@ int main(void) {
|
|
36
36
|
float beta = 12313;
|
37
37
|
|
38
38
|
// Set the input data
|
39
|
-
for (i=0; i<NX; i++) {
|
39
|
+
/* for (i=0; i<NX; i++) {
|
40
40
|
x[i] = ((float) i) / NX;
|
41
41
|
for (j=0; j<NX; j++) {
|
42
42
|
A[i][j] = ((float) i*(j+1)) / NX;
|
43
43
|
B[i][j] = ((float) (i+3)*j) / NX;
|
44
44
|
}
|
45
45
|
}
|
46
|
-
|
46
|
+
*/
|
47
47
|
// Perform the computation
|
48
|
+
#pragma scop
|
48
49
|
#pragma species kernel 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) -> 0:NX-1|element ^ 0:NX-1|element
|
49
50
|
for (i=0; i<NX; i++) {
|
50
51
|
tmp[i] = 0;
|
@@ -56,9 +57,11 @@ int main(void) {
|
|
56
57
|
y[i] = alpha*tmp[i] + beta*y[i];
|
57
58
|
}
|
58
59
|
#pragma species endkernel gesummv
|
60
|
+
#pragma endscop
|
59
61
|
|
60
62
|
// Clean-up and exit the function
|
61
63
|
fflush(stdout);
|
64
|
+
y[9] = y[9];
|
62
65
|
return 0;
|
63
66
|
}
|
64
67
|
|
@@ -46,6 +46,7 @@ int main(void) {
|
|
46
46
|
}
|
47
47
|
|
48
48
|
// Perform the computation
|
49
|
+
#pragma scop
|
49
50
|
for (k=0; k<NJ; k++) {
|
50
51
|
nrm[0] = 0;
|
51
52
|
#pragma species kernel 0:NI-1,k:k|element -> 0:0|shared
|
@@ -77,8 +78,10 @@ int main(void) {
|
|
77
78
|
}
|
78
79
|
#pragma species endkernel gramschmidt-part3
|
79
80
|
}
|
81
|
+
#pragma endscop
|
80
82
|
|
81
83
|
// Clean-up and exit the function
|
82
84
|
fflush(stdout);
|
85
|
+
A[8][9] = A[8][9];
|
83
86
|
return 0;
|
84
87
|
}
|
@@ -18,6 +18,7 @@
|
|
18
18
|
// Last modified on...03-April-2012
|
19
19
|
//
|
20
20
|
|
21
|
+
|
21
22
|
#include "common.h"
|
22
23
|
|
23
24
|
// This is 'jacobi-1d-imper', a 1D Jacobi stencil computation
|
@@ -27,14 +28,19 @@ int main(void) {
|
|
27
28
|
// Declare arrays on the stack
|
28
29
|
float A[LARGE_N];
|
29
30
|
float B[LARGE_N];
|
31
|
+
//printf("A: %p\n", A);
|
32
|
+
//printf("B: %p\n", B);
|
33
|
+
//float *A = (float *)malloc(LARGE_N*sizeof(float));
|
34
|
+
//float *B = (float *)malloc(LARGE_N*sizeof(float));
|
30
35
|
|
31
36
|
// Set the input data
|
32
|
-
for (i=0; i<LARGE_N; i++) {
|
37
|
+
/* for (i=0; i<LARGE_N; i++) {
|
33
38
|
A[i] = ((float) i+2) / LARGE_N;
|
34
39
|
B[i] = ((float) i+3) / LARGE_N;
|
35
40
|
}
|
36
|
-
|
41
|
+
*/
|
37
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
38
44
|
for (t=0; t<TSTEPS; t++) {
|
39
45
|
#pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
|
40
46
|
for (i=1; i<LARGE_N-1; i++) {
|
@@ -47,9 +53,11 @@ int main(void) {
|
|
47
53
|
}
|
48
54
|
#pragma species endkernel jacobi-1d-imper-part2
|
49
55
|
}
|
56
|
+
#pragma endscop
|
50
57
|
|
51
58
|
// Clean-up and exit the function
|
52
59
|
fflush(stdout);
|
60
|
+
B[9] = B[9];
|
53
61
|
return 0;
|
54
62
|
}
|
55
63
|
|
@@ -29,19 +29,22 @@ int main(void) {
|
|
29
29
|
float B[N][N];
|
30
30
|
|
31
31
|
// Set the input data
|
32
|
-
for (i=0; i<N; i++) {
|
32
|
+
/* for (i=0; i<N; i++) {
|
33
33
|
for (j=0; j<N; j++) {
|
34
34
|
A[i][j] = ((float) i*(j+2) + 2) / N;
|
35
35
|
B[i][j] = ((float) i*(j+3) + 3) / N;
|
36
36
|
}
|
37
37
|
}
|
38
|
-
|
38
|
+
*/
|
39
39
|
// Perform the computation
|
40
|
+
#pragma scop
|
40
41
|
for (t=0; t<TSTEPS; t++) {
|
41
42
|
#pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
|
42
43
|
for (i=1; i<N-1; i++) {
|
43
44
|
for (j=1; j<N-1; j++) {
|
44
|
-
|
45
|
+
if (i < N-1 && j < N-1) {
|
46
|
+
B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
|
47
|
+
}
|
45
48
|
}
|
46
49
|
}
|
47
50
|
#pragma species endkernel jacobi-2d-imper-part1
|
@@ -53,9 +56,11 @@ int main(void) {
|
|
53
56
|
}
|
54
57
|
#pragma species endkernel jacobi-2d-imper-part2
|
55
58
|
}
|
59
|
+
#pragma endscop
|
56
60
|
|
57
61
|
// Clean-up and exit the function
|
58
62
|
fflush(stdout);
|
63
|
+
B[8][9] = B[8][9];
|
59
64
|
return 0;
|
60
65
|
}
|
61
66
|
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
// Perform the computation
|
38
|
+
#pragma scop
|
38
39
|
for (k=0; k<N; k++) {
|
39
40
|
#pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
|
40
41
|
for (j=k+1; j<N; j++) {
|
@@ -49,9 +50,11 @@ int main(void) {
|
|
49
50
|
}
|
50
51
|
#pragma species endkernel lu-part2
|
51
52
|
}
|
53
|
+
#pragma endscop
|
52
54
|
|
53
55
|
// Clean-up and exit the function
|
54
56
|
fflush(stdout);
|
57
|
+
A[8][9] = A[8][9];
|
55
58
|
return 0;
|
56
59
|
}
|
57
60
|
|
@@ -42,6 +42,7 @@ int main(void) {
|
|
42
42
|
}
|
43
43
|
|
44
44
|
// Perform the computation
|
45
|
+
#pragma scop
|
45
46
|
b[0] = 1.0;
|
46
47
|
for (i=0; i<N; i++) {
|
47
48
|
for (j=i+1; j<=N; j++) {
|
@@ -83,9 +84,11 @@ int main(void) {
|
|
83
84
|
//#pragma species endkernel ludcmp-part4
|
84
85
|
x[N-1-i] = w[0] / A[N-1-i][N-1-i];
|
85
86
|
}
|
87
|
+
#pragma endscop
|
86
88
|
|
87
89
|
// Clean-up and exit the function
|
88
90
|
fflush(stdout);
|
91
|
+
x[9] = x[9];
|
89
92
|
return 0;
|
90
93
|
}
|
91
94
|
|
@@ -30,7 +30,7 @@ int main(void) {
|
|
30
30
|
float x2[NX];
|
31
31
|
float y_1[NX];
|
32
32
|
float y_2[NX];
|
33
|
-
|
33
|
+
/*
|
34
34
|
// Set the input data
|
35
35
|
for (i=0; i<NX; i++) {
|
36
36
|
x1[i] = ((float) i) / NX;
|
@@ -40,9 +40,10 @@ int main(void) {
|
|
40
40
|
for (j=0; j<NX; j++) {
|
41
41
|
A[i][j] = ((float) i*j) / NX;
|
42
42
|
}
|
43
|
-
}
|
43
|
+
}*/
|
44
44
|
|
45
45
|
// Perform the computation
|
46
|
+
#pragma scop
|
46
47
|
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
|
47
48
|
for (i=0; i<NX; i++) {
|
48
49
|
for (j=0; j<NX; j++) {
|
@@ -57,9 +58,12 @@ int main(void) {
|
|
57
58
|
}
|
58
59
|
}
|
59
60
|
#pragma species endkernel mvt-part2
|
61
|
+
#pragma endscop
|
60
62
|
|
61
63
|
// Clean-up and exit the function
|
62
64
|
fflush(stdout);
|
65
|
+
x1[9] = x1[9];
|
66
|
+
x2[9] = x2[9];
|
63
67
|
return 0;
|
64
68
|
}
|
65
69
|
|
@@ -42,6 +42,7 @@ int main(void) {
|
|
42
42
|
}
|
43
43
|
|
44
44
|
// Perform the computation
|
45
|
+
#pragma scop
|
45
46
|
for (t=0; t<ITER; t++) {
|
46
47
|
#pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
|
47
48
|
for (j=0; j<=MAXGRID-1; j++) {
|
@@ -75,8 +76,10 @@ int main(void) {
|
|
75
76
|
#pragma species endkernel reg-detect-part3
|
76
77
|
}
|
77
78
|
}
|
79
|
+
#pragma endscop
|
78
80
|
|
79
81
|
// Clean-up and exit the function
|
80
82
|
fflush(stdout);
|
83
|
+
path[8][9] = path[8][9];
|
81
84
|
return 0;
|
82
85
|
}
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
// Perform the computation
|
38
|
+
#pragma scop
|
38
39
|
for (t=0; t<TSTEPS-1; t++) {
|
39
40
|
for (i=1; i<=N-2; i++) {
|
40
41
|
for (j=1; j<=N-2; j++) {
|
@@ -44,8 +45,10 @@ int main(void) {
|
|
44
45
|
}
|
45
46
|
}
|
46
47
|
}
|
48
|
+
#pragma endscop
|
47
49
|
|
48
50
|
// Clean-up and exit the function
|
49
51
|
fflush(stdout);
|
52
|
+
A[8][9] = A[8][9];
|
50
53
|
return 0;
|
51
54
|
}
|
@@ -49,6 +49,7 @@ int main(void) {
|
|
49
49
|
}
|
50
50
|
|
51
51
|
// Perform the computation (C := alpha*A*B + beta*C, with A symmetric)
|
52
|
+
#pragma scop
|
52
53
|
for (i=0; i<NI; i++) {
|
53
54
|
for (j=0; j<NJ; j++) {
|
54
55
|
acc[0] = 0;
|
@@ -66,9 +67,11 @@ int main(void) {
|
|
66
67
|
C[i][j] = beta*C[i][j] + alpha*A[i][i]*bij + alpha*acc[0];
|
67
68
|
}
|
68
69
|
}
|
70
|
+
#pragma endscop
|
69
71
|
|
70
72
|
// Clean-up and exit the function
|
71
73
|
fflush(stdout);
|
74
|
+
C[8][9] = C[8][9];
|
72
75
|
return 0;
|
73
76
|
}
|
74
77
|
|
@@ -34,7 +34,7 @@ int main(void) {
|
|
34
34
|
int beta = 2123;
|
35
35
|
|
36
36
|
// Set the input data
|
37
|
-
for (i=0; i<NI; i++) {
|
37
|
+
/* for (i=0; i<NI; i++) {
|
38
38
|
for (j=0; j<NJ; j++) {
|
39
39
|
A[i][j] = ((float) i*j) / NI;
|
40
40
|
B[i][j] = ((float) i*j) / NI;
|
@@ -45,8 +45,9 @@ int main(void) {
|
|
45
45
|
C[i][j] = ((float) i*j) / NI;
|
46
46
|
}
|
47
47
|
}
|
48
|
-
|
48
|
+
*/
|
49
49
|
// Perform the computation (C := alpha*A*B' + alpha*B*A' + beta*C)
|
50
|
+
#pragma scop
|
50
51
|
#pragma species kernel 0:NI-1,0:NI-1|element ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) -> 0:NI-1,0:NI-1|element
|
51
52
|
for (i=0; i<NI; i++) {
|
52
53
|
for (j=0; j<NI; j++) {
|
@@ -58,8 +59,10 @@ int main(void) {
|
|
58
59
|
}
|
59
60
|
}
|
60
61
|
#pragma species endkernel syr2k
|
62
|
+
#pragma endscop
|
61
63
|
|
62
64
|
// Clean-up and exit the function
|
63
65
|
fflush(stdout);
|
66
|
+
C[8][9] = C[8][9];
|
64
67
|
return 0;
|
65
68
|
}
|
@@ -15,7 +15,7 @@
|
|
15
15
|
// == File information
|
16
16
|
// Filename...........benchmark/syrk.c
|
17
17
|
// Author.............Cedric Nugteren
|
18
|
-
// Last modified on...
|
18
|
+
// Last modified on...07-May-2013
|
19
19
|
//
|
20
20
|
|
21
21
|
#include "common.h"
|
@@ -33,7 +33,7 @@ int main(void) {
|
|
33
33
|
float beta = 2123;
|
34
34
|
|
35
35
|
// Set the input data
|
36
|
-
for (i=0; i<NI; i++) {
|
36
|
+
/* for (i=0; i<NI; i++) {
|
37
37
|
for (j=0; j<NJ; j++) {
|
38
38
|
A[i][j] = ((float) i*j) / NI;
|
39
39
|
}
|
@@ -43,9 +43,10 @@ int main(void) {
|
|
43
43
|
C[i][j] = ((float) i*j) / NI;
|
44
44
|
}
|
45
45
|
}
|
46
|
-
|
46
|
+
*/
|
47
47
|
// Perform the computation (C := alpha*A*A' + beta*C)
|
48
|
-
#pragma
|
48
|
+
#pragma scop
|
49
|
+
#pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
|
49
50
|
for (i=0; i<NI; i++) {
|
50
51
|
for (j=0; j<NI; j++) {
|
51
52
|
C[i][j] *= beta;
|
@@ -55,8 +56,10 @@ int main(void) {
|
|
55
56
|
}
|
56
57
|
}
|
57
58
|
#pragma species endkernel syrk
|
59
|
+
#pragma endscop
|
58
60
|
|
59
61
|
// Clean-up and exit the function
|
60
62
|
fflush(stdout);
|
63
|
+
C[8][9] = C[8][9];
|
61
64
|
return 0;
|
62
65
|
}
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
43
44
|
for (i=0; i<NX; i++) {
|
44
45
|
x[i] = c[i];
|
45
46
|
A_i_i = A[i][i];
|
@@ -50,8 +51,10 @@ int main(void) {
|
|
50
51
|
}
|
51
52
|
//#pragma species endkernel trisolv
|
52
53
|
}
|
54
|
+
#pragma endscop
|
53
55
|
|
54
56
|
// Clean-up and exit the function
|
55
57
|
fflush(stdout);
|
58
|
+
x[8] = x[8];
|
56
59
|
return 0;
|
57
60
|
}
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
// Perform the computation (B := alpha*A'*B, with A triangular)
|
43
|
+
#pragma scop
|
43
44
|
for (i=1; i<NI; i++) {
|
44
45
|
for (j=0; j<NI; j++) {
|
45
46
|
#pragma species kernel i:i,0:i-1|element ^ j:j,0:i-1|element -> i:i,j:j|shared
|
@@ -49,9 +50,11 @@ int main(void) {
|
|
49
50
|
#pragma species endkernel trmm
|
50
51
|
}
|
51
52
|
}
|
53
|
+
#pragma endscop
|
52
54
|
|
53
55
|
// Clean-up and exit the function
|
54
56
|
fflush(stdout);
|
57
|
+
B[8][9] = B[8][9];
|
55
58
|
return 0;
|
56
59
|
}
|
57
60
|
|
@@ -0,0 +1,180 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'Unstructured Grid-
|
4
|
+
// Based CFD Solvers', taken from the Rodinia benchmark suite. For more information
|
5
|
+
// on the application or on Bones please use the contact information below.
|
6
|
+
//
|
7
|
+
// == More information on unstructured grid based CFD solvers:
|
8
|
+
// Website............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/
|
9
|
+
// Article............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/aiaa_2009_4001.pdf
|
10
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
11
|
+
//
|
12
|
+
// == More information on Bones
|
13
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
14
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
15
|
+
//
|
16
|
+
// == File information
|
17
|
+
// Filename...........applications/cfd.c
|
18
|
+
// Authors............Cedric Nugteren
|
19
|
+
// Original author....Andrew Corrigan
|
20
|
+
// Last modified on...10-Aug-2012
|
21
|
+
//
|
22
|
+
|
23
|
+
//########################################################################
|
24
|
+
//### Includes
|
25
|
+
//########################################################################
|
26
|
+
|
27
|
+
#include <stdio.h>
|
28
|
+
#include <stdlib.h>
|
29
|
+
#include <math.h>
|
30
|
+
|
31
|
+
//########################################################################
|
32
|
+
//### Data types
|
33
|
+
//########################################################################
|
34
|
+
|
35
|
+
// Plain three-component single-precision vector. Used in this file for the
// velocity, the momentum and the per-axis flux contributions.
typedef struct {
|
36
|
+
float x; // first component
|
37
|
+
float y; // second component
|
38
|
+
float z; // third component
|
39
|
+
} float3;
|
40
|
+
|
41
|
+
//########################################################################
|
42
|
+
//### Forward declarations
|
43
|
+
//########################################################################
|
44
|
+
|
45
|
+
// Forward declaration of the flux-contribution helper that is defined after
// main(). It is deliberately declared WITHOUT 'inline': in C99/C11, when every
// file-scope declaration of a function carries 'inline' (and none 'extern'),
// the translation unit only provides an inline definition and no external
// symbol, so a non-inlined call from main() can fail at link time. Having one
// declaration without 'inline' makes the later definition an ordinary external
// definition while still allowing the compiler to inline it.
void compute_flux_contribution(float3 momentum, float density_energy, float pressure, float3 velocity, float3 *fc_momentum_x, float3 *fc_momentum_y, float3 *fc_momentum_z, float3 *fc_density_energy);
|
46
|
+
|
47
|
+
//########################################################################
|
48
|
+
//### Options
|
49
|
+
//########################################################################
|
50
|
+
|
51
|
+
#define GAMMA 1.4f // used by main() in the speed-of-sound and density-energy formulas
|
52
|
+
#define iterations 2000 // not referenced in the visible part of this file
|
53
|
+
#define NNB 4 // not referenced in the visible part of this file
|
54
|
+
#define RK 3 // 3rd order RK
|
55
|
+
|
56
|
+
#define FF_MACH 1.2f // far-field speed as a multiple of the far-field speed of sound
|
57
|
+
#define DEG_ANGLE_OF_ATTACK 0.0f // angle of attack in degrees; main() converts it to radians
|
58
|
+
#define NDIM 3 // number of momentum components stored per variable set
|
59
|
+
|
60
|
+
//########################################################################
|
61
|
+
//### Defines
|
62
|
+
//########################################################################
|
63
|
+
|
64
|
+
// Indices into a variable array such as ff_variable[NVAR].
#define VAR_DENSITY 0 // slot holding the density
|
65
|
+
#define VAR_MOMENTUM 1 // first of NDIM consecutive momentum slots
|
66
|
+
#define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM) // slot right after the momentum slots
|
67
|
+
#define NVAR (VAR_DENSITY_ENERGY+1) // total number of slots
|
68
|
+
|
69
|
+
//########################################################################
|
70
|
+
//### Global variables
|
71
|
+
//########################################################################
|
72
|
+
|
73
|
+
// Far-field state, filled in by main(): density, momentum components and
// density energy, addressed through the VAR_* indices.
float ff_variable[NVAR];
|
74
|
+
// Far-field flux contributions; main() passes their addresses to
// compute_flux_contribution(), which writes all three components of each.
float3 ff_flux_contribution_momentum_x;
|
75
|
+
float3 ff_flux_contribution_momentum_y;
|
76
|
+
float3 ff_flux_contribution_momentum_z;
|
77
|
+
float3 ff_flux_contribution_density_energy;
|
78
|
+
|
79
|
+
//########################################################################
|
80
|
+
//### Start of the main function
|
81
|
+
//########################################################################
|
82
|
+
|
83
|
+
int main(void) {
|
84
|
+
|
85
|
+
// Declare the loop iterators
|
86
|
+
int i,j;
|
87
|
+
|
88
|
+
// Declare far field variables
|
89
|
+
const float angle_of_attack = (M_PI/180.0f) * DEG_ANGLE_OF_ATTACK;
|
90
|
+
float ff_pressure, ff_speed_of_sound, ff_speed;
|
91
|
+
float3 ff_velocity, ff_momentum;
|
92
|
+
|
93
|
+
// Declare other domain variables
|
94
|
+
|
95
|
+
// Declare other/helper variables
|
96
|
+
|
97
|
+
// Compute the far field
|
98
|
+
printf("\n[cfd] Set the far field conditions"); fflush(stdout);
|
99
|
+
{
|
100
|
+
ff_variable[VAR_DENSITY] = 1.4f;
|
101
|
+
ff_pressure = 1.0f;
|
102
|
+
ff_speed_of_sound = sqrt(GAMMA*ff_pressure / ff_variable[VAR_DENSITY]);
|
103
|
+
ff_speed = FF_MACH*ff_speed_of_sound;
|
104
|
+
|
105
|
+
// Compute the velocity
|
106
|
+
ff_velocity.x = ff_speed*cos(angle_of_attack);
|
107
|
+
ff_velocity.y = ff_speed*sin(angle_of_attack);
|
108
|
+
ff_velocity.z = 0.0f;
|
109
|
+
|
110
|
+
// Update the variable
|
111
|
+
ff_variable[VAR_MOMENTUM+0] = ff_variable[VAR_DENSITY] * ff_velocity.x;
|
112
|
+
ff_variable[VAR_MOMENTUM+1] = ff_variable[VAR_DENSITY] * ff_velocity.y;
|
113
|
+
ff_variable[VAR_MOMENTUM+2] = ff_variable[VAR_DENSITY] * ff_velocity.z;
|
114
|
+
ff_variable[VAR_DENSITY_ENERGY] = ff_variable[VAR_DENSITY]*0.5f*ff_speed*ff_speed + (ff_pressure/(GAMMA-1.0f));
|
115
|
+
|
116
|
+
// Set the momentum
|
117
|
+
ff_momentum.x = ff_variable[VAR_MOMENTUM+0];
|
118
|
+
ff_momentum.y = ff_variable[VAR_MOMENTUM+1];
|
119
|
+
ff_momentum.z = ff_variable[VAR_MOMENTUM+2];
|
120
|
+
|
121
|
+
// Compute the flux contribution
|
122
|
+
compute_flux_contribution(
|
123
|
+
ff_momentum,
|
124
|
+
ff_variable[VAR_DENSITY_ENERGY],
|
125
|
+
ff_pressure,
|
126
|
+
ff_velocity,
|
127
|
+
&ff_flux_contribution_momentum_x,
|
128
|
+
&ff_flux_contribution_momentum_y,
|
129
|
+
&ff_flux_contribution_momentum_z,
|
130
|
+
&ff_flux_contribution_density_energy
|
131
|
+
);
|
132
|
+
}
|
133
|
+
|
134
|
+
// Initialising memory
|
135
|
+
printf("\n[cfd] Initialising memory"); fflush(stdout);
|
136
|
+
|
137
|
+
|
138
|
+
// Clean-up and exit
|
139
|
+
printf("\n[cfd] Completed\n\n"); fflush(stdout);
|
140
|
+
fflush(stdout);
|
141
|
+
return 0;
|
142
|
+
}
|
143
|
+
|
144
|
+
//########################################################################
|
145
|
+
//### Function to compute the flux contribution
|
146
|
+
//########################################################################
|
147
|
+
|
148
|
+
inline void compute_flux_contribution(
|
149
|
+
float3 momentum,
|
150
|
+
float density_energy,
|
151
|
+
float pressure,
|
152
|
+
float3 velocity,
|
153
|
+
float3 *fc_momentum_x,
|
154
|
+
float3 *fc_momentum_y,
|
155
|
+
float3 *fc_momentum_z,
|
156
|
+
float3 *fc_density_energy
|
157
|
+
) {
|
158
|
+
|
159
|
+
// Compute the x-momentum
|
160
|
+
(*fc_momentum_x).x = velocity.x*momentum.x + pressure;
|
161
|
+
(*fc_momentum_x).y = velocity.x*momentum.y;
|
162
|
+
(*fc_momentum_x).z = velocity.x*momentum.z;
|
163
|
+
|
164
|
+
// Compute the y-momentum
|
165
|
+
(*fc_momentum_y).x = velocity.x*momentum.y;
|
166
|
+
(*fc_momentum_y).y = velocity.y*momentum.y + pressure;
|
167
|
+
(*fc_momentum_y).z = velocity.y*momentum.z;
|
168
|
+
|
169
|
+
// Compute the z-momentum
|
170
|
+
(*fc_momentum_z).x = velocity.x*momentum.z;
|
171
|
+
(*fc_momentum_z).y = velocity.y*momentum.z;
|
172
|
+
(*fc_momentum_z).z = velocity.z*momentum.z + pressure;
|
173
|
+
|
174
|
+
// Compute energy density
|
175
|
+
(*fc_density_energy).x = velocity.x*density_energy+pressure;
|
176
|
+
(*fc_density_energy).y = velocity.y*density_energy+pressure;
|
177
|
+
(*fc_density_energy).z = velocity.z*density_energy+pressure;
|
178
|
+
}
|
179
|
+
|
180
|
+
//########################################################################
|