bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
@@ -40,7 +40,7 @@ int main(void) {
|
|
40
40
|
int beta = 12313;
|
41
41
|
|
42
42
|
// Set the input data
|
43
|
-
for (i=0; i<NX; i++) {
|
43
|
+
/* for (i=0; i<NX; i++) {
|
44
44
|
u1[i] = i;
|
45
45
|
u2[i] = (i+1)/NX/2.0;
|
46
46
|
v1[i] = (i+1)/NX/4.0;
|
@@ -53,8 +53,9 @@ int main(void) {
|
|
53
53
|
A[i][j] = ((float) i*j) / NX;
|
54
54
|
}
|
55
55
|
}
|
56
|
-
|
56
|
+
*/
|
57
57
|
// Perform the computation
|
58
|
+
#pragma scop
|
58
59
|
#pragma species kernel 0:NX-1,0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1,0:NX-1|element
|
59
60
|
for (i=0; i<NX; i++) {
|
60
61
|
for (j=0; j<NX; j++) {
|
@@ -81,9 +82,11 @@ int main(void) {
|
|
81
82
|
}
|
82
83
|
}
|
83
84
|
#pragma species endkernel gemver-part4
|
85
|
+
#pragma endscop
|
84
86
|
|
85
87
|
// Clean-up and exit the function
|
86
88
|
fflush(stdout);
|
89
|
+
w[9] = w[9];
|
87
90
|
return 0;
|
88
91
|
}
|
89
92
|
|
@@ -36,15 +36,16 @@ int main(void) {
|
|
36
36
|
float beta = 12313;
|
37
37
|
|
38
38
|
// Set the input data
|
39
|
-
for (i=0; i<NX; i++) {
|
39
|
+
/* for (i=0; i<NX; i++) {
|
40
40
|
x[i] = ((float) i) / NX;
|
41
41
|
for (j=0; j<NX; j++) {
|
42
42
|
A[i][j] = ((float) i*(j+1)) / NX;
|
43
43
|
B[i][j] = ((float) (i+3)*j) / NX;
|
44
44
|
}
|
45
45
|
}
|
46
|
-
|
46
|
+
*/
|
47
47
|
// Perform the computation
|
48
|
+
#pragma scop
|
48
49
|
#pragma species kernel 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) -> 0:NX-1|element ^ 0:NX-1|element
|
49
50
|
for (i=0; i<NX; i++) {
|
50
51
|
tmp[i] = 0;
|
@@ -56,9 +57,11 @@ int main(void) {
|
|
56
57
|
y[i] = alpha*tmp[i] + beta*y[i];
|
57
58
|
}
|
58
59
|
#pragma species endkernel gesummv
|
60
|
+
#pragma endscop
|
59
61
|
|
60
62
|
// Clean-up and exit the function
|
61
63
|
fflush(stdout);
|
64
|
+
y[9] = y[9];
|
62
65
|
return 0;
|
63
66
|
}
|
64
67
|
|
@@ -46,6 +46,7 @@ int main(void) {
|
|
46
46
|
}
|
47
47
|
|
48
48
|
// Perform the computation
|
49
|
+
#pragma scop
|
49
50
|
for (k=0; k<NJ; k++) {
|
50
51
|
nrm[0] = 0;
|
51
52
|
#pragma species kernel 0:NI-1,k:k|element -> 0:0|shared
|
@@ -77,8 +78,10 @@ int main(void) {
|
|
77
78
|
}
|
78
79
|
#pragma species endkernel gramschmidt-part3
|
79
80
|
}
|
81
|
+
#pragma endscop
|
80
82
|
|
81
83
|
// Clean-up and exit the function
|
82
84
|
fflush(stdout);
|
85
|
+
A[8][9] = A[8][9];
|
83
86
|
return 0;
|
84
87
|
}
|
@@ -18,6 +18,7 @@
|
|
18
18
|
// Last modified on...03-April-2012
|
19
19
|
//
|
20
20
|
|
21
|
+
|
21
22
|
#include "common.h"
|
22
23
|
|
23
24
|
// This is 'jacobi-1d-imper', a 1D Jacobi stencil computation
|
@@ -27,14 +28,19 @@ int main(void) {
|
|
27
28
|
// Declare arrays on the stack
|
28
29
|
float A[LARGE_N];
|
29
30
|
float B[LARGE_N];
|
31
|
+
//printf("A: %p\n", A);
|
32
|
+
//printf("B: %p\n", B);
|
33
|
+
//float *A = (float *)malloc(LARGE_N*sizeof(float));
|
34
|
+
//float *B = (float *)malloc(LARGE_N*sizeof(float));
|
30
35
|
|
31
36
|
// Set the input data
|
32
|
-
for (i=0; i<LARGE_N; i++) {
|
37
|
+
/* for (i=0; i<LARGE_N; i++) {
|
33
38
|
A[i] = ((float) i+2) / LARGE_N;
|
34
39
|
B[i] = ((float) i+3) / LARGE_N;
|
35
40
|
}
|
36
|
-
|
41
|
+
*/
|
37
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
38
44
|
for (t=0; t<TSTEPS; t++) {
|
39
45
|
#pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
|
40
46
|
for (i=1; i<LARGE_N-1; i++) {
|
@@ -47,9 +53,11 @@ int main(void) {
|
|
47
53
|
}
|
48
54
|
#pragma species endkernel jacobi-1d-imper-part2
|
49
55
|
}
|
56
|
+
#pragma endscop
|
50
57
|
|
51
58
|
// Clean-up and exit the function
|
52
59
|
fflush(stdout);
|
60
|
+
B[9] = B[9];
|
53
61
|
return 0;
|
54
62
|
}
|
55
63
|
|
@@ -29,19 +29,22 @@ int main(void) {
|
|
29
29
|
float B[N][N];
|
30
30
|
|
31
31
|
// Set the input data
|
32
|
-
for (i=0; i<N; i++) {
|
32
|
+
/* for (i=0; i<N; i++) {
|
33
33
|
for (j=0; j<N; j++) {
|
34
34
|
A[i][j] = ((float) i*(j+2) + 2) / N;
|
35
35
|
B[i][j] = ((float) i*(j+3) + 3) / N;
|
36
36
|
}
|
37
37
|
}
|
38
|
-
|
38
|
+
*/
|
39
39
|
// Perform the computation
|
40
|
+
#pragma scop
|
40
41
|
for (t=0; t<TSTEPS; t++) {
|
41
42
|
#pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
|
42
43
|
for (i=1; i<N-1; i++) {
|
43
44
|
for (j=1; j<N-1; j++) {
|
44
|
-
|
45
|
+
if (i < N-1 && j < N-1) {
|
46
|
+
B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
|
47
|
+
}
|
45
48
|
}
|
46
49
|
}
|
47
50
|
#pragma species endkernel jacobi-2d-imper-part1
|
@@ -53,9 +56,11 @@ int main(void) {
|
|
53
56
|
}
|
54
57
|
#pragma species endkernel jacobi-2d-imper-part2
|
55
58
|
}
|
59
|
+
#pragma endscop
|
56
60
|
|
57
61
|
// Clean-up and exit the function
|
58
62
|
fflush(stdout);
|
63
|
+
B[8][9] = B[8][9];
|
59
64
|
return 0;
|
60
65
|
}
|
61
66
|
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
// Perform the computation
|
38
|
+
#pragma scop
|
38
39
|
for (k=0; k<N; k++) {
|
39
40
|
#pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
|
40
41
|
for (j=k+1; j<N; j++) {
|
@@ -49,9 +50,11 @@ int main(void) {
|
|
49
50
|
}
|
50
51
|
#pragma species endkernel lu-part2
|
51
52
|
}
|
53
|
+
#pragma endscop
|
52
54
|
|
53
55
|
// Clean-up and exit the function
|
54
56
|
fflush(stdout);
|
57
|
+
A[8][9] = A[8][9];
|
55
58
|
return 0;
|
56
59
|
}
|
57
60
|
|
@@ -42,6 +42,7 @@ int main(void) {
|
|
42
42
|
}
|
43
43
|
|
44
44
|
// Perform the computation
|
45
|
+
#pragma scop
|
45
46
|
b[0] = 1.0;
|
46
47
|
for (i=0; i<N; i++) {
|
47
48
|
for (j=i+1; j<=N; j++) {
|
@@ -83,9 +84,11 @@ int main(void) {
|
|
83
84
|
//#pragma species endkernel ludcmp-part4
|
84
85
|
x[N-1-i] = w[0] / A[N-1-i][N-1-i];
|
85
86
|
}
|
87
|
+
#pragma endscop
|
86
88
|
|
87
89
|
// Clean-up and exit the function
|
88
90
|
fflush(stdout);
|
91
|
+
x[9] = x[9];
|
89
92
|
return 0;
|
90
93
|
}
|
91
94
|
|
@@ -30,7 +30,7 @@ int main(void) {
|
|
30
30
|
float x2[NX];
|
31
31
|
float y_1[NX];
|
32
32
|
float y_2[NX];
|
33
|
-
|
33
|
+
/*
|
34
34
|
// Set the input data
|
35
35
|
for (i=0; i<NX; i++) {
|
36
36
|
x1[i] = ((float) i) / NX;
|
@@ -40,9 +40,10 @@ int main(void) {
|
|
40
40
|
for (j=0; j<NX; j++) {
|
41
41
|
A[i][j] = ((float) i*j) / NX;
|
42
42
|
}
|
43
|
-
}
|
43
|
+
}*/
|
44
44
|
|
45
45
|
// Perform the computation
|
46
|
+
#pragma scop
|
46
47
|
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
|
47
48
|
for (i=0; i<NX; i++) {
|
48
49
|
for (j=0; j<NX; j++) {
|
@@ -57,9 +58,12 @@ int main(void) {
|
|
57
58
|
}
|
58
59
|
}
|
59
60
|
#pragma species endkernel mvt-part2
|
61
|
+
#pragma endscop
|
60
62
|
|
61
63
|
// Clean-up and exit the function
|
62
64
|
fflush(stdout);
|
65
|
+
x1[9] = x1[9];
|
66
|
+
x2[9] = x2[9];
|
63
67
|
return 0;
|
64
68
|
}
|
65
69
|
|
@@ -42,6 +42,7 @@ int main(void) {
|
|
42
42
|
}
|
43
43
|
|
44
44
|
// Perform the computation
|
45
|
+
#pragma scop
|
45
46
|
for (t=0; t<ITER; t++) {
|
46
47
|
#pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
|
47
48
|
for (j=0; j<=MAXGRID-1; j++) {
|
@@ -75,8 +76,10 @@ int main(void) {
|
|
75
76
|
#pragma species endkernel reg-detect-part3
|
76
77
|
}
|
77
78
|
}
|
79
|
+
#pragma endscop
|
78
80
|
|
79
81
|
// Clean-up and exit the function
|
80
82
|
fflush(stdout);
|
83
|
+
path[8][9] = path[8][9];
|
81
84
|
return 0;
|
82
85
|
}
|
@@ -35,6 +35,7 @@ int main(void) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
// Perform the computation
|
38
|
+
#pragma scop
|
38
39
|
for (t=0; t<TSTEPS-1; t++) {
|
39
40
|
for (i=1; i<=N-2; i++) {
|
40
41
|
for (j=1; j<=N-2; j++) {
|
@@ -44,8 +45,10 @@ int main(void) {
|
|
44
45
|
}
|
45
46
|
}
|
46
47
|
}
|
48
|
+
#pragma endscop
|
47
49
|
|
48
50
|
// Clean-up and exit the function
|
49
51
|
fflush(stdout);
|
52
|
+
A[8][9] = A[8][9];
|
50
53
|
return 0;
|
51
54
|
}
|
@@ -49,6 +49,7 @@ int main(void) {
|
|
49
49
|
}
|
50
50
|
|
51
51
|
// Perform the computation (C := alpha*A*B + beta*C, with A symmetric)
|
52
|
+
#pragma scop
|
52
53
|
for (i=0; i<NI; i++) {
|
53
54
|
for (j=0; j<NJ; j++) {
|
54
55
|
acc[0] = 0;
|
@@ -66,9 +67,11 @@ int main(void) {
|
|
66
67
|
C[i][j] = beta*C[i][j] + alpha*A[i][i]*bij + alpha*acc[0];
|
67
68
|
}
|
68
69
|
}
|
70
|
+
#pragma endscop
|
69
71
|
|
70
72
|
// Clean-up and exit the function
|
71
73
|
fflush(stdout);
|
74
|
+
C[8][9] = C[8][9];
|
72
75
|
return 0;
|
73
76
|
}
|
74
77
|
|
@@ -34,7 +34,7 @@ int main(void) {
|
|
34
34
|
int beta = 2123;
|
35
35
|
|
36
36
|
// Set the input data
|
37
|
-
for (i=0; i<NI; i++) {
|
37
|
+
/* for (i=0; i<NI; i++) {
|
38
38
|
for (j=0; j<NJ; j++) {
|
39
39
|
A[i][j] = ((float) i*j) / NI;
|
40
40
|
B[i][j] = ((float) i*j) / NI;
|
@@ -45,8 +45,9 @@ int main(void) {
|
|
45
45
|
C[i][j] = ((float) i*j) / NI;
|
46
46
|
}
|
47
47
|
}
|
48
|
-
|
48
|
+
*/
|
49
49
|
// Perform the computation (C := alpha*A*B' + alpha*B*A' + beta*C)
|
50
|
+
#pragma scop
|
50
51
|
#pragma species kernel 0:NI-1,0:NI-1|element ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) -> 0:NI-1,0:NI-1|element
|
51
52
|
for (i=0; i<NI; i++) {
|
52
53
|
for (j=0; j<NI; j++) {
|
@@ -58,8 +59,10 @@ int main(void) {
|
|
58
59
|
}
|
59
60
|
}
|
60
61
|
#pragma species endkernel syr2k
|
62
|
+
#pragma endscop
|
61
63
|
|
62
64
|
// Clean-up and exit the function
|
63
65
|
fflush(stdout);
|
66
|
+
C[8][9] = C[8][9];
|
64
67
|
return 0;
|
65
68
|
}
|
@@ -15,7 +15,7 @@
|
|
15
15
|
// == File information
|
16
16
|
// Filename...........benchmark/syrk.c
|
17
17
|
// Author.............Cedric Nugteren
|
18
|
-
// Last modified on...
|
18
|
+
// Last modified on...07-May-2013
|
19
19
|
//
|
20
20
|
|
21
21
|
#include "common.h"
|
@@ -33,7 +33,7 @@ int main(void) {
|
|
33
33
|
float beta = 2123;
|
34
34
|
|
35
35
|
// Set the input data
|
36
|
-
for (i=0; i<NI; i++) {
|
36
|
+
/* for (i=0; i<NI; i++) {
|
37
37
|
for (j=0; j<NJ; j++) {
|
38
38
|
A[i][j] = ((float) i*j) / NI;
|
39
39
|
}
|
@@ -43,9 +43,10 @@ int main(void) {
|
|
43
43
|
C[i][j] = ((float) i*j) / NI;
|
44
44
|
}
|
45
45
|
}
|
46
|
-
|
46
|
+
*/
|
47
47
|
// Perform the computation (C := alpha*A*A' + beta*C)
|
48
|
-
#pragma
|
48
|
+
#pragma scop
|
49
|
+
#pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
|
49
50
|
for (i=0; i<NI; i++) {
|
50
51
|
for (j=0; j<NI; j++) {
|
51
52
|
C[i][j] *= beta;
|
@@ -55,8 +56,10 @@ int main(void) {
|
|
55
56
|
}
|
56
57
|
}
|
57
58
|
#pragma species endkernel syrk
|
59
|
+
#pragma endscop
|
58
60
|
|
59
61
|
// Clean-up and exit the function
|
60
62
|
fflush(stdout);
|
63
|
+
C[8][9] = C[8][9];
|
61
64
|
return 0;
|
62
65
|
}
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
// Perform the computation
|
43
|
+
#pragma scop
|
43
44
|
for (i=0; i<NX; i++) {
|
44
45
|
x[i] = c[i];
|
45
46
|
A_i_i = A[i][i];
|
@@ -50,8 +51,10 @@ int main(void) {
|
|
50
51
|
}
|
51
52
|
//#pragma species endkernel trisolv
|
52
53
|
}
|
54
|
+
#pragma endscop
|
53
55
|
|
54
56
|
// Clean-up and exit the function
|
55
57
|
fflush(stdout);
|
58
|
+
x[8] = x[8];
|
56
59
|
return 0;
|
57
60
|
}
|
@@ -40,6 +40,7 @@ int main(void) {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
// Perform the computation (B := alpha*A'*B, with A triangular)
|
43
|
+
#pragma scop
|
43
44
|
for (i=1; i<NI; i++) {
|
44
45
|
for (j=0; j<NI; j++) {
|
45
46
|
#pragma species kernel i:i,0:i-1|element ^ j:j,0:i-1|element -> i:i,j:j|shared
|
@@ -49,9 +50,11 @@ int main(void) {
|
|
49
50
|
#pragma species endkernel trmm
|
50
51
|
}
|
51
52
|
}
|
53
|
+
#pragma endscop
|
52
54
|
|
53
55
|
// Clean-up and exit the function
|
54
56
|
fflush(stdout);
|
57
|
+
B[8][9] = B[8][9];
|
55
58
|
return 0;
|
56
59
|
}
|
57
60
|
|
@@ -0,0 +1,180 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'Unstructured Grid-
|
4
|
+
// Based CFD Solvers', taken from the Rodinia benchmark suite. For more information
|
5
|
+
// on the application or on Bones please use the contact information below.
|
6
|
+
//
|
7
|
+
// == More information on unstructured grid based CFD solvers:
|
8
|
+
// Website............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/
|
9
|
+
// Article............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/aiaa_2009_4001.pdf
|
10
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
11
|
+
//
|
12
|
+
// == More information on Bones
|
13
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
14
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
15
|
+
//
|
16
|
+
// == File information
|
17
|
+
// Filename...........applications/cfd.c
|
18
|
+
// Authors............Cedric Nugteren
|
19
|
+
// Original author....Andrew Corrigan
|
20
|
+
// Last modified on...10-Aug-2012
|
21
|
+
//
|
22
|
+
|
23
|
+
//########################################################################
|
24
|
+
//### Includes
|
25
|
+
//########################################################################
|
26
|
+
|
27
|
+
#include <stdio.h>
|
28
|
+
#include <stdlib.h>
|
29
|
+
#include <math.h>
|
30
|
+
|
31
|
+
//########################################################################
|
32
|
+
//### Data types
|
33
|
+
//########################################################################
|
34
|
+
|
35
|
+
typedef struct {
|
36
|
+
float x;
|
37
|
+
float y;
|
38
|
+
float z;
|
39
|
+
} float3;
|
40
|
+
|
41
|
+
//########################################################################
|
42
|
+
//### Forward declarations
|
43
|
+
//########################################################################
|
44
|
+
|
45
|
+
inline void compute_flux_contribution(float3 momentum, float density_energy, float pressure, float3 velocity, float3 *fc_momentum_x, float3 *fc_momentum_y, float3 *fc_momentum_z, float3 *fc_density_energy);
|
46
|
+
|
47
|
+
//########################################################################
|
48
|
+
//### Options
|
49
|
+
//########################################################################
|
50
|
+
|
51
|
+
#define GAMMA 1.4f
|
52
|
+
#define iterations 2000
|
53
|
+
#define NNB 4
|
54
|
+
#define RK 3 // 3rd order RK
|
55
|
+
|
56
|
+
#define FF_MACH 1.2f
|
57
|
+
#define DEG_ANGLE_OF_ATTACK 0.0f
|
58
|
+
#define NDIM 3
|
59
|
+
|
60
|
+
//########################################################################
|
61
|
+
//### Defines
|
62
|
+
//########################################################################
|
63
|
+
|
64
|
+
#define VAR_DENSITY 0
|
65
|
+
#define VAR_MOMENTUM 1
|
66
|
+
#define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
|
67
|
+
#define NVAR (VAR_DENSITY_ENERGY+1)
|
68
|
+
|
69
|
+
//########################################################################
|
70
|
+
//### Global variables
|
71
|
+
//########################################################################
|
72
|
+
|
73
|
+
float ff_variable[NVAR];
|
74
|
+
float3 ff_flux_contribution_momentum_x;
|
75
|
+
float3 ff_flux_contribution_momentum_y;
|
76
|
+
float3 ff_flux_contribution_momentum_z;
|
77
|
+
float3 ff_flux_contribution_density_energy;
|
78
|
+
|
79
|
+
//########################################################################
|
80
|
+
//### Start of the main function
|
81
|
+
//########################################################################
|
82
|
+
|
83
|
+
int main(void) {
|
84
|
+
|
85
|
+
// Declare the loop iterators
|
86
|
+
int i,j;
|
87
|
+
|
88
|
+
// Declare far field variables
|
89
|
+
const float angle_of_attack = (M_PI/180.0f) * DEG_ANGLE_OF_ATTACK;
|
90
|
+
float ff_pressure, ff_speed_of_sound, ff_speed;
|
91
|
+
float3 ff_velocity, ff_momentum;
|
92
|
+
|
93
|
+
// Declare other domain variables
|
94
|
+
|
95
|
+
// Declare other/helper variables
|
96
|
+
|
97
|
+
// Compute the far field
|
98
|
+
printf("\n[cfd] Set the far field conditions"); fflush(stdout);
|
99
|
+
{
|
100
|
+
ff_variable[VAR_DENSITY] = 1.4f;
|
101
|
+
ff_pressure = 1.0f;
|
102
|
+
ff_speed_of_sound = sqrt(GAMMA*ff_pressure / ff_variable[VAR_DENSITY]);
|
103
|
+
ff_speed = FF_MACH*ff_speed_of_sound;
|
104
|
+
|
105
|
+
// Compute the velocity
|
106
|
+
ff_velocity.x = ff_speed*cos(angle_of_attack);
|
107
|
+
ff_velocity.y = ff_speed*sin(angle_of_attack);
|
108
|
+
ff_velocity.z = 0.0f;
|
109
|
+
|
110
|
+
// Update the variable
|
111
|
+
ff_variable[VAR_MOMENTUM+0] = ff_variable[VAR_DENSITY] * ff_velocity.x;
|
112
|
+
ff_variable[VAR_MOMENTUM+1] = ff_variable[VAR_DENSITY] * ff_velocity.y;
|
113
|
+
ff_variable[VAR_MOMENTUM+2] = ff_variable[VAR_DENSITY] * ff_velocity.z;
|
114
|
+
ff_variable[VAR_DENSITY_ENERGY] = ff_variable[VAR_DENSITY]*0.5f*ff_speed*ff_speed + (ff_pressure/(GAMMA-1.0f));
|
115
|
+
|
116
|
+
// Set the momentum
|
117
|
+
ff_momentum.x = ff_variable[VAR_MOMENTUM+0];
|
118
|
+
ff_momentum.y = ff_variable[VAR_MOMENTUM+1];
|
119
|
+
ff_momentum.z = ff_variable[VAR_MOMENTUM+2];
|
120
|
+
|
121
|
+
// Compute the flux contribution
|
122
|
+
compute_flux_contribution(
|
123
|
+
ff_momentum,
|
124
|
+
ff_variable[VAR_DENSITY_ENERGY],
|
125
|
+
ff_pressure,
|
126
|
+
ff_velocity,
|
127
|
+
&ff_flux_contribution_momentum_x,
|
128
|
+
&ff_flux_contribution_momentum_y,
|
129
|
+
&ff_flux_contribution_momentum_z,
|
130
|
+
&ff_flux_contribution_density_energy
|
131
|
+
);
|
132
|
+
}
|
133
|
+
|
134
|
+
// Initialising memory
|
135
|
+
printf("\n[cfd] Initialising memory"); fflush(stdout);
|
136
|
+
|
137
|
+
|
138
|
+
// Clean-up and exit
|
139
|
+
printf("\n[cfd] Completed\n\n"); fflush(stdout);
|
140
|
+
fflush(stdout);
|
141
|
+
return 0;
|
142
|
+
}
|
143
|
+
|
144
|
+
//########################################################################
|
145
|
+
//### Function to compute the flux contribution
|
146
|
+
//########################################################################
|
147
|
+
|
148
|
+
inline void compute_flux_contribution(
|
149
|
+
float3 momentum,
|
150
|
+
float density_energy,
|
151
|
+
float pressure,
|
152
|
+
float3 velocity,
|
153
|
+
float3 *fc_momentum_x,
|
154
|
+
float3 *fc_momentum_y,
|
155
|
+
float3 *fc_momentum_z,
|
156
|
+
float3 *fc_density_energy
|
157
|
+
) {
|
158
|
+
|
159
|
+
// Compute the x-momentum
|
160
|
+
(*fc_momentum_x).x = velocity.x*momentum.x + pressure;
|
161
|
+
(*fc_momentum_x).y = velocity.x*momentum.y;
|
162
|
+
(*fc_momentum_x).z = velocity.x*momentum.z;
|
163
|
+
|
164
|
+
// Compute the y-momentum
|
165
|
+
(*fc_momentum_y).x = velocity.x*momentum.y;
|
166
|
+
(*fc_momentum_y).y = velocity.y*momentum.y + pressure;
|
167
|
+
(*fc_momentum_y).z = velocity.y*momentum.z;
|
168
|
+
|
169
|
+
// Compute the z-momentum
|
170
|
+
(*fc_momentum_z).x = velocity.x*momentum.z;
|
171
|
+
(*fc_momentum_z).y = velocity.y*momentum.z;
|
172
|
+
(*fc_momentum_z).z = velocity.z*momentum.z + pressure;
|
173
|
+
|
174
|
+
// Compute energy density
|
175
|
+
(*fc_density_energy).x = velocity.x*density_energy+pressure;
|
176
|
+
(*fc_density_energy).y = velocity.y*density_energy+pressure;
|
177
|
+
(*fc_density_energy).z = velocity.z*density_energy+pressure;
|
178
|
+
}
|
179
|
+
|
180
|
+
//########################################################################
|