bones-compiler 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/2mm.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is '2mm', a 2 matrix multiply kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NI][NK];
|
|
29
|
+
float B[NK][NJ];
|
|
30
|
+
float C[NJ][NL];
|
|
31
|
+
float D[NI][NL];
|
|
32
|
+
float tmp[NI][NJ];
|
|
33
|
+
|
|
34
|
+
// Set the constants
|
|
35
|
+
int alpha = 32412;
|
|
36
|
+
int beta = 2123;
|
|
37
|
+
|
|
38
|
+
// Set the input data
|
|
39
|
+
for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
|
40
|
+
for (i=0; i<NK; i++) { for (j=0; j<NJ; j++) { B[i][j] = ((float) i*(j+1)) / NJ; } }
|
|
41
|
+
for (i=0; i<NL; i++) { for (j=0; j<NJ; j++) { C[i][j] = ((float) i*(j+3)) / NL; } }
|
|
42
|
+
for (i=0; i<NI; i++) { for (j=0; j<NL; j++) { D[i][j] = ((float) i*(j+2)) / NK; } }
|
|
43
|
+
|
|
44
|
+
// Perform the computation (E := alpha*A*B*C + beta*D)
|
|
45
|
+
#pragma species kernel 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
|
46
|
+
for (i=0; i<NI; i++) {
|
|
47
|
+
for (j=0; j<NJ; j++) {
|
|
48
|
+
tmp[i][j] = 0;
|
|
49
|
+
for (k=0; k<NK; k++) {
|
|
50
|
+
tmp[i][j] += alpha * A[i][k] * B[k][j];
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
#pragma species endkernel 2mm-part1
|
|
55
|
+
#pragma species kernel 0:NI-1,0:NL-1|element ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NJ-1,0:NL-1|chunk(0:NJ-1,0:0) -> 0:NI-1,0:NL-1|element
|
|
56
|
+
for (i=0; i<NI; i++) {
|
|
57
|
+
for (j=0; j<NL; j++) {
|
|
58
|
+
D[i][j] *= beta;
|
|
59
|
+
for (k=0; k<NJ; k++) {
|
|
60
|
+
D[i][j] += tmp[i][k] * C[k][j];
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
#pragma species endkernel 2mm-part2
|
|
65
|
+
|
|
66
|
+
// Clean-up and exit the function
|
|
67
|
+
fflush(stdout);
|
|
68
|
+
return 0;
|
|
69
|
+
}
|
|
70
|
+
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/3mm.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is '3mm', a 3 matrix multiply kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NI][NK];
|
|
29
|
+
float B[NK][NJ];
|
|
30
|
+
float C[NJ][NM];
|
|
31
|
+
float D[NM][NL];
|
|
32
|
+
float E[NI][NJ];
|
|
33
|
+
float F[NJ][NL];
|
|
34
|
+
float G[NI][NL];
|
|
35
|
+
|
|
36
|
+
// Set the input data
|
|
37
|
+
for (i=0; i<NI; i++) { for (j=0; j<NK; j++) { A[i][j] = ((float) i*j) / NI; } }
|
|
38
|
+
for (i=0; i<NK; i++) { for (j=0; j<NJ; j++) { B[i][j] = ((float) i*(j+1)) / NJ; } }
|
|
39
|
+
for (i=0; i<NL; i++) { for (j=0; j<NJ; j++) { C[i][j] = ((float) i*(j+3)) / NL; } }
|
|
40
|
+
for (i=0; i<NI; i++) { for (j=0; j<NL; j++) { D[i][j] = ((float) i*(j+2)) / NK; } }
|
|
41
|
+
|
|
42
|
+
// Perform the computation (G := E*F, with E := A*B and F := C*D)
|
|
43
|
+
#pragma species kernel 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
|
44
|
+
// E := A*B
|
|
45
|
+
for (i=0; i<NI; i++) {
|
|
46
|
+
for (j=0; j<NJ; j++) {
|
|
47
|
+
E[i][j] = 0;
|
|
48
|
+
for (k=0; k<NK; k++) {
|
|
49
|
+
E[i][j] += A[i][k] * B[k][j];
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
#pragma species endkernel 3mm-part1
|
|
54
|
+
#pragma species kernel 0:NJ-1,0:NM-1|chunk(0:0,0:NM-1) ^ 0:NM-1,0:NL-1|chunk(0:NM-1,0:0) -> 0:NJ-1,0:NL-1|element
|
|
55
|
+
// F := C*D
|
|
56
|
+
for (i=0; i<NJ; i++) {
|
|
57
|
+
for (j=0; j<NL; j++) {
|
|
58
|
+
F[i][j] = 0;
|
|
59
|
+
for (k=0; k<NM; k++) {
|
|
60
|
+
F[i][j] += C[i][k] * D[k][j];
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
#pragma species endkernel 3mm-part2
|
|
65
|
+
#pragma species kernel 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NJ-1,0:NL-1|chunk(0:NJ-1,0:0) -> 0:NI-1,0:NL-1|element
|
|
66
|
+
// G := E*F
|
|
67
|
+
for (i=0; i<NI; i++) {
|
|
68
|
+
for (j=0; j<NL; j++) {
|
|
69
|
+
G[i][j] = 0;
|
|
70
|
+
for (k=0; k<NJ; k++) {
|
|
71
|
+
G[i][j] += E[i][k] * F[k][j];
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
#pragma species endkernel 3mm-part3
|
|
76
|
+
|
|
77
|
+
// Clean-up and exit the function
|
|
78
|
+
fflush(stdout);
|
|
79
|
+
return 0;
|
|
80
|
+
}
|
|
81
|
+
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/adi.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...29-May-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'adi', an alternating direction implicit solver
|
|
24
|
+
int main(void) {
|
|
25
|
+
int t,i,j,i1,i2;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float X[N][N];
|
|
29
|
+
float A[N][N];
|
|
30
|
+
float B[N][N];
|
|
31
|
+
|
|
32
|
+
// Set the input data
|
|
33
|
+
for (i=0; i<N; i++) {
|
|
34
|
+
for (j=0; j<N; j++) {
|
|
35
|
+
X[i][j] = ((float) i*(j+1) + 1) / N;
|
|
36
|
+
A[i][j] = ((float) i*(j+2) + 2) / N;
|
|
37
|
+
B[i][j] = ((float) i*(j+3) + 3) / N;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Perform the computation
|
|
42
|
+
for (t=0; t<TSTEPS; t++) {
|
|
43
|
+
for (i1=0; i1<N; i1++) {
|
|
44
|
+
for (i2=1; i2<N; i2++) {
|
|
45
|
+
X[i1][i2] = X[i1][i2] - X[i1][i2-1] * A[i1][i2] / B[i1][i2-1];
|
|
46
|
+
B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / B[i1][i2-1];
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
#pragma species kernel 0:N-1,N-1:N-1|element ^ 0:N-1,N-1:N-1|element -> 0:N-1,N-1:N-1|element
|
|
50
|
+
for (i1=0; i1<N; i1++) {
|
|
51
|
+
X[i1][N-1] = X[i1][N-1] / B[i1][N-1];
|
|
52
|
+
}
|
|
53
|
+
#pragma species endkernel adi-part1
|
|
54
|
+
for (i1=0; i1<N; i1++) {
|
|
55
|
+
for (i2=0; i2<N-2; i2++) {
|
|
56
|
+
X[i1][N-i2-2] = (X[i1][N-2-i2] - X[i1][N-2-i2-1] * A[i1][N-i2-3]) / B[i1][N-3-i2];
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
for (i1=1; i1<N; i1++) {
|
|
60
|
+
for (i2=0; i2<N; i2++) {
|
|
61
|
+
X[i1][i2] = X[i1][i2] - X[i1-1][i2] * A[i1][i2] / B[i1-1][i2];
|
|
62
|
+
B[i1][i2] = B[i1][i2] - A[i1][i2] * A[i1][i2] / B[i1-1][i2];
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
#pragma species kernel N-1:N-1,0:N-1|element ^ N-1:N-1,0:N-1|element -> N-1:N-1,0:N-1|element
|
|
66
|
+
for (i2=0; i2<N; i2++) {
|
|
67
|
+
X[N-1][i2] = X[N-1][i2] / B[N-1][i2];
|
|
68
|
+
}
|
|
69
|
+
#pragma species endkernel adi-part2
|
|
70
|
+
for (i1=0; i1<N-2; i1++) {
|
|
71
|
+
for (i2=0; i2<N; i2++) {
|
|
72
|
+
X[N-2-i1][i2] = (X[N-2-i1][i2] - X[N-i1-3][i2] * A[N-3-i1][i2]) / B[N-2-i1][i2];
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Clean-up and exit the function
|
|
78
|
+
fflush(stdout);
|
|
79
|
+
return 0;
|
|
80
|
+
}
|
|
81
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/atax.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'atax', a matrix transpose and vector multiplication kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NX][NY];
|
|
29
|
+
float x[NY];
|
|
30
|
+
float y[NX];
|
|
31
|
+
float tmp[NX];
|
|
32
|
+
|
|
33
|
+
// Set the input data
|
|
34
|
+
for (i=0; i<NY; i++) {
|
|
35
|
+
x[i] = i*3.14159;
|
|
36
|
+
}
|
|
37
|
+
for (i=0; i<NX; i++) {
|
|
38
|
+
for (j=0; j<NY; j++) {
|
|
39
|
+
A[i][j] = ((float) i*(j+1)) / NX;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Perform the computation (y := A'Ax)
|
|
44
|
+
#pragma species kernel 0:NX-1,0:NY-1|chunk(0:0,0:NY-1) ^ 0:NY-1|full -> 0:NX-1|element
|
|
45
|
+
for (i=0; i<NX; i++) {
|
|
46
|
+
tmp[i] = 0;
|
|
47
|
+
for (j=0; j<NY; j++) {
|
|
48
|
+
tmp[i] = tmp[i] + A[i][j] * x[j];
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
#pragma species endkernel atax-part1
|
|
52
|
+
#pragma species kernel 0:NX-1,0:NY-1|chunk(0:NX-1,0:0) ^ 0:NX-1|full -> 0:NY-1|element
|
|
53
|
+
for (j=0; j<NY; j++) {
|
|
54
|
+
y[j] = 0;
|
|
55
|
+
for (i=0; i<NX; i++) {
|
|
56
|
+
y[j] = y[j] + A[i][j] * tmp[i];
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
#pragma species endkernel atax-part2
|
|
60
|
+
|
|
61
|
+
// Clean-up and exit the function
|
|
62
|
+
fflush(stdout);
|
|
63
|
+
return 0;
|
|
64
|
+
}
|
|
65
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/bicg.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'bicg', a biconjugate gradients method kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NX][NY];
|
|
29
|
+
float p[NY];
|
|
30
|
+
float q[NX];
|
|
31
|
+
float r[NX];
|
|
32
|
+
float s[NY];
|
|
33
|
+
|
|
34
|
+
// Set the input data
|
|
35
|
+
for (i=0; i<NY; i++) {
|
|
36
|
+
p[i] = i*3.14159;
|
|
37
|
+
}
|
|
38
|
+
for (i=0; i<NX; i++) {
|
|
39
|
+
r[i] = i*3.14159;
|
|
40
|
+
for (j=0; j<NY; j++) {
|
|
41
|
+
A[i][j] = ((float) i*(j+1)) / NX;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Perform the computation
|
|
46
|
+
#pragma species kernel 0:NX-1|full ^ 0:NX-1,0:NY-1|chunk(0:NX-1,0:0) -> 0:NY-1|element
|
|
47
|
+
for (j=0; j<NY; j++) {
|
|
48
|
+
s[j] = 0;
|
|
49
|
+
for (i=0; i<NX; i++) {
|
|
50
|
+
s[j] = s[j] + r[i] * A[i][j];
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
#pragma species endkernel bicg-part1
|
|
54
|
+
#pragma species kernel 0:NX-1,0:NY-1|chunk(0:0,0:NY-1) ^ 0:NY-1|full -> 0:NX-1|element
|
|
55
|
+
for (i=0; i<NX; i++) {
|
|
56
|
+
q[i] = 0;
|
|
57
|
+
for (j=0; j<NY; j++) {
|
|
58
|
+
q[i] = q[i] + A[i][j] * p[j];
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
#pragma species endkernel bicg-part2
|
|
62
|
+
|
|
63
|
+
// Clean-up and exit the function
|
|
64
|
+
fflush(stdout);
|
|
65
|
+
return 0;
|
|
66
|
+
}
|
|
67
|
+
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/cholesky.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-Jul-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'cholesky', a cholesky decomposition kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
float x[1];
|
|
27
|
+
float p_i;
|
|
28
|
+
|
|
29
|
+
// Declare arrays on the stack
|
|
30
|
+
float A[N][N];
|
|
31
|
+
float p[N];
|
|
32
|
+
|
|
33
|
+
// Set the input data
|
|
34
|
+
for (i=0; i<N; i++) {
|
|
35
|
+
for (j=0; j<N; j++) {
|
|
36
|
+
A[i][j] = i*2.3 + 1;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
// Perform the computation
|
|
41
|
+
for (i=0; i<N; i++) {
|
|
42
|
+
x[0] = A[i][i];
|
|
43
|
+
#pragma species kernel i:i,0:i-1|element -> 0:0|shared
|
|
44
|
+
for (j=0; j<=i-1; j++) {
|
|
45
|
+
x[0] = x[0] - A[i][j] * A[i][j];
|
|
46
|
+
}
|
|
47
|
+
#pragma species endkernel cholesky-part1
|
|
48
|
+
p[i] = 1.0 / sqrt(x[0]);
|
|
49
|
+
p_i = p[i];
|
|
50
|
+
#pragma species kernel i:i,i+1:N-1|element ^ i+1:N-1,0:i-1|chunk(0:0,0:i-1) ^ i:i,0:i-1|full -> 0:0|shared ^ i+1:N-1,i:i|element
|
|
51
|
+
for (j=i+1; j<N; j++) {
|
|
52
|
+
x[0] = A[i][j];
|
|
53
|
+
for (k=0; k<=i-1; k++) {
|
|
54
|
+
x[0] = x[0] - A[j][k] * A[i][k];
|
|
55
|
+
}
|
|
56
|
+
A[j][i] = x[0] * p_i;
|
|
57
|
+
}
|
|
58
|
+
#pragma species endkernel cholesky-part2
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Clean-up and exit the function
|
|
62
|
+
fflush(stdout);
|
|
63
|
+
return 0;
|
|
64
|
+
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This header
|
|
3
|
+
// contains array size definitions and is common among the examples that are also
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/common.h
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...23-May-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
// Include C-libraries
|
|
22
|
+
#include <stdio.h>
|
|
23
|
+
#include <stdlib.h>
|
|
24
|
+
#include <math.h>
|
|
25
|
+
|
|
26
|
+
// Select a dataset size
|
|
27
|
+
//#define MINI_DATASET
|
|
28
|
+
#define SMALL_DATASET
|
|
29
|
+
//#define STANDARD_DATASET
|
|
30
|
+
//#define LARGE_DATASET
|
|
31
|
+
//#define EXTRALARGE_DATASET
|
|
32
|
+
|
|
33
|
+
// Defines used per benchmark:
|
|
34
|
+
//
|
|
35
|
+
// N [adi, cholesky, correlation, covariance, floyd-warshall, jacobi-2d-imper, lu, ludcmp, seidel-2d]
|
|
36
|
+
// M [correlation, covariance]
|
|
37
|
+
// NI [2mm, 3mm, fdtd-2d, gemm, symm, syrk, syr2k, trmm]
|
|
38
|
+
// NJ [2mm, 3mm, fdtd-2d, gemm, symm, syrk, syr2k]
|
|
39
|
+
// NK [2mm, 3mm, gemm]
|
|
40
|
+
// NL [2mm, 3mm]
|
|
41
|
+
// NQ [doitgen]
|
|
42
|
+
// NR [doitgen]
|
|
43
|
+
// NP [doitgen]
|
|
44
|
+
// NM [3mm]
|
|
45
|
+
// NX [atax, bicg, durbin, gemver, gesummv, mvt, trisolv]
|
|
46
|
+
// NY [atax, bicg]
|
|
47
|
+
// CZ [fdtd-2d-apml]
|
|
48
|
+
// CYM [fdtd-2d-apml]
|
|
49
|
+
// CXM [fdtd-2d-apml]
|
|
50
|
+
// LARGE_N [jacobi-1d-imper]
|
|
51
|
+
// LENGTH [dynprog, reg_detect]
|
|
52
|
+
// TSTEPS [adi, fdtd-2d, jacobi-1d-imper, jacobi-2d-imper, seidel-2d]
|
|
53
|
+
// ITER [dynprog, reg_detect]
|
|
54
|
+
// MAXGRID [reg_detect]
|
|
55
|
+
//
|
|
56
|
+
|
|
57
|
+
// Determine the sizes of the 5 possible datasets
|
|
58
|
+
// Size tables: exactly one of the *_DATASET selectors is expected to be
// defined before this point; the matching section defines every size macro
// used by the benchmarks. Macro values that are expressions are parenthesised
// so that they expand correctly inside larger expressions.
#ifdef MINI_DATASET
#define N 32
#define M 32
#define NI 32
#define NJ 32
#define NK 32
#define NL 32
#define NM 32
#define NQ 10
#define NR 10
#define NP 10
#define NX 32
#define NY 32
#define CZ 32
#define CYM 32
#define CXM 32
#define LARGE_N 500
#define LENGTH 32
#define TSTEPS 2
#define ITER 10
#define MAXGRID 2
#endif
#ifdef SMALL_DATASET
#define N 256
#define M 256
#define NI 128
#define NJ 128
#define NK 128
#define NL 128
#define NM 128
#define NQ 32
#define NR 32
#define NP 32
#define NX 500
#define NY 500
#define CZ 64
#define CYM 64
#define CXM 64
#define LARGE_N 1000
#define LENGTH 50
#define TSTEPS 2
#define ITER 2
#define MAXGRID 8
#endif
#ifdef STANDARD_DATASET
#define N 1024
#define M 1024
#define NI 1024
#define NJ 1024
#define NK 1024
#define NL 1024
#define NM 1024
#define NQ 128
#define NR 128
#define NP 128
#define NX 4000
#define NY 4000
#define CZ 256
#define CYM 256
#define CXM 256
#define LARGE_N 10000
#define LENGTH 50
#define TSTEPS 2
#define ITER 10
#define MAXGRID 32
#endif
#ifdef LARGE_DATASET
#define N 2048
#define M 2048
#define NI 2048
#define NJ 2048
#define NK 2048
#define NL 2048
#define NM 2048
#define NQ 256
#define NR 256
#define NP 256
#define NX 4096
#define NY 4096
#define CZ 512
#define CYM 512
#define CXM 512
// Parenthesised: without the parentheses 'x % LARGE_N' or 'x / LARGE_N'
// would bind to the first factor only
#define LARGE_N (2048*2048)
#define LENGTH 500
#define TSTEPS 5
#define ITER 100
#define MAXGRID 128
#endif
#ifdef EXTRALARGE_DATASET
#define N 4000
#define M 4000
#define NI 4000
#define NJ 4000
#define NK 4000
#define NL 4000
#define NM 4000
#define NQ 1000
#define NR 1000
#define NP 1000
#define NX 100000
#define NY 100000
#define CZ 1000
#define CYM 1000
#define CXM 1000
#define LARGE_N 10000000
#define LENGTH 500
#define TSTEPS 10
#define ITER 1000
#define MAXGRID 512
#endif
|
|
168
|
+
|