bones-compiler 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/jacobi-2d-imper.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'jacobi-2d-imper', a 2D Jacobi stencil computation
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,t;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[N][N];
|
|
29
|
+
float B[N][N];
|
|
30
|
+
|
|
31
|
+
// Set the input data
|
|
32
|
+
for (i=0; i<N; i++) {
|
|
33
|
+
for (j=0; j<N; j++) {
|
|
34
|
+
A[i][j] = ((float) i*(j+2) + 2) / N;
|
|
35
|
+
B[i][j] = ((float) i*(j+3) + 3) / N;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Perform the computation
|
|
40
|
+
for (t=0; t<TSTEPS; t++) {
|
|
41
|
+
#pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
|
|
42
|
+
for (i=1; i<N-1; i++) {
|
|
43
|
+
for (j=1; j<N-1; j++) {
|
|
44
|
+
B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
#pragma species endkernel jacobi-2d-imper-part1
|
|
48
|
+
#pragma species kernel 1:N-2,1:N-2|element -> 1:N-2,1:N-2|element
|
|
49
|
+
for (i=1; i<N-1; i++) {
|
|
50
|
+
for (j=1; j<N-1; j++) {
|
|
51
|
+
A[i][j] = B[i][j];
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
#pragma species endkernel jacobi-2d-imper-part2
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Clean-up and exit the function
|
|
58
|
+
fflush(stdout);
|
|
59
|
+
return 0;
|
|
60
|
+
}
|
|
61
|
+
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/lu.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...26-Jun-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'lu', an LU decomposition kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[N][N];
|
|
29
|
+
|
|
30
|
+
// Set the input data
|
|
31
|
+
for (i=0; i<N; i++) {
|
|
32
|
+
for (j=0; j<N; j++) {
|
|
33
|
+
A[i][j] = ((float) (i+1)*(j+1)) / N;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Perform the computation
|
|
38
|
+
for (k=0; k<N; k++) {
|
|
39
|
+
#pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
|
|
40
|
+
for (j=k+1; j<N; j++) {
|
|
41
|
+
A[k][j] = A[k][j] / A[k][k];
|
|
42
|
+
}
|
|
43
|
+
#pragma species endkernel lu-part1
|
|
44
|
+
#pragma species kernel k+1:N-1,k:k|element ^ k:k,k+1:N-1|element ^ k+1:N-1,k+1:N-1|element -> k+1:N-1,k+1:N-1|element
|
|
45
|
+
for(i=k+1; i<N; i++) {
|
|
46
|
+
for (j=k+1; j<N; j++) {
|
|
47
|
+
A[i][j] = A[i][j] - A[i][k] * A[k][j];
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
#pragma species endkernel lu-part2
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Clean-up and exit the function
|
|
54
|
+
fflush(stdout);
|
|
55
|
+
return 0;
|
|
56
|
+
}
|
|
57
|
+
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/ludcmp.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...23-May-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'ludcmp', an LU decomposition kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
float w[1];
|
|
27
|
+
|
|
28
|
+
// Declare arrays on the stack
|
|
29
|
+
float A[N+1][N+1];
|
|
30
|
+
float b[N+1];
|
|
31
|
+
float x[N+1];
|
|
32
|
+
float y[N+1];
|
|
33
|
+
|
|
34
|
+
// Set the input data
|
|
35
|
+
for (i=0; i<=N; i++) {
|
|
36
|
+
x[i] = i+1;
|
|
37
|
+
y[i] = (i+1)/(float)(N*40) + 1;
|
|
38
|
+
b[i] = (i+1)/(float)(N*20) + 42;
|
|
39
|
+
for (j=0; j<=N; j++) {
|
|
40
|
+
A[i][j] = (i+1)/(float)(10*N) + (j+1)/(float)(5*N);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Perform the computation
|
|
45
|
+
b[0] = 1.0;
|
|
46
|
+
for (i=0; i<N; i++) {
|
|
47
|
+
for (j=i+1; j<=N; j++) {
|
|
48
|
+
w[0] = A[j][i];
|
|
49
|
+
#pragma species kernel j:j,0:i-1|element ^ 0:i-1,i:i|element -> 0:0|shared
|
|
50
|
+
for (k=0; k<i; k++) {
|
|
51
|
+
w[0] = w[0] - A[j][k] * A[k][i];
|
|
52
|
+
}
|
|
53
|
+
#pragma species endkernel ludcmp-part1
|
|
54
|
+
A[j][i] = w[0] / A[i][i];
|
|
55
|
+
}
|
|
56
|
+
for (j=i+1; j<=N; j++) {
|
|
57
|
+
w[0] = A[i+1][j];
|
|
58
|
+
#pragma species kernel i+1:i+1,0:i|element ^ 0:i,j:j|element -> 0:0|shared
|
|
59
|
+
for (k=0; k<=i; k++) {
|
|
60
|
+
w[0] = w[0] - A[i+1][k] * A[k][j];
|
|
61
|
+
}
|
|
62
|
+
#pragma species endkernel ludcmp-part2
|
|
63
|
+
A[i+1][j] = w[0];
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
y[0] = b[0];
|
|
67
|
+
for (i=1; i<=N; i++) {
|
|
68
|
+
w[0] = b[i];
|
|
69
|
+
#pragma species kernel i:i,0:i-1|element ^ 0:i-1|element -> 0:0|shared
|
|
70
|
+
for (j=0; j<i; j++) {
|
|
71
|
+
w[0] = w[0] - A[i][j] * y[j];
|
|
72
|
+
}
|
|
73
|
+
#pragma species endkernel ludcmp-part3
|
|
74
|
+
y[i] = w[0];
|
|
75
|
+
}
|
|
76
|
+
x[N] = y[N] / A[N][N];
|
|
77
|
+
for (i=0; i<=N-1; i++) {
|
|
78
|
+
w[0] = y[N-1-i];
|
|
79
|
+
//#pragma species kernel N-1-i:N-1-i,N-i:N|element ^ N-i:N|element -> 0:0|shared
|
|
80
|
+
for (j=N-i; j<=N; j++) {
|
|
81
|
+
w[0] = w[0] - A[N-1-i][j] * x[j];
|
|
82
|
+
}
|
|
83
|
+
//#pragma species endkernel ludcmp-part4
|
|
84
|
+
x[N-1-i] = w[0] / A[N-1-i][N-1-i];
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Clean-up and exit the function
|
|
88
|
+
fflush(stdout);
|
|
89
|
+
return 0;
|
|
90
|
+
}
|
|
91
|
+
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/mvt.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...23-May-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'mvt', a matrix vector product and transpose kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NX][NX];
|
|
29
|
+
float x1[NX];
|
|
30
|
+
float x2[NX];
|
|
31
|
+
float y_1[NX];
|
|
32
|
+
float y_2[NX];
|
|
33
|
+
|
|
34
|
+
// Set the input data
|
|
35
|
+
for (i=0; i<NX; i++) {
|
|
36
|
+
x1[i] = ((float) i) / NX;
|
|
37
|
+
x2[i] = ((float) i + 1) / NX;
|
|
38
|
+
y_1[i] = ((float) i + 3) / NX;
|
|
39
|
+
y_2[i] = ((float) i + 4) / NX;
|
|
40
|
+
for (j=0; j<NX; j++) {
|
|
41
|
+
A[i][j] = ((float) i*j) / NX;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Perform the computation
|
|
46
|
+
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
|
|
47
|
+
for (i=0; i<NX; i++) {
|
|
48
|
+
for (j=0; j<NX; j++) {
|
|
49
|
+
x1[i] = x1[i] + A[i][j] * y_1[j];
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
#pragma species endkernel mvt-part1
|
|
53
|
+
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:NX-1,0:0) ^ 0:NX-1|full -> 0:NX-1|element
|
|
54
|
+
for (i=0; i<NX; i++) {
|
|
55
|
+
for (j=0; j<NX; j++) {
|
|
56
|
+
x2[i] = x2[i] + A[j][i] * y_2[j];
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
#pragma species endkernel mvt-part2
|
|
60
|
+
|
|
61
|
+
// Clean-up and exit the function
|
|
62
|
+
fflush(stdout);
|
|
63
|
+
return 0;
|
|
64
|
+
}
|
|
65
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
|
|
2
|
+
== Benchmarks with full parallelism
|
|
3
|
+
linear-algebra/kernels/2mm...........2 species...[fully classified]
|
|
4
|
+
linear-algebra/kernels/3mm...........3 species...[fully classified]
|
|
5
|
+
linear-algebra/kernels/atax..........2 species...[fully classified]
|
|
6
|
+
linear-algebra/kernels/bicg..........2 species...[fully classified]
|
|
7
|
+
linear-algebra/kernels/doitgen.......2 species...[fully classified]
|
|
8
|
+
linear-algebra/kernels/gemm..........1 species...[fully classified]
|
|
9
|
+
linear-algebra/kernels/gemver........4 species...[fully classified]
|
|
10
|
+
linear-algebra/kernels/gesummv.......1 species...[fully classified]
|
|
11
|
+
linear-algebra/kernels/mvt...........2 species...[fully classified]
|
|
12
|
+
linear-algebra/kernels/syr2k.........1 species...[fully classified]
|
|
13
|
+
linear-algebra/kernels/syrk..........1 species...[fully classified]
|
|
14
|
+
stencils/fdtd-2d.....................4 species...[fully classified]
|
|
15
|
+
stencils/jacobi-1d-imper.............2 species...[fully classified]
|
|
16
|
+
stencils/jacobi-2d-imper.............2 species...[fully classified]
|
|
17
|
+
|
|
18
|
+
== Benchmarks with significant parallelism
|
|
19
|
+
linear-algebra/kernels/cholesky......3 species...[no outer-loop parallelism, inner-loops only classified]
|
|
20
|
+
linear-algebra/kernels/symm..........1 species...[no outer-loop parallelism, inner-loops only classified]
|
|
21
|
+
linear-algebra/kernels/trisolv.......1 species...[no outer-loop parallelism, inner-loops only classified]
|
|
22
|
+
linear-algebra/kernels/trmm..........1 species...[no outer-loop parallelism, inner-loops only classified]
|
|
23
|
+
linear-algebra/solvers/gramschmidt...4 species...[no outer-loop parallelism, inner-loops only classified]
|
|
24
|
+
linear-algebra/solvers/lu............2 species...[no outer-loop parallelism, inner-loops only classified]
|
|
25
|
+
linear-algebra/solvers/ludcmp........4 species...[no outer-loop parallelism, inner-loops only classified]
|
|
26
|
+
datamining/correlation...............5 species...[most parts classified, final part inner-loop only]
|
|
27
|
+
datamining/covariance................4 species...[most parts classified, final part inner-loop only]
|
|
28
|
+
medley/reg_detect....................2 species...[partly classified, several parts have no parallelism]
|
|
29
|
+
|
|
30
|
+
== Benchmarks with very little parallelism
|
|
31
|
+
linear-algebra/solvers/durbin........2 species...[partly classified, most parts have no parallelism]
|
|
32
|
+
linear-algebra/solvers/dynprog.......1 species...[partly classified, most parts have no parallelism]
|
|
33
|
+
stencils/adi.........................2 species...[partly classified, most parts have no parallelism]
|
|
34
|
+
stencils/fdtd-apml...................1 species...[partly classified, most parts have no parallelism]
|
|
35
|
+
|
|
36
|
+
== Benchmarks without parallelism
|
|
37
|
+
medley/floyd-warshall................0 species...[no parallelism]
|
|
38
|
+
stencils/seidel-2d...................0 species...[no parallelism]
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/reg_detect.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...26-Jun-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'reg_detect', a regularity detection algorithm
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,t,cnt;
|
|
26
|
+
float sum;
|
|
27
|
+
|
|
28
|
+
// Declare arrays on the stack
|
|
29
|
+
float sum_tang[MAXGRID][MAXGRID];
|
|
30
|
+
float mean[MAXGRID][MAXGRID];
|
|
31
|
+
float path[MAXGRID][MAXGRID];
|
|
32
|
+
float diff[MAXGRID][MAXGRID][LENGTH];
|
|
33
|
+
float sum_diff[MAXGRID][MAXGRID][LENGTH];
|
|
34
|
+
|
|
35
|
+
// Set the input data
|
|
36
|
+
for (i=0; i<MAXGRID; i++) {
|
|
37
|
+
for (j=0; j<MAXGRID; j++) {
|
|
38
|
+
sum_tang[i][j] = (float)((i+1)*(j+1));
|
|
39
|
+
mean[i][j] = ((float) i-j) / MAXGRID;
|
|
40
|
+
path[i][j] = ((float) i*(j-1)) / MAXGRID;
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Perform the computation
|
|
45
|
+
for (t=0; t<ITER; t++) {
|
|
46
|
+
#pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
|
|
47
|
+
for (j=0; j<=MAXGRID-1; j++) {
|
|
48
|
+
for (i=0; i<=MAXGRID-1; i++) {
|
|
49
|
+
sum = sum_tang[j][i];
|
|
50
|
+
for (cnt=0; cnt<=LENGTH-1; cnt++) {
|
|
51
|
+
diff[j][i][cnt] = sum;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
#pragma species endkernel reg-detect-part1
|
|
56
|
+
for (j=0; j<=MAXGRID-1; j++) {
|
|
57
|
+
for (i=j; i<=MAXGRID-1; i++) {
|
|
58
|
+
sum_diff[j][i][0] = diff[j][i][0];
|
|
59
|
+
for (cnt=1; cnt<=LENGTH-1; cnt++) {
|
|
60
|
+
sum_diff[j][i][cnt] = sum_diff[j][i][cnt-1] + diff[j][i][cnt];
|
|
61
|
+
}
|
|
62
|
+
mean[j][i] = sum_diff[j][i][LENGTH-1];
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
#pragma species kernel 0:0,0:MAXGRID-1|element -> 0:0,0:MAXGRID-1|element
|
|
66
|
+
for (i=0; i<=MAXGRID-1; i++) {
|
|
67
|
+
path[0][i] = mean[0][i];
|
|
68
|
+
}
|
|
69
|
+
#pragma species endkernel reg-detect-part2
|
|
70
|
+
for (j=1; j<=MAXGRID-1; j++) {
|
|
71
|
+
#pragma species kernel j-1:j-1,j-1:MAXGRID-2|element ^ j:j,j:MAXGRID-1|element -> j:j,j:MAXGRID-1|element
|
|
72
|
+
for (i=j; i<=MAXGRID-1; i++) {
|
|
73
|
+
path[j][i] = path[j-1][i-1] + mean[j][i];
|
|
74
|
+
}
|
|
75
|
+
#pragma species endkernel reg-detect-part3
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Clean-up and exit the function
|
|
80
|
+
fflush(stdout);
|
|
81
|
+
return 0;
|
|
82
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. For more
|
|
3
|
+
// information on Bones please use the contact information below.
|
|
4
|
+
//
|
|
5
|
+
// == More information on Bones
|
|
6
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
7
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
8
|
+
//
|
|
9
|
+
// == File information
|
|
10
|
+
// Filename...........benchmark/saxpy.c
|
|
11
|
+
// Author.............Cedric Nugteren
|
|
12
|
+
// Last modified on...04-Jul-2012
|
|
13
|
+
//
|
|
14
|
+
|
|
15
|
+
#include "common.h"
|
|
16
|
+
|
|
17
|
+
// This is 'saxpy', a scalar multiplication and vector addition kernel
|
|
18
|
+
int main(void) {
|
|
19
|
+
int i;
|
|
20
|
+
|
|
21
|
+
// Declare arrays on the stack
|
|
22
|
+
float x[LARGE_N];
|
|
23
|
+
float y[LARGE_N];
|
|
24
|
+
|
|
25
|
+
// Set the input data
|
|
26
|
+
for (i=0; i<LARGE_N; i++) {
|
|
27
|
+
x[i] = i*1.4;
|
|
28
|
+
y[i] = i/0.9;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// Set the constants
|
|
32
|
+
float a = 411.3;
|
|
33
|
+
|
|
34
|
+
// Perform the computation (y := ax+y)
|
|
35
|
+
#pragma species kernel 0:LARGE_N-1|element ^ 0:LARGE_N-1|element -> 0:LARGE_N-1|element
|
|
36
|
+
for (i=0; i<LARGE_N; i++) {
|
|
37
|
+
y[i] = a*x[i] + y[i];
|
|
38
|
+
}
|
|
39
|
+
#pragma species endkernel saxpy
|
|
40
|
+
|
|
41
|
+
// Clean-up and exit the function
|
|
42
|
+
fflush(stdout);
|
|
43
|
+
return 0;
|
|
44
|
+
}
|
|
45
|
+
|