bones-compiler 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/fdtd-2d.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...08-May-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'fdtd-2d', a 2D finite-difference time-domain kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,t;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float ex[NI][NJ];
|
|
29
|
+
float ey[NI][NJ];
|
|
30
|
+
float hz[NI][NJ];
|
|
31
|
+
|
|
32
|
+
// Set the input data
|
|
33
|
+
for (i=0; i<NI; i++) {
|
|
34
|
+
for (j=0; j<NJ; j++) {
|
|
35
|
+
ex[i][j] = ((float) i*(j+1)) / NI;
|
|
36
|
+
ey[i][j] = ((float) i*(j+2)) / NJ;
|
|
37
|
+
hz[i][j] = ((float) i*(j+3)) / NI;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Perform the computation
|
|
42
|
+
for (t=0; t<TSTEPS; t++) {
|
|
43
|
+
#pragma species kernel 0:0|void -> 0:0,0:NJ-1|element
|
|
44
|
+
for (j=0; j<NJ; j++) {
|
|
45
|
+
ey[0][j] = t;
|
|
46
|
+
}
|
|
47
|
+
#pragma species endkernel fdtd-2d-part1
|
|
48
|
+
#pragma species kernel 1:NI-1,0:NJ-1|element ^ 1:NI-1,0:NJ-1|neighbourhood(-1:0,0:0) -> 1:NI-1,0:NJ-1|element
|
|
49
|
+
for (i=1; i<NI; i++) {
|
|
50
|
+
for (j=0; j<NJ; j++) {
|
|
51
|
+
ey[i][j] = ey[i][j] - 0.5*(hz[i][j] - hz[i-1][j]);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
#pragma species endkernel fdtd-2d-part2
|
|
55
|
+
#pragma species kernel 0:NI-1,1:NJ-1|element ^ 0:NI-1,1:NJ-1|neighbourhood(0:0,-1:0) -> 0:NI-1,1:NJ-1|element
|
|
56
|
+
for (i=0; i<NI; i++) {
|
|
57
|
+
for (j=1; j<NJ; j++) {
|
|
58
|
+
ex[i][j] = ex[i][j] - 0.5*(hz[i][j] - hz[i][j-1]);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
#pragma species endkernel fdtd-2d-part3
|
|
62
|
+
#pragma species kernel 0:NI-2,0:NJ-2|element ^ 0:NI-2,0:NJ-2|neighbourhood(0:0,0:1) ^ 0:NI-2,0:NJ-2|neighbourhood(0:1,0:0) -> 0:NI-2,0:NJ-2|element
|
|
63
|
+
for (i=0; i<NI-1; i++) {
|
|
64
|
+
for (j=0; j<NJ-1; j++) {
|
|
65
|
+
hz[i][j] = hz[i][j] - 0.7*(ex[i][j+1] - ex[i][j] + ey[i+1][j] - ey[i][j]);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
#pragma species endkernel fdtd-2d-part4
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Clean-up and exit the function
|
|
72
|
+
fflush(stdout);
|
|
73
|
+
return 0;
|
|
74
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/floyd-warshall.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...10-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'floyd-warshall', a graph analysis algorithm to find shortest paths in a weighted graph
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float path[N][N];
|
|
29
|
+
|
|
30
|
+
// Set the input data
|
|
31
|
+
for (i=0; i<N; i++) {
|
|
32
|
+
for (j=0; j<N; j++) {
|
|
33
|
+
path[i][j] = ((float) (i+1)*(j+1)) / N;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Perform the computation
|
|
38
|
+
for (k=0; k<N; k++) {
|
|
39
|
+
for (i=0; i<N; i++) {
|
|
40
|
+
for (j=0; j<N; j++) {
|
|
41
|
+
path[i][j] = (path[i][j] < path[i][k]+path[k][j]) ? path[i][j] : path[i][k]+path[k][j];
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// Clean-up and exit the function
|
|
47
|
+
fflush(stdout);
|
|
48
|
+
return 0;
|
|
49
|
+
}
|
|
50
|
+
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/gemm.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...04-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'gemm', a general matrix multiplication kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NI][NK];
|
|
29
|
+
float B[NK][NJ];
|
|
30
|
+
float C[NI][NJ];
|
|
31
|
+
|
|
32
|
+
// Set the constants
|
|
33
|
+
int alpha = 32412;
|
|
34
|
+
int beta = 2123;
|
|
35
|
+
|
|
36
|
+
// Set the input data
|
|
37
|
+
for (i=0; i<NI; i++) {
|
|
38
|
+
for (j=0; j<NK; j++) {
|
|
39
|
+
A[i][j] = ((float) i*j) / NI;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
for (i=0; i<NK; i++) {
|
|
43
|
+
for (j=0; j<NJ; j++) {
|
|
44
|
+
B[i][j] = ((float) i*j) / NI;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
for (i=0; i<NI; i++) {
|
|
48
|
+
for (j=0; j<NJ; j++) {
|
|
49
|
+
C[i][j] = ((float) i*j) / NI;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Perform the computation (C := alpha*A*B + beta*C)
|
|
54
|
+
#pragma species kernel 0:NI-1,0:NJ-1|element ^ 0:NI-1,0:NK-1|chunk(0:0,0:NK-1) ^ 0:NK-1,0:NJ-1|chunk(0:NK-1,0:0) -> 0:NI-1,0:NJ-1|element
|
|
55
|
+
for (i=0; i<NI; i++) {
|
|
56
|
+
for (j=0; j<NJ; j++) {
|
|
57
|
+
C[i][j] *= beta;
|
|
58
|
+
for (k=0; k<NK; k++) {
|
|
59
|
+
C[i][j] += alpha * A[i][k] * B[k][j];
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
#pragma species endkernel gemm
|
|
64
|
+
|
|
65
|
+
// Clean-up and exit the function
|
|
66
|
+
fflush(stdout);
|
|
67
|
+
return 0;
|
|
68
|
+
}
|
|
69
|
+
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/gemver.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...04-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'gemver', a general matrix vector multiplication and matrix addition kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NX][NX];
|
|
29
|
+
float u1[NX];
|
|
30
|
+
float u2[NX];
|
|
31
|
+
float v1[NX];
|
|
32
|
+
float v2[NX];
|
|
33
|
+
float w[NX];
|
|
34
|
+
float x[NX];
|
|
35
|
+
float y[NX];
|
|
36
|
+
float z[NX];
|
|
37
|
+
|
|
38
|
+
// Set the constants
|
|
39
|
+
int alpha = 43532;
|
|
40
|
+
int beta = 12313;
|
|
41
|
+
|
|
42
|
+
// Set the input data
|
|
43
|
+
for (i=0; i<NX; i++) {
|
|
44
|
+
u1[i] = i;
|
|
45
|
+
u2[i] = (i+1)/NX/2.0;
|
|
46
|
+
v1[i] = (i+1)/NX/4.0;
|
|
47
|
+
v2[i] = (i+1)/NX/6.0;
|
|
48
|
+
w[i] = 0.0;
|
|
49
|
+
x[i] = 0.0;
|
|
50
|
+
y[i] = (i+1)/NX/8.0;
|
|
51
|
+
z[i] = (i+1)/NX/9.0;
|
|
52
|
+
for (j=0; j<NX; j++) {
|
|
53
|
+
A[i][j] = ((float) i*j) / NX;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Perform the computation
|
|
58
|
+
#pragma species kernel 0:NX-1,0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1,0:NX-1|element
|
|
59
|
+
for (i=0; i<NX; i++) {
|
|
60
|
+
for (j=0; j<NX; j++) {
|
|
61
|
+
A[i][j] = A[i][j] + u1[i] * v1[j] + u2[i] * v2[j];
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
#pragma species endkernel gemver-part1
|
|
65
|
+
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:NX-1,0:0) ^ 0:NX-1|full -> 0:NX-1|element
|
|
66
|
+
for (i=0; i<NX; i++) {
|
|
67
|
+
for (j=0; j<NX; j++) {
|
|
68
|
+
x[i] = x[i] + beta * A[j][i] * y[j];
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
#pragma species endkernel gemver-part2
|
|
72
|
+
#pragma species kernel 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1|element
|
|
73
|
+
for (i=0; i<NX; i++) {
|
|
74
|
+
x[i] = x[i] + z[i];
|
|
75
|
+
}
|
|
76
|
+
#pragma species endkernel gemver-part3
|
|
77
|
+
#pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
|
|
78
|
+
for (i=0; i<NX; i++) {
|
|
79
|
+
for (j=0; j<NX; j++) {
|
|
80
|
+
w[i] = w[i] + alpha * A[i][j] * x[j];
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
#pragma species endkernel gemver-part4
|
|
84
|
+
|
|
85
|
+
// Clean-up and exit the function
|
|
86
|
+
fflush(stdout);
|
|
87
|
+
return 0;
|
|
88
|
+
}
|
|
89
|
+
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/gesummv.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...20-Jul-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'gesummv', a general scalar, vector and matrix multiplication kernel
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[NX][NX];
|
|
29
|
+
float B[NX][NX];
|
|
30
|
+
float x[NX];
|
|
31
|
+
float y[NX];
|
|
32
|
+
float tmp[NX];
|
|
33
|
+
|
|
34
|
+
// Set the constants
|
|
35
|
+
float alpha = 43532;
|
|
36
|
+
float beta = 12313;
|
|
37
|
+
|
|
38
|
+
// Set the input data
|
|
39
|
+
for (i=0; i<NX; i++) {
|
|
40
|
+
x[i] = ((float) i) / NX;
|
|
41
|
+
for (j=0; j<NX; j++) {
|
|
42
|
+
A[i][j] = ((float) i*(j+1)) / NX;
|
|
43
|
+
B[i][j] = ((float) (i+3)*j) / NX;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// Perform the computation
|
|
48
|
+
#pragma species kernel 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) -> 0:NX-1|element ^ 0:NX-1|element
|
|
49
|
+
for (i=0; i<NX; i++) {
|
|
50
|
+
tmp[i] = 0;
|
|
51
|
+
y[i] = 0;
|
|
52
|
+
for (j=0; j<NX; j++) {
|
|
53
|
+
tmp[i] = A[i][j] * x[j] + tmp[i];
|
|
54
|
+
y[i] = B[i][j] * x[j] + y[i];
|
|
55
|
+
}
|
|
56
|
+
y[i] = alpha*tmp[i] + beta*y[i];
|
|
57
|
+
}
|
|
58
|
+
#pragma species endkernel gesummv
|
|
59
|
+
|
|
60
|
+
// Clean-up and exit the function
|
|
61
|
+
fflush(stdout);
|
|
62
|
+
return 0;
|
|
63
|
+
}
|
|
64
|
+
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/gramschmidt.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...26-Jun-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'gramschmidt', an algorithm for the Gram-Schmidt process
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,k;
|
|
26
|
+
float nrm[1];
|
|
27
|
+
float rkk;
|
|
28
|
+
float rkj;
|
|
29
|
+
|
|
30
|
+
// Declare arrays on the stack
|
|
31
|
+
float A[NI][NJ];
|
|
32
|
+
float R[NJ][NJ];
|
|
33
|
+
float Q[NI][NJ];
|
|
34
|
+
|
|
35
|
+
// Set the input data
|
|
36
|
+
for (i=0; i<NI; i++) {
|
|
37
|
+
for (j=0; j<NJ; j++) {
|
|
38
|
+
A[i][j] = ((float) i*j) / NI + 1;
|
|
39
|
+
Q[i][j] = ((float) i*(j+1)) / NJ;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
for (i=0; i<NJ; i++) {
|
|
43
|
+
for (j=0; j<NJ; j++) {
|
|
44
|
+
R[i][j] = ((float) i*(j+2)) / NJ;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Perform the computation
|
|
49
|
+
for (k=0; k<NJ; k++) {
|
|
50
|
+
nrm[0] = 0;
|
|
51
|
+
#pragma species kernel 0:NI-1,k:k|element -> 0:0|shared
|
|
52
|
+
for (i=0; i<NI; i++) {
|
|
53
|
+
nrm[0] += A[i][k] * A[i][k];
|
|
54
|
+
}
|
|
55
|
+
#pragma species endkernel gramschmidt-part1
|
|
56
|
+
R[k][k] = sqrt(nrm[0]);
|
|
57
|
+
rkk = R[k][k];
|
|
58
|
+
#pragma species kernel 0:NI-1,k:k|element -> 0:NI-1,k:k|element
|
|
59
|
+
for (i=0; i<NI; i++) {
|
|
60
|
+
Q[i][k] = A[i][k] / rkk;
|
|
61
|
+
}
|
|
62
|
+
#pragma species endkernel gramschmidt-part2
|
|
63
|
+
#pragma species kernel 0:NI-1,k:k|element ^ 0:NI-1,k+1:NJ-1|element -> 0:NI-1,k+1:NJ-1|element ^ k:k,k+1:NJ-1|element
|
|
64
|
+
for (j=k+1; j<NJ; j++) {
|
|
65
|
+
R[k][j] = 0;
|
|
66
|
+
#pragma species kernel 0:NI-1,k:k|element ^ 0:NI-1,j:j|element -> k:k,j:j|shared
|
|
67
|
+
for (i=0; i<NI; i++) {
|
|
68
|
+
R[k][j] += Q[i][k] * A[i][j];
|
|
69
|
+
}
|
|
70
|
+
#pragma species endkernel gramschmidt-part3a
|
|
71
|
+
rkj = R[k][j];
|
|
72
|
+
#pragma species kernel 0:NI-1,j:j|element ^ 0:NI-1,k:k|element -> 0:NI-1,j:j|element
|
|
73
|
+
for (i=0; i<NI; i++) {
|
|
74
|
+
A[i][j] = A[i][j] - Q[i][k] * rkj;
|
|
75
|
+
}
|
|
76
|
+
#pragma species endkernel gramschmidt-part3b
|
|
77
|
+
}
|
|
78
|
+
#pragma species endkernel gramschmidt-part3
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Clean-up and exit the function
|
|
82
|
+
fflush(stdout);
|
|
83
|
+
return 0;
|
|
84
|
+
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. The C-code
|
|
3
|
+
// is largely identical in terms of functionality and variable naming to the code
|
|
4
|
+
// found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on PolyBench/C
|
|
8
|
+
// Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
|
|
9
|
+
// Web address........http://polybench.sourceforge.net/
|
|
10
|
+
//
|
|
11
|
+
// == More information on Bones
|
|
12
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
13
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
14
|
+
//
|
|
15
|
+
// == File information
|
|
16
|
+
// Filename...........benchmark/jacobi-1d-imper.c
|
|
17
|
+
// Author.............Cedric Nugteren
|
|
18
|
+
// Last modified on...03-April-2012
|
|
19
|
+
//
|
|
20
|
+
|
|
21
|
+
#include "common.h"
|
|
22
|
+
|
|
23
|
+
// This is 'jacobi-1d-imper', a 1D Jacobi stencil computation
|
|
24
|
+
int main(void) {
|
|
25
|
+
int i,j,t;
|
|
26
|
+
|
|
27
|
+
// Declare arrays on the stack
|
|
28
|
+
float A[LARGE_N];
|
|
29
|
+
float B[LARGE_N];
|
|
30
|
+
|
|
31
|
+
// Set the input data
|
|
32
|
+
for (i=0; i<LARGE_N; i++) {
|
|
33
|
+
A[i] = ((float) i+2) / LARGE_N;
|
|
34
|
+
B[i] = ((float) i+3) / LARGE_N;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// Perform the computation
|
|
38
|
+
for (t=0; t<TSTEPS; t++) {
|
|
39
|
+
#pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
|
|
40
|
+
for (i=1; i<LARGE_N-1; i++) {
|
|
41
|
+
B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]);
|
|
42
|
+
}
|
|
43
|
+
#pragma species endkernel jacobi-1d-imper-part1
|
|
44
|
+
#pragma species kernel 1:LARGE_N-2|element -> 1:LARGE_N-2|element
|
|
45
|
+
for (j=1; j<LARGE_N-1; j++) {
|
|
46
|
+
A[j] = B[j];
|
|
47
|
+
}
|
|
48
|
+
#pragma species endkernel jacobi-1d-imper-part2
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Clean-up and exit the function
|
|
52
|
+
fflush(stdout);
|
|
53
|
+
return 0;
|
|
54
|
+
}
|
|
55
|
+
|