bones-compiler 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,61 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/jacobi-2d-imper.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...03-April-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'jacobi-2d-imper', a 2D Jacobi stencil computation
24
+ int main(void) {
25
+ int i,j,t;
26
+
27
+ // Declare arrays on the stack
28
+ float A[N][N];
29
+ float B[N][N];
30
+
31
+ // Set the input data
32
+ for (i=0; i<N; i++) {
33
+ for (j=0; j<N; j++) {
34
+ A[i][j] = ((float) i*(j+2) + 2) / N;
35
+ B[i][j] = ((float) i*(j+3) + 3) / N;
36
+ }
37
+ }
38
+
39
+ // Perform the computation
40
+ for (t=0; t<TSTEPS; t++) {
41
+ #pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
42
+ for (i=1; i<N-1; i++) {
43
+ for (j=1; j<N-1; j++) {
44
+ B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
45
+ }
46
+ }
47
+ #pragma species endkernel jacobi-2d-imper-part1
48
+ #pragma species kernel 1:N-2,1:N-2|element -> 1:N-2,1:N-2|element
49
+ for (i=1; i<N-1; i++) {
50
+ for (j=1; j<N-1; j++) {
51
+ A[i][j] = B[i][j];
52
+ }
53
+ }
54
+ #pragma species endkernel jacobi-2d-imper-part2
55
+ }
56
+
57
+ // Clean-up and exit the function
58
+ fflush(stdout);
59
+ return 0;
60
+ }
61
+
@@ -0,0 +1,57 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/lu.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...26-Jun-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'lu', an LU decomposition kernel
24
+ int main(void) {
25
+ int i,j,k;
26
+
27
+ // Declare arrays on the stack
28
+ float A[N][N];
29
+
30
+ // Set the input data
31
+ for (i=0; i<N; i++) {
32
+ for (j=0; j<N; j++) {
33
+ A[i][j] = ((float) (i+1)*(j+1)) / N;
34
+ }
35
+ }
36
+
37
+ // Perform the computation
38
+ for (k=0; k<N; k++) {
39
+ #pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
40
+ for (j=k+1; j<N; j++) {
41
+ A[k][j] = A[k][j] / A[k][k];
42
+ }
43
+ #pragma species endkernel lu-part1
44
+ #pragma species kernel k+1:N-1,k:k|element ^ k:k,k+1:N-1|element ^ k+1:N-1,k+1:N-1|element -> k+1:N-1,k+1:N-1|element
45
+ for(i=k+1; i<N; i++) {
46
+ for (j=k+1; j<N; j++) {
47
+ A[i][j] = A[i][j] - A[i][k] * A[k][j];
48
+ }
49
+ }
50
+ #pragma species endkernel lu-part2
51
+ }
52
+
53
+ // Clean-up and exit the function
54
+ fflush(stdout);
55
+ return 0;
56
+ }
57
+
@@ -0,0 +1,91 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/ludcmp.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...23-May-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'ludcmp', an LU decomposition kernel
24
+ int main(void) {
25
+ int i,j,k;
26
+ float w[1];
27
+
28
+ // Declare arrays on the stack
29
+ float A[N+1][N+1];
30
+ float b[N+1];
31
+ float x[N+1];
32
+ float y[N+1];
33
+
34
+ // Set the input data
35
+ for (i=0; i<=N; i++) {
36
+ x[i] = i+1;
37
+ y[i] = (i+1)/(float)(N*40) + 1;
38
+ b[i] = (i+1)/(float)(N*20) + 42;
39
+ for (j=0; j<=N; j++) {
40
+ A[i][j] = (i+1)/(float)(10*N) + (j+1)/(float)(5*N);
41
+ }
42
+ }
43
+
44
+ // Perform the computation
45
+ b[0] = 1.0;
46
+ for (i=0; i<N; i++) {
47
+ for (j=i+1; j<=N; j++) {
48
+ w[0] = A[j][i];
49
+ #pragma species kernel j:j,0:i-1|element ^ 0:i-1,i:i|element -> 0:0|shared
50
+ for (k=0; k<i; k++) {
51
+ w[0] = w[0] - A[j][k] * A[k][i];
52
+ }
53
+ #pragma species endkernel ludcmp-part1
54
+ A[j][i] = w[0] / A[i][i];
55
+ }
56
+ for (j=i+1; j<=N; j++) {
57
+ w[0] = A[i+1][j];
58
+ #pragma species kernel i+1:i+1,0:i|element ^ 0:i,j:j|element -> 0:0|shared
59
+ for (k=0; k<=i; k++) {
60
+ w[0] = w[0] - A[i+1][k] * A[k][j];
61
+ }
62
+ #pragma species endkernel ludcmp-part2
63
+ A[i+1][j] = w[0];
64
+ }
65
+ }
66
+ y[0] = b[0];
67
+ for (i=1; i<=N; i++) {
68
+ w[0] = b[i];
69
+ #pragma species kernel i:i,0:i-1|element ^ 0:i-1|element -> 0:0|shared
70
+ for (j=0; j<i; j++) {
71
+ w[0] = w[0] - A[i][j] * y[j];
72
+ }
73
+ #pragma species endkernel ludcmp-part3
74
+ y[i] = w[0];
75
+ }
76
+ x[N] = y[N] / A[N][N];
77
+ for (i=0; i<=N-1; i++) {
78
+ w[0] = y[N-1-i];
79
+ //#pragma species kernel N-1-i:N-1-i,N-i:N|element ^ N-i:N|element -> 0:0|shared
80
+ for (j=N-i; j<=N; j++) {
81
+ w[0] = w[0] - A[N-1-i][j] * x[j];
82
+ }
83
+ //#pragma species endkernel ludcmp-part4
84
+ x[N-1-i] = w[0] / A[N-1-i][N-1-i];
85
+ }
86
+
87
+ // Clean-up and exit the function
88
+ fflush(stdout);
89
+ return 0;
90
+ }
91
+
@@ -0,0 +1,65 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/mvt.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...23-May-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'mvt', a matrix vector product and transpose kernel
24
+ int main(void) {
25
+ int i,j;
26
+
27
+ // Declare arrays on the stack
28
+ float A[NX][NX];
29
+ float x1[NX];
30
+ float x2[NX];
31
+ float y_1[NX];
32
+ float y_2[NX];
33
+
34
+ // Set the input data
35
+ for (i=0; i<NX; i++) {
36
+ x1[i] = ((float) i) / NX;
37
+ x2[i] = ((float) i + 1) / NX;
38
+ y_1[i] = ((float) i + 3) / NX;
39
+ y_2[i] = ((float) i + 4) / NX;
40
+ for (j=0; j<NX; j++) {
41
+ A[i][j] = ((float) i*j) / NX;
42
+ }
43
+ }
44
+
45
+ // Perform the computation
46
+ #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
47
+ for (i=0; i<NX; i++) {
48
+ for (j=0; j<NX; j++) {
49
+ x1[i] = x1[i] + A[i][j] * y_1[j];
50
+ }
51
+ }
52
+ #pragma species endkernel mvt-part1
53
+ #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:NX-1,0:0) ^ 0:NX-1|full -> 0:NX-1|element
54
+ for (i=0; i<NX; i++) {
55
+ for (j=0; j<NX; j++) {
56
+ x2[i] = x2[i] + A[j][i] * y_2[j];
57
+ }
58
+ }
59
+ #pragma species endkernel mvt-part2
60
+
61
+ // Clean-up and exit the function
62
+ fflush(stdout);
63
+ return 0;
64
+ }
65
+
@@ -0,0 +1,38 @@
1
+
2
+ == Benchmarks with full parallelism
3
+ linear-algebra/kernels/2mm...........2 species...[fully classified]
4
+ linear-algebra/kernels/3mm...........3 species...[fully classified]
5
+ linear-algebra/kernels/atax..........2 species...[fully classified]
6
+ linear-algebra/kernels/bicg..........2 species...[fully classified]
7
+ linear-algebra/kernels/doitgen.......2 species...[fully classified]
8
+ linear-algebra/kernels/gemm..........1 species...[fully classified]
9
+ linear-algebra/kernels/gemver........4 species...[fully classified]
10
+ linear-algebra/kernels/gesummv.......1 species...[fully classified]
11
+ linear-algebra/kernels/mvt...........2 species...[fully classified]
12
+ linear-algebra/kernels/syr2k.........1 species...[fully classified]
13
+ linear-algebra/kernels/syrk..........1 species...[fully classified]
14
+ stencils/fdtd-2d.....................4 species...[fully classified]
15
+ stencils/jacobi-1d-imper.............2 species...[fully classified]
16
+ stencils/jacobi-2d-imper.............2 species...[fully classified]
17
+
18
+ == Benchmarks with significant parallelism
19
+ linear-algebra/kernels/cholesky......3 species...[no outer-loop parallelism, inner-loops only classified]
20
+ linear-algebra/kernels/symm..........1 species...[no outer-loop parallelism, inner-loops only classified]
21
+ linear-algebra/kernels/trisolv.......1 species...[no outer-loop parallelism, inner-loops only classified]
22
+ linear-algebra/kernels/trmm..........1 species...[no outer-loop parallelism, inner-loops only classified]
23
+ linear-algebra/solvers/gramschmidt...4 species...[no outer-loop parallelism, inner-loops only classified]
24
+ linear-algebra/solvers/lu............2 species...[no outer-loop parallelism, inner-loops only classified]
25
+ linear-algebra/solvers/ludcmp........4 species...[no outer-loop parallelism, inner-loops only classified]
26
+ datamining/correlation...............5 species...[most parts classified, final part inner-loop only]
27
+ datamining/covariance................4 species...[most parts classified, final part inner-loop only]
28
+ medley/reg_detect....................3 species...[partly classified, several parts have no parallelism]
29
+
30
+ == Benchmarks with very little parallelism
31
+ linear-algebra/solvers/durbin........2 species...[partly classified, most parts have no parallelism]
32
+ linear-algebra/solvers/dynprog.......1 species...[partly classified, most parts have no parallelism]
33
+ stencils/adi.........................2 species...[partly classified, most parts have no parallelism]
34
+ stencils/fdtd-apml...................1 species...[partly classified, most parts have no parallelism]
35
+
36
+ == Benchmarks without parallelism
37
+ medley/floyd-warshall................0 species...[no parallelism]
38
+ stencils/seidel-2d...................0 species...[no parallelism]
@@ -0,0 +1,82 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/reg_detect.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...26-Jun-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'reg_detect', a regularity detection algorithm
24
+ int main(void) {
25
+ int i,j,t,cnt;
26
+ float sum;
27
+
28
+ // Declare arrays on the stack
29
+ float sum_tang[MAXGRID][MAXGRID];
30
+ float mean[MAXGRID][MAXGRID];
31
+ float path[MAXGRID][MAXGRID];
32
+ float diff[MAXGRID][MAXGRID][LENGTH];
33
+ float sum_diff[MAXGRID][MAXGRID][LENGTH];
34
+
35
+ // Set the input data
36
+ for (i=0; i<MAXGRID; i++) {
37
+ for (j=0; j<MAXGRID; j++) {
38
+ sum_tang[i][j] = (float)((i+1)*(j+1));
39
+ mean[i][j] = ((float) i-j) / MAXGRID;
40
+ path[i][j] = ((float) i*(j-1)) / MAXGRID;
41
+ }
42
+ }
43
+
44
+ // Perform the computation
45
+ for (t=0; t<ITER; t++) {
46
+ #pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
47
+ for (j=0; j<=MAXGRID-1; j++) {
48
+ for (i=0; i<=MAXGRID-1; i++) {
49
+ sum = sum_tang[j][i];
50
+ for (cnt=0; cnt<=LENGTH-1; cnt++) {
51
+ diff[j][i][cnt] = sum;
52
+ }
53
+ }
54
+ }
55
+ #pragma species endkernel reg-detect-part1
56
+ for (j=0; j<=MAXGRID-1; j++) {
57
+ for (i=j; i<=MAXGRID-1; i++) {
58
+ sum_diff[j][i][0] = diff[j][i][0];
59
+ for (cnt=1; cnt<=LENGTH-1; cnt++) {
60
+ sum_diff[j][i][cnt] = sum_diff[j][i][cnt-1] + diff[j][i][cnt];
61
+ }
62
+ mean[j][i] = sum_diff[j][i][LENGTH-1];
63
+ }
64
+ }
65
+ #pragma species kernel 0:0,0:MAXGRID-1|element -> 0:0,0:MAXGRID-1|element
66
+ for (i=0; i<=MAXGRID-1; i++) {
67
+ path[0][i] = mean[0][i];
68
+ }
69
+ #pragma species endkernel reg-detect-part2
70
+ for (j=1; j<=MAXGRID-1; j++) {
71
+ #pragma species kernel j-1:j-1,j-1:MAXGRID-2|element ^ j:j,j:MAXGRID-1|element -> j:j,j:MAXGRID-1|element
72
+ for (i=j; i<=MAXGRID-1; i++) {
73
+ path[j][i] = path[j-1][i-1] + mean[j][i];
74
+ }
75
+ #pragma species endkernel reg-detect-part3
76
+ }
77
+ }
78
+
79
+ // Clean-up and exit the function
80
+ fflush(stdout);
81
+ return 0;
82
+ }
@@ -0,0 +1,45 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. For more
3
+ // information on Bones please use the contact information below.
4
+ //
5
+ // == More information on Bones
6
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
7
+ // Web address........http://parse.ele.tue.nl/bones/
8
+ //
9
+ // == File information
10
+ // Filename...........benchmark/saxpy.c
11
+ // Author.............Cedric Nugteren
12
+ // Last modified on...04-Jul-2012
13
+ //
14
+
15
+ #include "common.h"
16
+
17
+ // This is 'saxpy', a scalar multiplication and vector addition kernel
18
+ int main(void) {
19
+ int i;
20
+
21
+ // Declare arrays on the stack
22
+ float x[LARGE_N];
23
+ float y[LARGE_N];
24
+
25
+ // Set the input data
26
+ for (i=0; i<LARGE_N; i++) {
27
+ x[i] = i*1.4;
28
+ y[i] = i/0.9;
29
+ }
30
+
31
+ // Set the constants
32
+ float a = 411.3;
33
+
34
+ // Perform the computation (y := ax+y)
35
+ #pragma species kernel 0:LARGE_N-1|element ^ 0:LARGE_N-1|element -> 0:LARGE_N-1|element
36
+ for (i=0; i<LARGE_N; i++) {
37
+ y[i] = a*x[i] + y[i];
38
+ }
39
+ #pragma species endkernel saxpy
40
+
41
+ // Clean-up and exit the function
42
+ fflush(stdout);
43
+ return 0;
44
+ }
45
+