bones-compiler 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,61 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/jacobi-2d-imper.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...03-April-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'jacobi-2d-imper', a 2D Jacobi stencil computation: TSTEPS times, a 5-point stencil over A is written to the interior of B, then the interior of B is copied back into A. N and TSTEPS are not defined in this file (presumably they come from common.h).
24
+ int main(void) {
25
+ int i,j,t; // i,j: row and column indices; t: time-step counter
26
+
27
+ // Declare arrays on the stack
28
+ float A[N][N]; // stencil input; its interior is overwritten from B at the end of every time step
29
+ float B[N][N]; // stencil output
30
+
31
+ // Set the input data
32
+ for (i=0; i<N; i++) {
33
+ for (j=0; j<N; j++) {
34
+ A[i][j] = ((float) i*(j+2) + 2) / N; // the cast applies to i only, which makes the whole expression floating point
35
+ B[i][j] = ((float) i*(j+3) + 3) / N;
36
+ }
37
+ }
38
+
39
+ // Perform the computation
40
+ for (t=0; t<TSTEPS; t++) { // each time step reads the A produced by the previous one
41
+ #pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
42
+ for (i=1; i<N-1; i++) { // interior cells only: rows/columns 0 and N-1 are read but never written here
43
+ for (j=1; j<N-1; j++) {
44
+ B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]); // 0.2 is a double constant: the product is computed in double and converted to float on assignment
45
+ }
46
+ }
47
+ #pragma species endkernel jacobi-2d-imper-part1
48
+ #pragma species kernel 1:N-2,1:N-2|element -> 1:N-2,1:N-2|element
49
+ for (i=1; i<N-1; i++) { // copy the interior of B back into A for the next time step
50
+ for (j=1; j<N-1; j++) {
51
+ A[i][j] = B[i][j];
52
+ }
53
+ }
54
+ #pragma species endkernel jacobi-2d-imper-part2
55
+ }
56
+
57
+ // Clean-up and exit the function
58
+ fflush(stdout); // this function itself writes nothing to stdout
59
+ return 0;
60
+ }
61
+
@@ -0,0 +1,57 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/lu.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...26-Jun-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'lu', an LU decomposition kernel: for every pivot index k it scales row k right of the diagonal by A[k][k] and then updates the trailing submatrix in place. N is not defined in this file (presumably it comes from common.h).
24
+ int main(void) {
25
+ int i,j,k; // k: pivot index; i,j: row and column indices
26
+
27
+ // Declare arrays on the stack
28
+ float A[N][N]; // factorised in place
29
+
30
+ // Set the input data
31
+ for (i=0; i<N; i++) {
32
+ for (j=0; j<N; j++) {
33
+ A[i][j] = ((float) (i+1)*(j+1)) / N; // NOTE(review): this is an outer product (rank 1), so pivots A[k][k] for k>=1 are expected to be zero or rounding noise after the first update - confirm this is acceptable for the benchmark
34
+ }
35
+ }
36
+
37
+ // Perform the computation
38
+ for (k=0; k<N; k++) { // each k iteration reads values written by the previous one; only the inner loops are annotated
39
+ #pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
40
+ for (j=k+1; j<N; j++) {
41
+ A[k][j] = A[k][j] / A[k][k]; // scale row k right of the diagonal by the pivot; there is no zero-pivot check
42
+ }
43
+ #pragma species endkernel lu-part1
44
+ #pragma species kernel k+1:N-1,k:k|element ^ k:k,k+1:N-1|element ^ k+1:N-1,k+1:N-1|element -> k+1:N-1,k+1:N-1|element
45
+ for(i=k+1; i<N; i++) {
46
+ for (j=k+1; j<N; j++) {
47
+ A[i][j] = A[i][j] - A[i][k] * A[k][j]; // trailing submatrix update from column k and the already scaled row k
48
+ }
49
+ }
50
+ #pragma species endkernel lu-part2
51
+ }
52
+
53
+ // Clean-up and exit the function
54
+ fflush(stdout); // this function itself writes nothing to stdout
55
+ return 0;
56
+ }
57
+
@@ -0,0 +1,91 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/ludcmp.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...23-May-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'ludcmp', an LU decomposition kernel: it factorises A in place, then computes y by forward substitution from b and x by back substitution from y. N is not defined in this file (presumably it comes from common.h).
24
+ int main(void) {
25
+ int i,j,k;
26
+ float w[1]; // one-element array used as the accumulator of every reduction loop below (presumably the 0:0|shared output named in the pragmas)
27
+
28
+ // Declare arrays on the stack
29
+ float A[N+1][N+1]; // N+1 entries per dimension: the loops below use inclusive bounds (<=N)
30
+ float b[N+1];
31
+ float x[N+1];
32
+ float y[N+1];
33
+
34
+ // Set the input data
35
+ for (i=0; i<=N; i++) {
36
+ x[i] = i+1; // every x[i] and y[i] is overwritten by the substitution loops below before it is read
37
+ y[i] = (i+1)/(float)(N*40) + 1;
38
+ b[i] = (i+1)/(float)(N*20) + 42;
39
+ for (j=0; j<=N; j++) {
40
+ A[i][j] = (i+1)/(float)(10*N) + (j+1)/(float)(5*N);
41
+ }
42
+ }
43
+
44
+ // Perform the computation
45
+ b[0] = 1.0; // replaces the b[0] value set during initialisation
46
+ for (i=0; i<N; i++) {
47
+ for (j=i+1; j<=N; j++) { // column i below the diagonal
48
+ w[0] = A[j][i];
49
+ #pragma species kernel j:j,0:i-1|element ^ 0:i-1,i:i|element -> 0:0|shared
50
+ for (k=0; k<i; k++) { // runs zero times when i == 0
51
+ w[0] = w[0] - A[j][k] * A[k][i];
52
+ }
53
+ #pragma species endkernel ludcmp-part1
54
+ A[j][i] = w[0] / A[i][i]; // divide by the diagonal entry; there is no zero check
55
+ }
56
+ for (j=i+1; j<=N; j++) { // row i+1, columns right of column i
57
+ w[0] = A[i+1][j];
58
+ #pragma species kernel i+1:i+1,0:i|element ^ 0:i,j:j|element -> 0:0|shared
59
+ for (k=0; k<=i; k++) {
60
+ w[0] = w[0] - A[i+1][k] * A[k][j];
61
+ }
62
+ #pragma species endkernel ludcmp-part2
63
+ A[i+1][j] = w[0];
64
+ }
65
+ }
66
+ y[0] = b[0];
67
+ for (i=1; i<=N; i++) { // forward substitution: y[i] = b[i] - sum over j<i of A[i][j]*y[j]
68
+ w[0] = b[i];
69
+ #pragma species kernel i:i,0:i-1|element ^ 0:i-1|element -> 0:0|shared
70
+ for (j=0; j<i; j++) {
71
+ w[0] = w[0] - A[i][j] * y[j];
72
+ }
73
+ #pragma species endkernel ludcmp-part3
74
+ y[i] = w[0];
75
+ }
76
+ x[N] = y[N] / A[N][N]; // last unknown first; the loop below fills x[N-1] down to x[0]
77
+ for (i=0; i<=N-1; i++) { // back substitution; the species pragmas around its inner loop are commented out
78
+ w[0] = y[N-1-i];
79
+ //#pragma species kernel N-1-i:N-1-i,N-i:N|element ^ N-i:N|element -> 0:0|shared
80
+ for (j=N-i; j<=N; j++) {
81
+ w[0] = w[0] - A[N-1-i][j] * x[j];
82
+ }
83
+ //#pragma species endkernel ludcmp-part4
84
+ x[N-1-i] = w[0] / A[N-1-i][N-1-i];
85
+ }
86
+
87
+ // Clean-up and exit the function
88
+ fflush(stdout); // this function itself writes nothing to stdout
89
+ return 0;
90
+ }
91
+
@@ -0,0 +1,65 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/mvt.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...23-May-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'mvt', a matrix vector product and transpose kernel: it accumulates A*y_1 into x1 and transpose(A)*y_2 into x2. NX is not defined in this file (presumably it comes from common.h).
24
+ int main(void) {
25
+ int i,j;
26
+
27
+ // Declare arrays on the stack
28
+ float A[NX][NX]; // read by both kernels, never written after initialisation
29
+ float x1[NX]; // accumulator of the first kernel
30
+ float x2[NX]; // accumulator of the second kernel
31
+ float y_1[NX]; // vector multiplied by A
32
+ float y_2[NX]; // vector multiplied by the transpose of A
33
+
34
+ // Set the input data
35
+ for (i=0; i<NX; i++) {
36
+ x1[i] = ((float) i) / NX;
37
+ x2[i] = ((float) i + 1) / NX; // the cast applies to i only; the addition and division are then floating point
38
+ y_1[i] = ((float) i + 3) / NX;
39
+ y_2[i] = ((float) i + 4) / NX;
40
+ for (j=0; j<NX; j++) {
41
+ A[i][j] = ((float) i*j) / NX;
42
+ }
43
+ }
44
+
45
+ // Perform the computation
46
+ #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
47
+ for (i=0; i<NX; i++) {
48
+ for (j=0; j<NX; j++) {
49
+ x1[i] = x1[i] + A[i][j] * y_1[j]; // walks row i of A
50
+ }
51
+ }
52
+ #pragma species endkernel mvt-part1
53
+ #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:NX-1,0:0) ^ 0:NX-1|full -> 0:NX-1|element
54
+ for (i=0; i<NX; i++) {
55
+ for (j=0; j<NX; j++) {
56
+ x2[i] = x2[i] + A[j][i] * y_2[j]; // walks column i of A, i.e. row i of its transpose
57
+ }
58
+ }
59
+ #pragma species endkernel mvt-part2
60
+
61
+ // Clean-up and exit the function
62
+ fflush(stdout); // this function itself writes nothing to stdout
63
+ return 0;
64
+ }
65
+
@@ -0,0 +1,38 @@
1
+
2
+ == Benchmarks with full parallelism
3
+ linear-algebra/kernels/2mm...........2 species...[fully classified]
4
+ linear-algebra/kernels/3mm...........3 species...[fully classified]
5
+ linear-algebra/kernels/atax..........2 species...[fully classified]
6
+ linear-algebra/kernels/bicg..........2 species...[fully classified]
7
+ linear-algebra/kernels/doitgen.......2 species...[fully classified]
8
+ linear-algebra/kernels/gemm..........1 species...[fully classified]
9
+ linear-algebra/kernels/gemver........4 species...[fully classified]
10
+ linear-algebra/kernels/gesummv.......1 species...[fully classified]
11
+ linear-algebra/kernels/mvt...........2 species...[fully classified]
12
+ linear-algebra/kernels/syr2k.........1 species...[fully classified]
13
+ linear-algebra/kernels/syrk..........1 species...[fully classified]
14
+ stencils/fdtd-2d.....................4 species...[fully classified]
15
+ stencils/jacobi-1d-imper.............2 species...[fully classified]
16
+ stencils/jacobi-2d-imper.............2 species...[fully classified]
17
+
18
+ == Benchmarks with significant parallelism
19
+ linear-algebra/kernels/cholesky......3 species...[no outer-loop parallelism, inner-loops only classified]
20
+ linear-algebra/kernels/symm..........1 species...[no outer-loop parallelism, inner-loops only classified]
21
+ linear-algebra/kernels/trisolv.......1 species...[no outer-loop parallelism, inner-loops only classified]
22
+ linear-algebra/kernels/trmm..........1 species...[no outer-loop parallelism, inner-loops only classified]
23
+ linear-algebra/solvers/gramschmidt...4 species...[no outer-loop parallelism, inner-loops only classified]
24
+ linear-algebra/solvers/lu............2 species...[no outer-loop parallelism, inner-loops only classified]
25
+ linear-algebra/solvers/ludcmp........4 species...[no outer-loop parallelism, inner-loops only classified]
26
+ datamining/correlation...............5 species...[most parts classified, final part inner-loop only]
27
+ datamining/covariance................4 species...[most parts classified, final part inner-loop only]
28
+ medley/reg_detect....................3 species...[partly classified, several parts have no parallelism]
29
+
30
+ == Benchmarks with very little parallelism
31
+ linear-algebra/solvers/durbin........2 species...[partly classified, most parts have no parallelism]
32
+ linear-algebra/solvers/dynprog.......1 species...[partly classified, most parts have no parallelism]
33
+ stencils/adi.........................2 species...[partly classified, most parts have no parallelism]
34
+ stencils/fdtd-2d-apml................1 species...[partly classified, most parts have no parallelism]
35
+
36
+ == Benchmarks without parallelism
37
+ medley/floyd-warshall................0 species...[no parallelism]
38
+ stencils/seidel-2d...................0 species...[no parallelism]
@@ -0,0 +1,82 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. The C-code
3
+ // is largely identical in terms of functionality and variable naming to the code
4
+ // found in PolyBench/C version 3.2. For more information on PolyBench/C or Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on PolyBench/C
8
+ // Contact............Louis-Noel Pouchet <pouchet@cse.ohio-state.edu>
9
+ // Web address........http://polybench.sourceforge.net/
10
+ //
11
+ // == More information on Bones
12
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
13
+ // Web address........http://parse.ele.tue.nl/bones/
14
+ //
15
+ // == File information
16
+ // Filename...........benchmark/reg_detect.c
17
+ // Author.............Cedric Nugteren
18
+ // Last modified on...26-Jun-2012
19
+ //
20
+
21
+ #include "common.h"
22
+
23
+ // This is 'reg_detect', a regularity detection algorithm: ITER times, it fills diff from sum_tang, builds running sums of diff along the last dimension, stores their totals in mean and accumulates mean along diagonals into path. MAXGRID, LENGTH and ITER are not defined in this file (presumably they come from common.h).
24
+ int main(void) {
25
+ int i,j,t,cnt;
26
+ float sum; // scalar temporary holding sum_tang[j][i] in the first kernel
27
+
28
+ // Declare arrays on the stack
29
+ float sum_tang[MAXGRID][MAXGRID]; // read in the first kernel, never written after initialisation
30
+ float mean[MAXGRID][MAXGRID];
31
+ float path[MAXGRID][MAXGRID];
32
+ float diff[MAXGRID][MAXGRID][LENGTH];
33
+ float sum_diff[MAXGRID][MAXGRID][LENGTH];
34
+
35
+ // Set the input data
36
+ for (i=0; i<MAXGRID; i++) {
37
+ for (j=0; j<MAXGRID; j++) {
38
+ sum_tang[i][j] = (float)((i+1)*(j+1)); // product is computed in int, then converted
39
+ mean[i][j] = ((float) i-j) / MAXGRID; // the cast applies to i only, which makes the subtraction floating point
40
+ path[i][j] = ((float) i*(j-1)) / MAXGRID;
41
+ }
42
+ }
43
+
44
+ // Perform the computation
45
+ for (t=0; t<ITER; t++) {
46
+ #pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
47
+ for (j=0; j<=MAXGRID-1; j++) {
48
+ for (i=0; i<=MAXGRID-1; i++) {
49
+ sum = sum_tang[j][i];
50
+ for (cnt=0; cnt<=LENGTH-1; cnt++) {
51
+ diff[j][i][cnt] = sum; // every entry along cnt receives the same value
52
+ }
53
+ }
54
+ }
55
+ #pragma species endkernel reg-detect-part1
56
+ for (j=0; j<=MAXGRID-1; j++) { // this loop nest carries no species pragma
57
+ for (i=j; i<=MAXGRID-1; i++) { // only entries with i >= j are processed
58
+ sum_diff[j][i][0] = diff[j][i][0];
59
+ for (cnt=1; cnt<=LENGTH-1; cnt++) {
60
+ sum_diff[j][i][cnt] = sum_diff[j][i][cnt-1] + diff[j][i][cnt]; // running sum: each entry depends on the previous cnt
61
+ }
62
+ mean[j][i] = sum_diff[j][i][LENGTH-1]; // total over all cnt
63
+ }
64
+ }
65
+ #pragma species kernel 0:0,0:MAXGRID-1|element -> 0:0,0:MAXGRID-1|element
66
+ for (i=0; i<=MAXGRID-1; i++) {
67
+ path[0][i] = mean[0][i]; // row 0 of path is a copy of row 0 of mean
68
+ }
69
+ #pragma species endkernel reg-detect-part2
70
+ for (j=1; j<=MAXGRID-1; j++) { // row j reads row j-1, written in the previous j iteration; only the i loop is annotated
71
+ #pragma species kernel j-1:j-1,j-1:MAXGRID-2|element ^ j:j,j:MAXGRID-1|element -> j:j,j:MAXGRID-1|element
72
+ for (i=j; i<=MAXGRID-1; i++) {
73
+ path[j][i] = path[j-1][i-1] + mean[j][i];
74
+ }
75
+ #pragma species endkernel reg-detect-part3
76
+ }
77
+ }
78
+
79
+ // Clean-up and exit the function
80
+ fflush(stdout); // this function itself writes nothing to stdout
81
+ return 0;
82
+ }
@@ -0,0 +1,45 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. For more
3
+ // information on Bones please use the contact information below.
4
+ //
5
+ // == More information on Bones
6
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
7
+ // Web address........http://parse.ele.tue.nl/bones/
8
+ //
9
+ // == File information
10
+ // Filename...........benchmark/saxpy.c
11
+ // Author.............Cedric Nugteren
12
+ // Last modified on...04-Jul-2012
13
+ //
14
+
15
+ #include "common.h"
16
+
17
+ // This is 'saxpy', a scalar multiplication and vector addition kernel: it updates y in place with a*x + y. LARGE_N is not defined in this file (presumably it comes from common.h).
18
+ int main(void) {
19
+ int i;
20
+
21
+ // Declare arrays on the stack
22
+ float x[LARGE_N]; // read-only input vector of the kernel
23
+ float y[LARGE_N]; // input vector that is overwritten with the result
24
+
25
+ // Set the input data
26
+ for (i=0; i<LARGE_N; i++) {
27
+ x[i] = i*1.4; // int times double constant, converted to float on assignment
28
+ y[i] = i/0.9;
29
+ }
30
+
31
+ // Set the constants
32
+ float a = 411.3; // the scalar 'a' of y := ax+y; declared after statements, which requires C99 or later
33
+
34
+ // Perform the computation (y := ax+y)
35
+ #pragma species kernel 0:LARGE_N-1|element ^ 0:LARGE_N-1|element -> 0:LARGE_N-1|element
36
+ for (i=0; i<LARGE_N; i++) {
37
+ y[i] = a*x[i] + y[i]; // iteration i touches only x[i] and y[i]
38
+ }
39
+ #pragma species endkernel saxpy
40
+
41
+ // Clean-up and exit the function
42
+ fflush(stdout); // this function itself writes nothing to stdout
43
+ return 0;
44
+ }
45
+