bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211)
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
File without changes
@@ -10,7 +10,7 @@
10
10
  // == File information
11
11
  // Filename...........element/example2.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...06-May-2013
14
14
  //
15
15
 
16
16
  #include <stdio.h>
@@ -31,7 +31,7 @@ int main(void) {
31
31
  }
32
32
 
33
33
  // Perform the computation
34
- #pragma species kernel unordered 0:3,0:7|element -> 0:3,0:7|element
34
+ #pragma species kernel 0:3,0:7|element -> 0:3,0:7|element
35
35
  for(i=0;i<4;i++) {
36
36
  for(j=0;j<8;j++) {
37
37
  B[i][j] = A[i][7-j];
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,73 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........element/example13.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...07-May-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example13', an example with multiple loop nests and various if-statements
19
+ int main(void) {
20
+ int i,j;
21
+ int N = 256;
22
+
23
+ // Declare input/output arrays
24
+ int A[N];
25
+ int B[N];
26
+ int C[N];
27
+ int D[N][N];
28
+ int E[N][N];
29
+
30
+ // Set the input data
31
+ for(i=0;i<N;i++) {
32
+ A[i] = i;
33
+ B[i] = i+5;
34
+ C[i] = i+9;
35
+ for(j=0;j<N;j++) {
36
+ D[i][j] = i*j+3;
37
+ E[i][j] = i*j+9;
38
+ }
39
+ }
40
+
41
+ // Perform the computation
42
+ #pragma species kernel C[0:N-1]|element -> B[11:N-1]|element ^ A[0:5]|element
43
+ for (i=0; i<N; i++) {
44
+ if (i > 10) {
45
+ B[i] = C[i];
46
+ }
47
+ if (i < 6) {
48
+ A[i] = C[i];
49
+ }
50
+ }
51
+ #pragma species endkernel example13_k1
52
+ #pragma species kernel A[50:N-1]|element -> B[50:N-1]|element
53
+ for (i=0; i<N-9; i++) {
54
+ if (i+10 > 50) {
55
+ B[i+9] = A[i+9];
56
+ }
57
+ }
58
+ #pragma species endkernel example13_k2
59
+ #pragma species kernel E[5:N-1,0:N-1]|element -> D[5:N-1,0:N-1]|element
60
+ for (i=0; i<N; i++) {
61
+ for (j=0; j<N; j++) {
62
+ if (i > 4) {
63
+ D[i][j] = E[i][j];
64
+ }
65
+ }
66
+ }
67
+ #pragma species endkernel example13_k3
68
+
69
+ // Clean-up and exit the function
70
+ fflush(stdout);
71
+ return 0;
72
+ }
73
+
@@ -0,0 +1,68 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........fusion/example01.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...09-July-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+ #define N 512
18
+ #define M 2048
19
+
20
+ // This is 'example01', a basic example of an opportunity for scalar kernel fusion.
21
+ int main(void) {
22
+ int i,j;
23
+
24
+ // Declare input/output arrays
25
+ int A[N][M];
26
+ int B[N][M];
27
+ int C[N][M];
28
+
29
+ // Set the input data
30
+ for(i=0;i<N;i++) {
31
+ for(j=0;j<M;j++) {
32
+ A[i][j] = i+j;
33
+ }
34
+ }
35
+
36
+ // Perform the computation
37
+ #pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element
38
+ for(i=0;i<N;i++) {
39
+ for(j=0;j<M;j++) {
40
+ B[i][j] = 2*A[i][j];
41
+ }
42
+ }
43
+ #pragma species endkernel example01-part1
44
+ #pragma species kernel B[0:N-1,0:M-1]|element -> C[0:N-1,0:M-1]|element
45
+ for(i=0;i<N;i++) {
46
+ for(j=0;j<M;j++) {
47
+ C[i][j] = 8*B[i][j];
48
+ }
49
+ }
50
+ #pragma species endkernel example01-part2
51
+
52
+ /*
53
+ #pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element ^ C[0:N-1,0:M-1]|element
54
+ for(i=0;i<N;i++) {
55
+ for(j=0;j<M;j++) {
56
+ B[i][j] = 2*A[i][j];
57
+ C[i][j] = 8*B[i][j];
58
+ }
59
+ }
60
+ #pragma species endkernel example01-fused
61
+ */
62
+
63
+ // Clean-up and exit the function
64
+ fflush(stdout);
65
+ C[8][9] = C[8][9];
66
+ return 0;
67
+ }
68
+
@@ -0,0 +1,73 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........fusion/example02.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...09-July-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+ #define N 2048
18
+ #define M 512
19
+ // Condition: M must be smaller than N
20
+
21
+ // This is 'example02', an example of scalar kernel fusion with mismatching bounds but independent loop bodies.
22
+ int main(void) {
23
+ int i,j;
24
+
25
+ // Declare input/output arrays
26
+ int A[N][M];
27
+ int B[N][M];
28
+ int C[N][M];
29
+
30
+ // Set the input data
31
+ for(i=0;i<N;i++) {
32
+ for(j=0;j<M;j++) {
33
+ A[i][j] = i+j;
34
+ }
35
+ }
36
+
37
+ // Perform the computation
38
+ #pragma species kernel A[0:N-1,10:M-1]|element -> B[0:N-1,10:M-1]|element
39
+ for(i=0;i<N;i++) {
40
+ for(j=10;j<M;j++) {
41
+ B[i][j] = A[i][j] + 3;
42
+ }
43
+ }
44
+ #pragma species endkernel example02-part1
45
+ #pragma species kernel A[0:M-1,0:M-1]|element -> C[0:M-1,0:M-1]|element
46
+ for(i=0;i<M;i++) {
47
+ for(j=0;j<M;j++) {
48
+ C[i][j] = -9*A[i][j];
49
+ }
50
+ }
51
+ #pragma species endkernel example02-part2
52
+
53
+ /*
54
+ #pragma species kernel A[0:N-1,0:M-1]|element -> B[0:N-1,0:M-1]|element ^ C[0:N-1,0:M-1]|element
55
+ for(i=0;i<MAX(N,M);i++) {
56
+ for(j=0;j<M;j++) {
57
+ if (j >= 10 && i < N) {
58
+ B[i][j] = A[i][j] + 3;
59
+ }
60
+ if (i < M) {
61
+ C[i][j] = -9*A[i][j];
62
+ }
63
+ }
64
+ }
65
+ #pragma species endkernel example02-fused
66
+ */
67
+
68
+ // Clean-up and exit the function
69
+ fflush(stdout);
70
+ C[8][9] = C[8][9];
71
+ return 0;
72
+ }
73
+
@@ -0,0 +1,72 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........fusion/example03.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...02-Oct-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example03', with code similar to PolyBench's "2mm" benchmark. This is an example where fusion is only legal w.r.t. the i-loop
19
+ int main(void) {
20
+ int i,j,k;
21
+
22
+ // Declare arrays on the stack
23
+ float A[2048][2048];
24
+ float B[2048][2048];
25
+ float C[2048][2048];
26
+ float D[2048][2048];
27
+ float tmp[2048][2048];
28
+
29
+ // Set the constants
30
+ int alpha = 32412;
31
+ int beta = 2123;
32
+
33
+ // Set the input data
34
+ for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { A[i][j] = ((float) i*j) / 2048; } }
35
+ for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { B[i][j] = ((float) i*(j+1)) / 2048; } }
36
+ for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { C[i][j] = ((float) i*(j+3)) / 2048; } }
37
+ for (i=0; i<2048; i++) { for (j=0; j<2048; j++) { D[i][j] = ((float) i*(j+2)) / 2048; } }
38
+
39
+ // Perform the computation (D := alpha*A*B*C + beta*D)
40
+ #pragma species copyin A[0:2047,0:2047]|0 ^ B[0:2047,0:2047]|0 ^ D[0:2047,0:2047]|1 ^ C[0:2047,0:2047]|1
41
+ #pragma species sync 0
42
+ #pragma species kernel A[0:2047,0:2047]|chunk(0:0,0:2047) ^ B[0:2047,0:2047]|chunk(0:2047,0:0) -> tmp[0:2047,0:2047]|element
43
+ for (i=0; i<2048; i++) {
44
+ for (j=0; j<2048; j++) {
45
+ tmp[i][j] = 0;
46
+ for (k=0; k<2048; k++) {
47
+ tmp[i][j] += alpha * A[i][k] * B[k][j];
48
+ }
49
+ }
50
+ }
51
+ #pragma species endkernel example03-part1
52
+ #pragma species copyout tmp[0:2047,0:2047]|2
53
+ #pragma species sync 1
54
+ #pragma species kernel D[0:2047,0:2047]|element ^ tmp[0:2047,0:2047]|chunk(0:0,0:2047) ^ C[0:2047,0:2047]|chunk(0:2047,0:0) -> D[0:2047,0:2047]|element
55
+ for (i=0; i<2048; i++) {
56
+ for (j=0; j<2048; j++) {
57
+ D[i][j] *= beta;
58
+ for (k=0; k<2048; k++) {
59
+ D[i][j] += tmp[i][k] * C[k][j];
60
+ }
61
+ }
62
+ }
63
+ #pragma species endkernel example03-part2
64
+ #pragma species copyout D[0:2047,0:2047]|2
65
+ #pragma species sync 2
66
+
67
+ // Clean-up and exit the function
68
+ fflush(stdout);
69
+ D[8][9] = D[8][9];
70
+ return 0;
71
+ }
72
+
@@ -0,0 +1,61 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........fusion/example04.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...02-Oct-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example04', with code similar to PolyBench's "atax" benchmark
19
+ int main(void) {
20
+ int i,j;
21
+
22
+ // Declare arrays on the stack
23
+ float A[4096][4096];
24
+ float x[4096];
25
+ float y[4096];
26
+ float tmp[4096];
27
+
28
+ // Set the input data
29
+ for (i=0; i<4096; i++) {
30
+ x[i] = i*3.14159;
31
+ }
32
+ for (i=0; i<4096; i++) {
33
+ for (j=0; j<4096; j++) {
34
+ A[i][j] = ((float) i*(j+1)) / 4096;
35
+ }
36
+ }
37
+
38
+ // Perform the computation (y := A'Ax)
39
+ #pragma species kernel 0:4095,0:4095|chunk(0:0,0:4095) ^ 0:4095|full -> 0:4095|element
40
+ for (i=0; i<4096; i++) {
41
+ tmp[i] = 0;
42
+ for (j=0; j<4096; j++) {
43
+ tmp[i] = tmp[i] + A[i][j] * x[j];
44
+ }
45
+ }
46
+ #pragma species endkernel atax-part1
47
+ #pragma species kernel 0:4095,0:4095|chunk(0:4095,0:0) ^ 0:4095|full -> 0:4095|element
48
+ for (j=0; j<4096; j++) {
49
+ y[j] = 0;
50
+ for (i=0; i<4096; i++) {
51
+ y[j] = y[j] + A[i][j] * tmp[i];
52
+ }
53
+ }
54
+ #pragma species endkernel atax-part2
55
+
56
+ // Clean-up and exit the function
57
+ fflush(stdout);
58
+ y[9] = y[9];
59
+ return 0;
60
+ }
61
+
@@ -0,0 +1,55 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........fusion/example05.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...08-October-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example05', like example02 but with constant values.
19
+ int main(void) {
20
+ int i,j;
21
+
22
+ // Declare input/output arrays
23
+ int A[2048][1024];
24
+ int B[2048][1024];
25
+ int C[2048][1024];
26
+
27
+ // Set the input data
28
+ for(i=0;i<2048;i++) {
29
+ for(j=0;j<1024;j++) {
30
+ A[i][j] = i+j;
31
+ }
32
+ }
33
+
34
+ // Perform the computation
35
+ #pragma species kernel A[0:2047,0:1023]|element -> B[0:2047,0:1023]|element
36
+ for(i=0;i<2048;i++) {
37
+ for(j=0;j<1024;j++) {
38
+ B[i][j] = A[i][j] + 3;
39
+ }
40
+ }
41
+ #pragma species endkernel example05-part1
42
+ #pragma species kernel A[0:2047,0:979]|element -> C[0:2047,0:979]|element
43
+ for(i=0;i<2048;i++) {
44
+ for(j=0;j<980;j++) {
45
+ C[i][j] = 9*A[i][j];
46
+ }
47
+ }
48
+ #pragma species endkernel example05-part2
49
+
50
+ // Clean-up and exit the function
51
+ fflush(stdout);
52
+ C[8][9] = C[8][9];
53
+ return 0;
54
+ }
55
+