bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -40,7 +40,7 @@ int main(void) {
40
40
  int beta = 12313;
41
41
 
42
42
  // Set the input data
43
- for (i=0; i<NX; i++) {
43
+ /* for (i=0; i<NX; i++) {
44
44
  u1[i] = i;
45
45
  u2[i] = (i+1)/NX/2.0;
46
46
  v1[i] = (i+1)/NX/4.0;
@@ -53,8 +53,9 @@ int main(void) {
53
53
  A[i][j] = ((float) i*j) / NX;
54
54
  }
55
55
  }
56
-
56
+ */
57
57
  // Perform the computation
58
+ #pragma scop
58
59
  #pragma species kernel 0:NX-1,0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1,0:NX-1|element
59
60
  for (i=0; i<NX; i++) {
60
61
  for (j=0; j<NX; j++) {
@@ -81,9 +82,11 @@ int main(void) {
81
82
  }
82
83
  }
83
84
  #pragma species endkernel gemver-part4
85
+ #pragma endscop
84
86
 
85
87
  // Clean-up and exit the function
86
88
  fflush(stdout);
89
+ w[9] = w[9];
87
90
  return 0;
88
91
  }
89
92
 
@@ -36,15 +36,16 @@ int main(void) {
36
36
  float beta = 12313;
37
37
 
38
38
  // Set the input data
39
- for (i=0; i<NX; i++) {
39
+ /* for (i=0; i<NX; i++) {
40
40
  x[i] = ((float) i) / NX;
41
41
  for (j=0; j<NX; j++) {
42
42
  A[i][j] = ((float) i*(j+1)) / NX;
43
43
  B[i][j] = ((float) (i+3)*j) / NX;
44
44
  }
45
45
  }
46
-
46
+ */
47
47
  // Perform the computation
48
+ #pragma scop
48
49
  #pragma species kernel 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) -> 0:NX-1|element ^ 0:NX-1|element
49
50
  for (i=0; i<NX; i++) {
50
51
  tmp[i] = 0;
@@ -56,9 +57,11 @@ int main(void) {
56
57
  y[i] = alpha*tmp[i] + beta*y[i];
57
58
  }
58
59
  #pragma species endkernel gesummv
60
+ #pragma endscop
59
61
 
60
62
  // Clean-up and exit the function
61
63
  fflush(stdout);
64
+ y[9] = y[9];
62
65
  return 0;
63
66
  }
64
67
 
@@ -46,6 +46,7 @@ int main(void) {
46
46
  }
47
47
 
48
48
  // Perform the computation
49
+ #pragma scop
49
50
  for (k=0; k<NJ; k++) {
50
51
  nrm[0] = 0;
51
52
  #pragma species kernel 0:NI-1,k:k|element -> 0:0|shared
@@ -77,8 +78,10 @@ int main(void) {
77
78
  }
78
79
  #pragma species endkernel gramschmidt-part3
79
80
  }
81
+ #pragma endscop
80
82
 
81
83
  // Clean-up and exit the function
82
84
  fflush(stdout);
85
+ A[8][9] = A[8][9];
83
86
  return 0;
84
87
  }
@@ -18,6 +18,7 @@
18
18
  // Last modified on...03-April-2012
19
19
  //
20
20
 
21
+
21
22
  #include "common.h"
22
23
 
23
24
  // This is 'jacobi-1d-imper', a 1D Jacobi stencil computation
@@ -27,14 +28,19 @@ int main(void) {
27
28
  // Declare arrays on the stack
28
29
  float A[LARGE_N];
29
30
  float B[LARGE_N];
31
+ //printf("A: %p\n", A);
32
+ //printf("B: %p\n", B);
33
+ //float *A = (float *)malloc(LARGE_N*sizeof(float));
34
+ //float *B = (float *)malloc(LARGE_N*sizeof(float));
30
35
 
31
36
  // Set the input data
32
- for (i=0; i<LARGE_N; i++) {
37
+ /* for (i=0; i<LARGE_N; i++) {
33
38
  A[i] = ((float) i+2) / LARGE_N;
34
39
  B[i] = ((float) i+3) / LARGE_N;
35
40
  }
36
-
41
+ */
37
42
  // Perform the computation
43
+ #pragma scop
38
44
  for (t=0; t<TSTEPS; t++) {
39
45
  #pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
40
46
  for (i=1; i<LARGE_N-1; i++) {
@@ -47,9 +53,11 @@ int main(void) {
47
53
  }
48
54
  #pragma species endkernel jacobi-1d-imper-part2
49
55
  }
56
+ #pragma endscop
50
57
 
51
58
  // Clean-up and exit the function
52
59
  fflush(stdout);
60
+ B[9] = B[9];
53
61
  return 0;
54
62
  }
55
63
 
@@ -29,19 +29,22 @@ int main(void) {
29
29
  float B[N][N];
30
30
 
31
31
  // Set the input data
32
- for (i=0; i<N; i++) {
32
+ /* for (i=0; i<N; i++) {
33
33
  for (j=0; j<N; j++) {
34
34
  A[i][j] = ((float) i*(j+2) + 2) / N;
35
35
  B[i][j] = ((float) i*(j+3) + 3) / N;
36
36
  }
37
37
  }
38
-
38
+ */
39
39
  // Perform the computation
40
+ #pragma scop
40
41
  for (t=0; t<TSTEPS; t++) {
41
42
  #pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
42
43
  for (i=1; i<N-1; i++) {
43
44
  for (j=1; j<N-1; j++) {
44
- B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
45
+ if (i < N-1 && j < N-1) {
46
+ B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
47
+ }
45
48
  }
46
49
  }
47
50
  #pragma species endkernel jacobi-2d-imper-part1
@@ -53,9 +56,11 @@ int main(void) {
53
56
  }
54
57
  #pragma species endkernel jacobi-2d-imper-part2
55
58
  }
59
+ #pragma endscop
56
60
 
57
61
  // Clean-up and exit the function
58
62
  fflush(stdout);
63
+ B[8][9] = B[8][9];
59
64
  return 0;
60
65
  }
61
66
 
@@ -35,6 +35,7 @@ int main(void) {
35
35
  }
36
36
 
37
37
  // Perform the computation
38
+ #pragma scop
38
39
  for (k=0; k<N; k++) {
39
40
  #pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
40
41
  for (j=k+1; j<N; j++) {
@@ -49,9 +50,11 @@ int main(void) {
49
50
  }
50
51
  #pragma species endkernel lu-part2
51
52
  }
53
+ #pragma endscop
52
54
 
53
55
  // Clean-up and exit the function
54
56
  fflush(stdout);
57
+ A[8][9] = A[8][9];
55
58
  return 0;
56
59
  }
57
60
 
@@ -42,6 +42,7 @@ int main(void) {
42
42
  }
43
43
 
44
44
  // Perform the computation
45
+ #pragma scop
45
46
  b[0] = 1.0;
46
47
  for (i=0; i<N; i++) {
47
48
  for (j=i+1; j<=N; j++) {
@@ -83,9 +84,11 @@ int main(void) {
83
84
  //#pragma species endkernel ludcmp-part4
84
85
  x[N-1-i] = w[0] / A[N-1-i][N-1-i];
85
86
  }
87
+ #pragma endscop
86
88
 
87
89
  // Clean-up and exit the function
88
90
  fflush(stdout);
91
+ x[9] = x[9];
89
92
  return 0;
90
93
  }
91
94
 
@@ -30,7 +30,7 @@ int main(void) {
30
30
  float x2[NX];
31
31
  float y_1[NX];
32
32
  float y_2[NX];
33
-
33
+ /*
34
34
  // Set the input data
35
35
  for (i=0; i<NX; i++) {
36
36
  x1[i] = ((float) i) / NX;
@@ -40,9 +40,10 @@ int main(void) {
40
40
  for (j=0; j<NX; j++) {
41
41
  A[i][j] = ((float) i*j) / NX;
42
42
  }
43
- }
43
+ }*/
44
44
 
45
45
  // Perform the computation
46
+ #pragma scop
46
47
  #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
47
48
  for (i=0; i<NX; i++) {
48
49
  for (j=0; j<NX; j++) {
@@ -57,9 +58,12 @@ int main(void) {
57
58
  }
58
59
  }
59
60
  #pragma species endkernel mvt-part2
61
+ #pragma endscop
60
62
 
61
63
  // Clean-up and exit the function
62
64
  fflush(stdout);
65
+ x1[9] = x1[9];
66
+ x2[9] = x2[9];
63
67
  return 0;
64
68
  }
65
69
 
@@ -42,6 +42,7 @@ int main(void) {
42
42
  }
43
43
 
44
44
  // Perform the computation
45
+ #pragma scop
45
46
  for (t=0; t<ITER; t++) {
46
47
  #pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
47
48
  for (j=0; j<=MAXGRID-1; j++) {
@@ -75,8 +76,10 @@ int main(void) {
75
76
  #pragma species endkernel reg-detect-part3
76
77
  }
77
78
  }
79
+ #pragma endscop
78
80
 
79
81
  // Clean-up and exit the function
80
82
  fflush(stdout);
83
+ path[8][9] = path[8][9];
81
84
  return 0;
82
85
  }
@@ -35,6 +35,7 @@ int main(void) {
35
35
  }
36
36
 
37
37
  // Perform the computation
38
+ #pragma scop
38
39
  for (t=0; t<TSTEPS-1; t++) {
39
40
  for (i=1; i<=N-2; i++) {
40
41
  for (j=1; j<=N-2; j++) {
@@ -44,8 +45,10 @@ int main(void) {
44
45
  }
45
46
  }
46
47
  }
48
+ #pragma endscop
47
49
 
48
50
  // Clean-up and exit the function
49
51
  fflush(stdout);
52
+ A[8][9] = A[8][9];
50
53
  return 0;
51
54
  }
@@ -49,6 +49,7 @@ int main(void) {
49
49
  }
50
50
 
51
51
  // Perform the computation (C := alpha*A*B + beta*C, with A symmetric)
52
+ #pragma scop
52
53
  for (i=0; i<NI; i++) {
53
54
  for (j=0; j<NJ; j++) {
54
55
  acc[0] = 0;
@@ -66,9 +67,11 @@ int main(void) {
66
67
  C[i][j] = beta*C[i][j] + alpha*A[i][i]*bij + alpha*acc[0];
67
68
  }
68
69
  }
70
+ #pragma endscop
69
71
 
70
72
  // Clean-up and exit the function
71
73
  fflush(stdout);
74
+ C[8][9] = C[8][9];
72
75
  return 0;
73
76
  }
74
77
 
@@ -34,7 +34,7 @@ int main(void) {
34
34
  int beta = 2123;
35
35
 
36
36
  // Set the input data
37
- for (i=0; i<NI; i++) {
37
+ /* for (i=0; i<NI; i++) {
38
38
  for (j=0; j<NJ; j++) {
39
39
  A[i][j] = ((float) i*j) / NI;
40
40
  B[i][j] = ((float) i*j) / NI;
@@ -45,8 +45,9 @@ int main(void) {
45
45
  C[i][j] = ((float) i*j) / NI;
46
46
  }
47
47
  }
48
-
48
+ */
49
49
  // Perform the computation (C := alpha*A*B' + alpha*B*A' + beta*C)
50
+ #pragma scop
50
51
  #pragma species kernel 0:NI-1,0:NI-1|element ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) -> 0:NI-1,0:NI-1|element
51
52
  for (i=0; i<NI; i++) {
52
53
  for (j=0; j<NI; j++) {
@@ -58,8 +59,10 @@ int main(void) {
58
59
  }
59
60
  }
60
61
  #pragma species endkernel syr2k
62
+ #pragma endscop
61
63
 
62
64
  // Clean-up and exit the function
63
65
  fflush(stdout);
66
+ C[8][9] = C[8][9];
64
67
  return 0;
65
68
  }
@@ -15,7 +15,7 @@
15
15
  // == File information
16
16
  // Filename...........benchmark/syrk.c
17
17
  // Author.............Cedric Nugteren
18
- // Last modified on...08-May-2012
18
+ // Last modified on...07-May-2013
19
19
  //
20
20
 
21
21
  #include "common.h"
@@ -33,7 +33,7 @@ int main(void) {
33
33
  float beta = 2123;
34
34
 
35
35
  // Set the input data
36
- for (i=0; i<NI; i++) {
36
+ /* for (i=0; i<NI; i++) {
37
37
  for (j=0; j<NJ; j++) {
38
38
  A[i][j] = ((float) i*j) / NI;
39
39
  }
@@ -43,9 +43,10 @@ int main(void) {
43
43
  C[i][j] = ((float) i*j) / NI;
44
44
  }
45
45
  }
46
-
46
+ */
47
47
  // Perform the computation (C := alpha*A*A' + beta*C)
48
- #pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
48
+ #pragma scop
49
+ #pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
49
50
  for (i=0; i<NI; i++) {
50
51
  for (j=0; j<NI; j++) {
51
52
  C[i][j] *= beta;
@@ -55,8 +56,10 @@ int main(void) {
55
56
  }
56
57
  }
57
58
  #pragma species endkernel syrk
59
+ #pragma endscop
58
60
 
59
61
  // Clean-up and exit the function
60
62
  fflush(stdout);
63
+ C[8][9] = C[8][9];
61
64
  return 0;
62
65
  }
@@ -40,6 +40,7 @@ int main(void) {
40
40
  }
41
41
 
42
42
  // Perform the computation
43
+ #pragma scop
43
44
  for (i=0; i<NX; i++) {
44
45
  x[i] = c[i];
45
46
  A_i_i = A[i][i];
@@ -50,8 +51,10 @@ int main(void) {
50
51
  }
51
52
  //#pragma species endkernel trisolv
52
53
  }
54
+ #pragma endscop
53
55
 
54
56
  // Clean-up and exit the function
55
57
  fflush(stdout);
58
+ x[8] = x[8];
56
59
  return 0;
57
60
  }
@@ -40,6 +40,7 @@ int main(void) {
40
40
  }
41
41
 
42
42
  // Perform the computation (B := alpha*A'*B, with A triangular)
43
+ #pragma scop
43
44
  for (i=1; i<NI; i++) {
44
45
  for (j=0; j<NI; j++) {
45
46
  #pragma species kernel i:i,0:i-1|element ^ j:j,0:i-1|element -> i:i,j:j|shared
@@ -49,9 +50,11 @@ int main(void) {
49
50
  #pragma species endkernel trmm
50
51
  }
51
52
  }
53
+ #pragma endscop
52
54
 
53
55
  // Clean-up and exit the function
54
56
  fflush(stdout);
57
+ B[8][9] = B[8][9];
55
58
  return 0;
56
59
  }
57
60
 
@@ -0,0 +1,180 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Unstructured Grid-
4
+ // Based CFD Solvers', taken from the Rodinia benchmark suite. For more information
5
+ // on the application or on Bones please use the contact information below.
6
+ //
7
+ // == More information on unstructured grid based CFD solvers:
8
+ // Website............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/
9
+ // Article............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/aiaa_2009_4001.pdf
10
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
11
+ //
12
+ // == More information on Bones
13
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
14
+ // Web address........http://parse.ele.tue.nl/bones/
15
+ //
16
+ // == File information
17
+ // Filename...........applications/cfd.c
18
+ // Authors............Cedric Nugteren
19
+ // Original author....Andrew Corrigan
20
+ // Last modified on...10-Aug-2012
21
+ //
22
+
23
+ //########################################################################
24
+ //### Includes
25
+ //########################################################################
26
+
27
+ #include <stdio.h>
28
+ #include <stdlib.h>
29
+ #include <math.h>
30
+
31
+ //########################################################################
32
+ //### Data types
33
+ //########################################################################
34
+
35
+ typedef struct {
36
+ float x;
37
+ float y;
38
+ float z;
39
+ } float3;
40
+
41
+ //########################################################################
42
+ //### Forward declarations
43
+ //########################################################################
44
+
45
+ inline void compute_flux_contribution(float3 momentum, float density_energy, float pressure, float3 velocity, float3 *fc_momentum_x, float3 *fc_momentum_y, float3 *fc_momentum_z, float3 *fc_density_energy);
46
+
47
+ //########################################################################
48
+ //### Options
49
+ //########################################################################
50
+
51
+ #define GAMMA 1.4f
52
+ #define iterations 2000
53
+ #define NNB 4
54
+ #define RK 3 // 3rd order RK
55
+
56
+ #define FF_MACH 1.2f
57
+ #define DEG_ANGLE_OF_ATTACK 0.0f
58
+ #define NDIM 3
59
+
60
+ //########################################################################
61
+ //### Defines
62
+ //########################################################################
63
+
64
+ #define VAR_DENSITY 0
65
+ #define VAR_MOMENTUM 1
66
+ #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
67
+ #define NVAR (VAR_DENSITY_ENERGY+1)
68
+
69
+ //########################################################################
70
+ //### Global variables
71
+ //########################################################################
72
+
73
+ float ff_variable[NVAR];
74
+ float3 ff_flux_contribution_momentum_x;
75
+ float3 ff_flux_contribution_momentum_y;
76
+ float3 ff_flux_contribution_momentum_z;
77
+ float3 ff_flux_contribution_density_energy;
78
+
79
+ //########################################################################
80
+ //### Start of the main function
81
+ //########################################################################
82
+
83
+ int main(void) {
84
+
85
+ // Declare the loop iterators
86
+ int i,j;
87
+
88
+ // Declare far field variables
89
+ const float angle_of_attack = (M_PI/180.0f) * DEG_ANGLE_OF_ATTACK;
90
+ float ff_pressure, ff_speed_of_sound, ff_speed;
91
+ float3 ff_velocity, ff_momentum;
92
+
93
+ // Declare other domain variables
94
+
95
+ // Declare other/helper variables
96
+
97
+ // Compute the far field
98
+ printf("\n[cfd] Set the far field conditions"); fflush(stdout);
99
+ {
100
+ ff_variable[VAR_DENSITY] = 1.4f;
101
+ ff_pressure = 1.0f;
102
+ ff_speed_of_sound = sqrt(GAMMA*ff_pressure / ff_variable[VAR_DENSITY]);
103
+ ff_speed = FF_MACH*ff_speed_of_sound;
104
+
105
+ // Compute the velocity
106
+ ff_velocity.x = ff_speed*cos(angle_of_attack);
107
+ ff_velocity.y = ff_speed*sin(angle_of_attack);
108
+ ff_velocity.z = 0.0f;
109
+
110
+ // Update the variable
111
+ ff_variable[VAR_MOMENTUM+0] = ff_variable[VAR_DENSITY] * ff_velocity.x;
112
+ ff_variable[VAR_MOMENTUM+1] = ff_variable[VAR_DENSITY] * ff_velocity.y;
113
+ ff_variable[VAR_MOMENTUM+2] = ff_variable[VAR_DENSITY] * ff_velocity.z;
114
+ ff_variable[VAR_DENSITY_ENERGY] = ff_variable[VAR_DENSITY]*0.5f*ff_speed*ff_speed + (ff_pressure/(GAMMA-1.0f));
115
+
116
+ // Set the momentum
117
+ ff_momentum.x = ff_variable[VAR_MOMENTUM+0];
118
+ ff_momentum.y = ff_variable[VAR_MOMENTUM+1];
119
+ ff_momentum.z = ff_variable[VAR_MOMENTUM+2];
120
+
121
+ // Compute the flux contribution
122
+ compute_flux_contribution(
123
+ ff_momentum,
124
+ ff_variable[VAR_DENSITY_ENERGY],
125
+ ff_pressure,
126
+ ff_velocity,
127
+ &ff_flux_contribution_momentum_x,
128
+ &ff_flux_contribution_momentum_y,
129
+ &ff_flux_contribution_momentum_z,
130
+ &ff_flux_contribution_density_energy
131
+ );
132
+ }
133
+
134
+ // Initialising memory
135
+ printf("\n[cfd] Initialising memory"); fflush(stdout);
136
+
137
+
138
+ // Clean-up and exit
139
+ printf("\n[cfd] Completed\n\n"); fflush(stdout);
140
+ fflush(stdout);
141
+ return 0;
142
+ }
143
+
144
+ //########################################################################
145
+ //### Function to compute the flux contribution
146
+ //########################################################################
147
+
148
+ inline void compute_flux_contribution(
149
+ float3 momentum,
150
+ float density_energy,
151
+ float pressure,
152
+ float3 velocity,
153
+ float3 *fc_momentum_x,
154
+ float3 *fc_momentum_y,
155
+ float3 *fc_momentum_z,
156
+ float3 *fc_density_energy
157
+ ) {
158
+
159
+ // Compute the x-momentum
160
+ (*fc_momentum_x).x = velocity.x*momentum.x + pressure;
161
+ (*fc_momentum_x).y = velocity.x*momentum.y;
162
+ (*fc_momentum_x).z = velocity.x*momentum.z;
163
+
164
+ // Compute the y-momentum
165
+ (*fc_momentum_y).x = velocity.x*momentum.y;
166
+ (*fc_momentum_y).y = velocity.y*momentum.y + pressure;
167
+ (*fc_momentum_y).z = velocity.y*momentum.z;
168
+
169
+ // Compute the z-momentum
170
+ (*fc_momentum_z).x = velocity.x*momentum.z;
171
+ (*fc_momentum_z).y = velocity.y*momentum.z;
172
+ (*fc_momentum_z).z = velocity.z*momentum.z + pressure;
173
+
174
+ // Compute energy density
175
+ (*fc_density_energy).x = velocity.x*density_energy+pressure;
176
+ (*fc_density_energy).y = velocity.y*density_energy+pressure;
177
+ (*fc_density_energy).z = velocity.z*density_energy+pressure;
178
+ }
179
+
180
+ //########################################################################