bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
@@ -40,7 +40,7 @@ int main(void) {
40
40
  int beta = 12313;
41
41
 
42
42
  // Set the input data
43
- for (i=0; i<NX; i++) {
43
+ /* for (i=0; i<NX; i++) {
44
44
  u1[i] = i;
45
45
  u2[i] = (i+1)/NX/2.0;
46
46
  v1[i] = (i+1)/NX/4.0;
@@ -53,8 +53,9 @@ int main(void) {
53
53
  A[i][j] = ((float) i*j) / NX;
54
54
  }
55
55
  }
56
-
56
+ */
57
57
  // Perform the computation
58
+ #pragma scop
58
59
  #pragma species kernel 0:NX-1,0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element ^ 0:NX-1|element -> 0:NX-1,0:NX-1|element
59
60
  for (i=0; i<NX; i++) {
60
61
  for (j=0; j<NX; j++) {
@@ -81,9 +82,11 @@ int main(void) {
81
82
  }
82
83
  }
83
84
  #pragma species endkernel gemver-part4
85
+ #pragma endscop
84
86
 
85
87
  // Clean-up and exit the function
86
88
  fflush(stdout);
89
+ w[9] = w[9];
87
90
  return 0;
88
91
  }
89
92
 
@@ -36,15 +36,16 @@ int main(void) {
36
36
  float beta = 12313;
37
37
 
38
38
  // Set the input data
39
- for (i=0; i<NX; i++) {
39
+ /* for (i=0; i<NX; i++) {
40
40
  x[i] = ((float) i) / NX;
41
41
  for (j=0; j<NX; j++) {
42
42
  A[i][j] = ((float) i*(j+1)) / NX;
43
43
  B[i][j] = ((float) (i+3)*j) / NX;
44
44
  }
45
45
  }
46
-
46
+ */
47
47
  // Perform the computation
48
+ #pragma scop
48
49
  #pragma species kernel 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) -> 0:NX-1|element ^ 0:NX-1|element
49
50
  for (i=0; i<NX; i++) {
50
51
  tmp[i] = 0;
@@ -56,9 +57,11 @@ int main(void) {
56
57
  y[i] = alpha*tmp[i] + beta*y[i];
57
58
  }
58
59
  #pragma species endkernel gesummv
60
+ #pragma endscop
59
61
 
60
62
  // Clean-up and exit the function
61
63
  fflush(stdout);
64
+ y[9] = y[9];
62
65
  return 0;
63
66
  }
64
67
 
@@ -46,6 +46,7 @@ int main(void) {
46
46
  }
47
47
 
48
48
  // Perform the computation
49
+ #pragma scop
49
50
  for (k=0; k<NJ; k++) {
50
51
  nrm[0] = 0;
51
52
  #pragma species kernel 0:NI-1,k:k|element -> 0:0|shared
@@ -77,8 +78,10 @@ int main(void) {
77
78
  }
78
79
  #pragma species endkernel gramschmidt-part3
79
80
  }
81
+ #pragma endscop
80
82
 
81
83
  // Clean-up and exit the function
82
84
  fflush(stdout);
85
+ A[8][9] = A[8][9];
83
86
  return 0;
84
87
  }
@@ -18,6 +18,7 @@
18
18
  // Last modified on...03-April-2012
19
19
  //
20
20
 
21
+
21
22
  #include "common.h"
22
23
 
23
24
  // This is 'jacobi-1d-imper', a 1D Jacobi stencil computation
@@ -27,14 +28,19 @@ int main(void) {
27
28
  // Declare arrays on the stack
28
29
  float A[LARGE_N];
29
30
  float B[LARGE_N];
31
+ //printf("A: %p\n", A);
32
+ //printf("B: %p\n", B);
33
+ //float *A = (float *)malloc(LARGE_N*sizeof(float));
34
+ //float *B = (float *)malloc(LARGE_N*sizeof(float));
30
35
 
31
36
  // Set the input data
32
- for (i=0; i<LARGE_N; i++) {
37
+ /* for (i=0; i<LARGE_N; i++) {
33
38
  A[i] = ((float) i+2) / LARGE_N;
34
39
  B[i] = ((float) i+3) / LARGE_N;
35
40
  }
36
-
41
+ */
37
42
  // Perform the computation
43
+ #pragma scop
38
44
  for (t=0; t<TSTEPS; t++) {
39
45
  #pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
40
46
  for (i=1; i<LARGE_N-1; i++) {
@@ -47,9 +53,11 @@ int main(void) {
47
53
  }
48
54
  #pragma species endkernel jacobi-1d-imper-part2
49
55
  }
56
+ #pragma endscop
50
57
 
51
58
  // Clean-up and exit the function
52
59
  fflush(stdout);
60
+ B[9] = B[9];
53
61
  return 0;
54
62
  }
55
63
 
@@ -29,19 +29,22 @@ int main(void) {
29
29
  float B[N][N];
30
30
 
31
31
  // Set the input data
32
- for (i=0; i<N; i++) {
32
+ /* for (i=0; i<N; i++) {
33
33
  for (j=0; j<N; j++) {
34
34
  A[i][j] = ((float) i*(j+2) + 2) / N;
35
35
  B[i][j] = ((float) i*(j+3) + 3) / N;
36
36
  }
37
37
  }
38
-
38
+ */
39
39
  // Perform the computation
40
+ #pragma scop
40
41
  for (t=0; t<TSTEPS; t++) {
41
42
  #pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
42
43
  for (i=1; i<N-1; i++) {
43
44
  for (j=1; j<N-1; j++) {
44
- B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
45
+ if (i < N-1 && j < N-1) {
46
+ B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
47
+ }
45
48
  }
46
49
  }
47
50
  #pragma species endkernel jacobi-2d-imper-part1
@@ -53,9 +56,11 @@ int main(void) {
53
56
  }
54
57
  #pragma species endkernel jacobi-2d-imper-part2
55
58
  }
59
+ #pragma endscop
56
60
 
57
61
  // Clean-up and exit the function
58
62
  fflush(stdout);
63
+ B[8][9] = B[8][9];
59
64
  return 0;
60
65
  }
61
66
 
@@ -35,6 +35,7 @@ int main(void) {
35
35
  }
36
36
 
37
37
  // Perform the computation
38
+ #pragma scop
38
39
  for (k=0; k<N; k++) {
39
40
  #pragma species kernel k:k,k+1:N-1|element -> k:k,k+1:N-1|element
40
41
  for (j=k+1; j<N; j++) {
@@ -49,9 +50,11 @@ int main(void) {
49
50
  }
50
51
  #pragma species endkernel lu-part2
51
52
  }
53
+ #pragma endscop
52
54
 
53
55
  // Clean-up and exit the function
54
56
  fflush(stdout);
57
+ A[8][9] = A[8][9];
55
58
  return 0;
56
59
  }
57
60
 
@@ -42,6 +42,7 @@ int main(void) {
42
42
  }
43
43
 
44
44
  // Perform the computation
45
+ #pragma scop
45
46
  b[0] = 1.0;
46
47
  for (i=0; i<N; i++) {
47
48
  for (j=i+1; j<=N; j++) {
@@ -83,9 +84,11 @@ int main(void) {
83
84
  //#pragma species endkernel ludcmp-part4
84
85
  x[N-1-i] = w[0] / A[N-1-i][N-1-i];
85
86
  }
87
+ #pragma endscop
86
88
 
87
89
  // Clean-up and exit the function
88
90
  fflush(stdout);
91
+ x[9] = x[9];
89
92
  return 0;
90
93
  }
91
94
 
@@ -30,7 +30,7 @@ int main(void) {
30
30
  float x2[NX];
31
31
  float y_1[NX];
32
32
  float y_2[NX];
33
-
33
+ /*
34
34
  // Set the input data
35
35
  for (i=0; i<NX; i++) {
36
36
  x1[i] = ((float) i) / NX;
@@ -40,9 +40,10 @@ int main(void) {
40
40
  for (j=0; j<NX; j++) {
41
41
  A[i][j] = ((float) i*j) / NX;
42
42
  }
43
- }
43
+ }*/
44
44
 
45
45
  // Perform the computation
46
+ #pragma scop
46
47
  #pragma species kernel 0:NX-1|element ^ 0:NX-1,0:NX-1|chunk(0:0,0:NX-1) ^ 0:NX-1|full -> 0:NX-1|element
47
48
  for (i=0; i<NX; i++) {
48
49
  for (j=0; j<NX; j++) {
@@ -57,9 +58,12 @@ int main(void) {
57
58
  }
58
59
  }
59
60
  #pragma species endkernel mvt-part2
61
+ #pragma endscop
60
62
 
61
63
  // Clean-up and exit the function
62
64
  fflush(stdout);
65
+ x1[9] = x1[9];
66
+ x2[9] = x2[9];
63
67
  return 0;
64
68
  }
65
69
 
@@ -42,6 +42,7 @@ int main(void) {
42
42
  }
43
43
 
44
44
  // Perform the computation
45
+ #pragma scop
45
46
  for (t=0; t<ITER; t++) {
46
47
  #pragma species kernel 0:MAXGRID-1,0:MAXGRID-1|element -> 0:MAXGRID-1,0:MAXGRID-1,0:LENGTH-1|chunk(0:0,0:0,0:LENGTH-1)
47
48
  for (j=0; j<=MAXGRID-1; j++) {
@@ -75,8 +76,10 @@ int main(void) {
75
76
  #pragma species endkernel reg-detect-part3
76
77
  }
77
78
  }
79
+ #pragma endscop
78
80
 
79
81
  // Clean-up and exit the function
80
82
  fflush(stdout);
83
+ path[8][9] = path[8][9];
81
84
  return 0;
82
85
  }
@@ -35,6 +35,7 @@ int main(void) {
35
35
  }
36
36
 
37
37
  // Perform the computation
38
+ #pragma scop
38
39
  for (t=0; t<TSTEPS-1; t++) {
39
40
  for (i=1; i<=N-2; i++) {
40
41
  for (j=1; j<=N-2; j++) {
@@ -44,8 +45,10 @@ int main(void) {
44
45
  }
45
46
  }
46
47
  }
48
+ #pragma endscop
47
49
 
48
50
  // Clean-up and exit the function
49
51
  fflush(stdout);
52
+ A[8][9] = A[8][9];
50
53
  return 0;
51
54
  }
@@ -49,6 +49,7 @@ int main(void) {
49
49
  }
50
50
 
51
51
  // Perform the computation (C := alpha*A*B + beta*C, with A symmetric)
52
+ #pragma scop
52
53
  for (i=0; i<NI; i++) {
53
54
  for (j=0; j<NJ; j++) {
54
55
  acc[0] = 0;
@@ -66,9 +67,11 @@ int main(void) {
66
67
  C[i][j] = beta*C[i][j] + alpha*A[i][i]*bij + alpha*acc[0];
67
68
  }
68
69
  }
70
+ #pragma endscop
69
71
 
70
72
  // Clean-up and exit the function
71
73
  fflush(stdout);
74
+ C[8][9] = C[8][9];
72
75
  return 0;
73
76
  }
74
77
 
@@ -34,7 +34,7 @@ int main(void) {
34
34
  int beta = 2123;
35
35
 
36
36
  // Set the input data
37
- for (i=0; i<NI; i++) {
37
+ /* for (i=0; i<NI; i++) {
38
38
  for (j=0; j<NJ; j++) {
39
39
  A[i][j] = ((float) i*j) / NI;
40
40
  B[i][j] = ((float) i*j) / NI;
@@ -45,8 +45,9 @@ int main(void) {
45
45
  C[i][j] = ((float) i*j) / NI;
46
46
  }
47
47
  }
48
-
48
+ */
49
49
  // Perform the computation (C := alpha*A*B' + alpha*B*A' + beta*C)
50
+ #pragma scop
50
51
  #pragma species kernel 0:NI-1,0:NI-1|element ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) ^ 0:NI-1,0:NJ-1|chunk(0:0,0:NJ-1) -> 0:NI-1,0:NI-1|element
51
52
  for (i=0; i<NI; i++) {
52
53
  for (j=0; j<NI; j++) {
@@ -58,8 +59,10 @@ int main(void) {
58
59
  }
59
60
  }
60
61
  #pragma species endkernel syr2k
62
+ #pragma endscop
61
63
 
62
64
  // Clean-up and exit the function
63
65
  fflush(stdout);
66
+ C[8][9] = C[8][9];
64
67
  return 0;
65
68
  }
@@ -15,7 +15,7 @@
15
15
  // == File information
16
16
  // Filename...........benchmark/syrk.c
17
17
  // Author.............Cedric Nugteren
18
- // Last modified on...08-May-2012
18
+ // Last modified on...07-May-2013
19
19
  //
20
20
 
21
21
  #include "common.h"
@@ -33,7 +33,7 @@ int main(void) {
33
33
  float beta = 2123;
34
34
 
35
35
  // Set the input data
36
- for (i=0; i<NI; i++) {
36
+ /* for (i=0; i<NI; i++) {
37
37
  for (j=0; j<NJ; j++) {
38
38
  A[i][j] = ((float) i*j) / NI;
39
39
  }
@@ -43,9 +43,10 @@ int main(void) {
43
43
  C[i][j] = ((float) i*j) / NI;
44
44
  }
45
45
  }
46
-
46
+ */
47
47
  // Perform the computation (C := alpha*A*A' + beta*C)
48
- #pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
48
+ #pragma scop
49
+ #pragma species kernel C[0:NI-1,0:NI-1]|element ^ A[0:NI-1,0:NJ-1]|chunk(0:0,0:NJ-1) -> C[0:NI-1,0:NI-1]|element
49
50
  for (i=0; i<NI; i++) {
50
51
  for (j=0; j<NI; j++) {
51
52
  C[i][j] *= beta;
@@ -55,8 +56,10 @@ int main(void) {
55
56
  }
56
57
  }
57
58
  #pragma species endkernel syrk
59
+ #pragma endscop
58
60
 
59
61
  // Clean-up and exit the function
60
62
  fflush(stdout);
63
+ C[8][9] = C[8][9];
61
64
  return 0;
62
65
  }
@@ -40,6 +40,7 @@ int main(void) {
40
40
  }
41
41
 
42
42
  // Perform the computation
43
+ #pragma scop
43
44
  for (i=0; i<NX; i++) {
44
45
  x[i] = c[i];
45
46
  A_i_i = A[i][i];
@@ -50,8 +51,10 @@ int main(void) {
50
51
  }
51
52
  //#pragma species endkernel trisolv
52
53
  }
54
+ #pragma endscop
53
55
 
54
56
  // Clean-up and exit the function
55
57
  fflush(stdout);
58
+ x[8] = x[8];
56
59
  return 0;
57
60
  }
@@ -40,6 +40,7 @@ int main(void) {
40
40
  }
41
41
 
42
42
  // Perform the computation (B := alpha*A'*B, with A triangular)
43
+ #pragma scop
43
44
  for (i=1; i<NI; i++) {
44
45
  for (j=0; j<NI; j++) {
45
46
  #pragma species kernel i:i,0:i-1|element ^ j:j,0:i-1|element -> i:i,j:j|shared
@@ -49,9 +50,11 @@ int main(void) {
49
50
  #pragma species endkernel trmm
50
51
  }
51
52
  }
53
+ #pragma endscop
52
54
 
53
55
  // Clean-up and exit the function
54
56
  fflush(stdout);
57
+ B[8][9] = B[8][9];
55
58
  return 0;
56
59
  }
57
60
 
@@ -0,0 +1,180 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'Unstructured Grid-
4
+ // Based CFD Solvers', taken from the Rodinia benchmark suite. For more information
5
+ // on the application or on Bones please use the contact information below.
6
+ //
7
+ // == More information on unstructured grid based CFD solvers:
8
+ // Website............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/
9
+ // Article............http://web.cos.gmu.edu/~acorriga/pubs/gpu_cfd/aiaa_2009_4001.pdf
10
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
11
+ //
12
+ // == More information on Bones
13
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
14
+ // Web address........http://parse.ele.tue.nl/bones/
15
+ //
16
+ // == File information
17
+ // Filename...........applications/cfd.c
18
+ // Authors............Cedric Nugteren
19
+ // Original author....Andrew Corrigan
20
+ // Last modified on...10-Aug-2012
21
+ //
22
+
23
+ //########################################################################
24
+ //### Includes
25
+ //########################################################################
26
+
27
+ #include <stdio.h>
28
+ #include <stdlib.h>
29
+ #include <math.h>
30
+
31
+ //########################################################################
32
+ //### Data types
33
+ //########################################################################
34
+
35
+ typedef struct {
36
+ float x;
37
+ float y;
38
+ float z;
39
+ } float3;
40
+
41
+ //########################################################################
42
+ //### Forward declarations
43
+ //########################################################################
44
+
45
+ inline void compute_flux_contribution(float3 momentum, float density_energy, float pressure, float3 velocity, float3 *fc_momentum_x, float3 *fc_momentum_y, float3 *fc_momentum_z, float3 *fc_density_energy);
46
+
47
+ //########################################################################
48
+ //### Options
49
+ //########################################################################
50
+
51
+ #define GAMMA 1.4f
52
+ #define iterations 2000
53
+ #define NNB 4
54
+ #define RK 3 // 3rd order RK
55
+
56
+ #define FF_MACH 1.2f
57
+ #define DEG_ANGLE_OF_ATTACK 0.0f
58
+ #define NDIM 3
59
+
60
+ //########################################################################
61
+ //### Defines
62
+ //########################################################################
63
+
64
+ #define VAR_DENSITY 0
65
+ #define VAR_MOMENTUM 1
66
+ #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
67
+ #define NVAR (VAR_DENSITY_ENERGY+1)
68
+
69
+ //########################################################################
70
+ //### Global variables
71
+ //########################################################################
72
+
73
+ float ff_variable[NVAR];
74
+ float3 ff_flux_contribution_momentum_x;
75
+ float3 ff_flux_contribution_momentum_y;
76
+ float3 ff_flux_contribution_momentum_z;
77
+ float3 ff_flux_contribution_density_energy;
78
+
79
+ //########################################################################
80
+ //### Start of the main function
81
+ //########################################################################
82
+
83
+ int main(void) {
84
+
85
+ // Declare the loop iterators
86
+ int i,j;
87
+
88
+ // Declare far field variables
89
+ const float angle_of_attack = (M_PI/180.0f) * DEG_ANGLE_OF_ATTACK;
90
+ float ff_pressure, ff_speed_of_sound, ff_speed;
91
+ float3 ff_velocity, ff_momentum;
92
+
93
+ // Declare other domain variables
94
+
95
+ // Declare other/helper variables
96
+
97
+ // Compute the far field
98
+ printf("\n[cfd] Set the far field conditions"); fflush(stdout);
99
+ {
100
+ ff_variable[VAR_DENSITY] = 1.4f;
101
+ ff_pressure = 1.0f;
102
+ ff_speed_of_sound = sqrt(GAMMA*ff_pressure / ff_variable[VAR_DENSITY]);
103
+ ff_speed = FF_MACH*ff_speed_of_sound;
104
+
105
+ // Compute the velocity
106
+ ff_velocity.x = ff_speed*cos(angle_of_attack);
107
+ ff_velocity.y = ff_speed*sin(angle_of_attack);
108
+ ff_velocity.z = 0.0f;
109
+
110
+ // Update the variable
111
+ ff_variable[VAR_MOMENTUM+0] = ff_variable[VAR_DENSITY] * ff_velocity.x;
112
+ ff_variable[VAR_MOMENTUM+1] = ff_variable[VAR_DENSITY] * ff_velocity.y;
113
+ ff_variable[VAR_MOMENTUM+2] = ff_variable[VAR_DENSITY] * ff_velocity.z;
114
+ ff_variable[VAR_DENSITY_ENERGY] = ff_variable[VAR_DENSITY]*0.5f*ff_speed*ff_speed + (ff_pressure/(GAMMA-1.0f));
115
+
116
+ // Set the momentum
117
+ ff_momentum.x = ff_variable[VAR_MOMENTUM+0];
118
+ ff_momentum.y = ff_variable[VAR_MOMENTUM+1];
119
+ ff_momentum.z = ff_variable[VAR_MOMENTUM+2];
120
+
121
+ // Compute the flux contribution
122
+ compute_flux_contribution(
123
+ ff_momentum,
124
+ ff_variable[VAR_DENSITY_ENERGY],
125
+ ff_pressure,
126
+ ff_velocity,
127
+ &ff_flux_contribution_momentum_x,
128
+ &ff_flux_contribution_momentum_y,
129
+ &ff_flux_contribution_momentum_z,
130
+ &ff_flux_contribution_density_energy
131
+ );
132
+ }
133
+
134
+ // Initialising memory
135
+ printf("\n[cfd] Initialising memory"); fflush(stdout);
136
+
137
+
138
+ // Clean-up and exit
139
+ printf("\n[cfd] Completed\n\n"); fflush(stdout);
140
+ fflush(stdout);
141
+ return 0;
142
+ }
143
+
144
+ //########################################################################
145
+ //### Function to compute the flux contribution
146
+ //########################################################################
147
+
148
+ inline void compute_flux_contribution(
149
+ float3 momentum,
150
+ float density_energy,
151
+ float pressure,
152
+ float3 velocity,
153
+ float3 *fc_momentum_x,
154
+ float3 *fc_momentum_y,
155
+ float3 *fc_momentum_z,
156
+ float3 *fc_density_energy
157
+ ) {
158
+
159
+ // Compute the x-momentum
160
+ (*fc_momentum_x).x = velocity.x*momentum.x + pressure;
161
+ (*fc_momentum_x).y = velocity.x*momentum.y;
162
+ (*fc_momentum_x).z = velocity.x*momentum.z;
163
+
164
+ // Compute the y-momentum
165
+ (*fc_momentum_y).x = velocity.x*momentum.y;
166
+ (*fc_momentum_y).y = velocity.y*momentum.y + pressure;
167
+ (*fc_momentum_y).z = velocity.y*momentum.z;
168
+
169
+ // Compute the z-momentum
170
+ (*fc_momentum_z).x = velocity.x*momentum.z;
171
+ (*fc_momentum_z).y = velocity.y*momentum.z;
172
+ (*fc_momentum_z).z = velocity.z*momentum.z + pressure;
173
+
174
+ // Compute energy density
175
+ (*fc_density_energy).x = velocity.x*density_energy+pressure;
176
+ (*fc_density_energy).y = velocity.y*density_energy+pressure;
177
+ (*fc_density_energy).z = velocity.z*density_energy+pressure;
178
+ }
179
+
180
+ //########################################################################