bones-compiler 1.3.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307)
  1. data/CHANGELOG +62 -0
  2. data/README.rdoc +14 -3
  3. data/Rakefile +13 -12
  4. data/VERSION +1 -1
  5. data/examples/applications/ffos.c +24 -8
  6. data/examples/benchmarks/PolyBench/2mm.c +0 -0
  7. data/examples/benchmarks/PolyBench/3mm.c +0 -0
  8. data/examples/benchmarks/PolyBench/adi.c +0 -0
  9. data/examples/benchmarks/PolyBench/atax.c +0 -0
  10. data/examples/benchmarks/PolyBench/bicg.c +0 -0
  11. data/examples/benchmarks/PolyBench/cholesky.c +0 -0
  12. data/examples/benchmarks/PolyBench/common.h +0 -0
  13. data/examples/benchmarks/PolyBench/correlation.c +0 -0
  14. data/examples/benchmarks/PolyBench/covariance.c +0 -0
  15. data/examples/benchmarks/PolyBench/doitgen.c +0 -0
  16. data/examples/benchmarks/PolyBench/durbin.c +0 -0
  17. data/examples/benchmarks/PolyBench/dynprog.c +0 -0
  18. data/examples/benchmarks/PolyBench/fdtd-2d-apml.c +0 -0
  19. data/examples/benchmarks/PolyBench/fdtd-2d.c +0 -0
  20. data/examples/benchmarks/PolyBench/floyd-warshall.c +0 -0
  21. data/examples/benchmarks/PolyBench/gemm.c +0 -0
  22. data/examples/benchmarks/PolyBench/gemver.c +0 -0
  23. data/examples/benchmarks/PolyBench/gesummv.c +0 -0
  24. data/examples/benchmarks/PolyBench/gramschmidt.c +0 -0
  25. data/examples/benchmarks/PolyBench/jacobi-1d-imper.c +4 -2
  26. data/examples/benchmarks/PolyBench/jacobi-2d-imper.c +1 -1
  27. data/examples/benchmarks/PolyBench/lu.c +0 -0
  28. data/examples/benchmarks/PolyBench/ludcmp.c +0 -0
  29. data/examples/benchmarks/PolyBench/mvt.c +0 -0
  30. data/examples/benchmarks/PolyBench/reg_detect.c +0 -0
  31. data/examples/benchmarks/PolyBench/seidel-2d.c +0 -0
  32. data/examples/benchmarks/PolyBench/symm.c +0 -0
  33. data/examples/benchmarks/PolyBench/syr2k.c +0 -0
  34. data/examples/benchmarks/PolyBench/syrk.c +0 -0
  35. data/examples/benchmarks/PolyBench/trisolv.c +0 -0
  36. data/examples/benchmarks/PolyBench/trmm.c +0 -0
  37. data/examples/benchmarks/Rodinia/bfs.c +143 -0
  38. data/examples/benchmarks/Rodinia/common.h +78 -0
  39. data/examples/benchmarks/Rodinia/hotspot.c +106 -126
  40. data/examples/benchmarks/Rodinia/kmeans.c +157 -164
  41. data/examples/benchmarks/Rodinia/nw.c +151 -0
  42. data/examples/benchmarks/Rodinia/pathfinder.c +88 -0
  43. data/examples/benchmarks/Rodinia/srad.c +50 -59
  44. data/examples/benchmarks/other/common.h +0 -0
  45. data/examples/benchmarks/other/dct.c +0 -0
  46. data/examples/benchmarks/other/mm.c +0 -0
  47. data/examples/benchmarks/other/saxpy.c +0 -0
  48. data/examples/chunk/example01.c +6 -4
  49. data/examples/chunk/example02.c +6 -4
  50. data/examples/chunk/example03.c +6 -4
  51. data/examples/chunk/example04.c +8 -5
  52. data/examples/chunk/example05.c +6 -4
  53. data/examples/chunk/example06.c +3 -1
  54. data/examples/chunk/example07.c +5 -2
  55. data/examples/dependences/example01.c +3 -1
  56. data/examples/dependences/example02.c +3 -1
  57. data/examples/dependences/example03.c +3 -1
  58. data/examples/dependences/example04.c +3 -1
  59. data/examples/dependences/example05.c +3 -1
  60. data/examples/element/example01.c +6 -4
  61. data/examples/element/example02.c +6 -4
  62. data/examples/element/example03.c +10 -8
  63. data/examples/element/example04.c +6 -4
  64. data/examples/element/example05.c +8 -5
  65. data/examples/element/example06.c +6 -4
  66. data/examples/element/example07.c +6 -4
  67. data/examples/element/example08.c +6 -4
  68. data/examples/element/example09.c +6 -4
  69. data/examples/element/example10.c +4 -2
  70. data/examples/element/example11.c +4 -2
  71. data/examples/element/example12.c +4 -2
  72. data/examples/element/example13.c +3 -1
  73. data/examples/fusion/example01.c +3 -12
  74. data/examples/fusion/example02.c +3 -16
  75. data/examples/fusion/example03.c +3 -1
  76. data/examples/fusion/example04.c +5 -3
  77. data/examples/fusion/example05.c +3 -1
  78. data/examples/neighbourhood/example01.c +6 -4
  79. data/examples/neighbourhood/example02.c +6 -4
  80. data/examples/neighbourhood/example03.c +6 -4
  81. data/examples/neighbourhood/example04.c +5 -3
  82. data/examples/neighbourhood/example05.c +3 -1
  83. data/examples/shared/example01.c +6 -4
  84. data/examples/shared/example02.c +6 -4
  85. data/examples/shared/example03.c +6 -4
  86. data/examples/shared/example04.c +6 -4
  87. data/examples/shared/example05.c +6 -4
  88. data/lib/adarwin/engine.rb +16 -5
  89. data/lib/adarwin/memorycopies.rb +21 -9
  90. data/lib/adarwin/nest.rb +18 -1
  91. data/lib/adarwin/preprocessor.rb +5 -2
  92. data/lib/adarwin/reference.rb +71 -6
  93. data/lib/bones/algorithm.rb +20 -5
  94. data/lib/bones/copy.rb +3 -2
  95. data/lib/bones/engine.rb +12 -9
  96. data/lib/bones/preprocessor.rb +170 -120
  97. data/lib/bones/variablelist.rb +1 -1
  98. data/lib/cast.rb +11 -0
  99. data/lib/castaddon.rb +23 -6
  100. data/lib/castaddon/node_adarwin.rb +17 -0
  101. data/lib/castaddon/node_common.rb +6 -0
  102. data/lib/castaddon/transformations.rb +13 -9
  103. data/skeletons/CPU-C/common/epilogue.c +0 -0
  104. data/skeletons/CPU-C/common/globals.c +0 -0
  105. data/skeletons/CPU-C/common/globals_kernel.c +0 -0
  106. data/skeletons/CPU-C/common/header.c +0 -0
  107. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  108. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  109. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  110. data/skeletons/CPU-C/common/mem_global.c +0 -0
  111. data/skeletons/CPU-C/common/mem_prologue.c +0 -0
  112. data/skeletons/CPU-C/common/prologue.c +0 -0
  113. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  114. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  115. data/skeletons/CPU-C/common/timer_2_start.c +0 -0
  116. data/skeletons/CPU-C/common/timer_2_stop.c +0 -0
  117. data/skeletons/CPU-C/common/timer_globals.c +5 -0
  118. data/skeletons/CPU-C/kernel/default.host.c +0 -0
  119. data/skeletons/CPU-C/kernel/default.kernel.c +0 -0
  120. data/skeletons/CPU-C/skeletons.txt +0 -0
  121. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +0 -0
  122. data/skeletons/CPU-OPENCL-AMD/common/globals.c +0 -0
  123. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +0 -0
  124. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  125. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
  126. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
  127. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
  128. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +0 -0
  129. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +0 -0
  130. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +0 -0
  131. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
  132. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +0 -0
  133. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
  134. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
  135. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
  136. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +0 -0
  137. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
  138. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +0 -0
  139. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +0 -0
  140. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +0 -0
  141. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +0 -0
  142. data/skeletons/CPU-OPENCL-INTEL/common/header.c +0 -0
  143. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +0 -0
  144. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +0 -0
  145. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +0 -0
  146. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  147. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +0 -0
  148. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +0 -0
  149. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +0 -0
  150. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +0 -0
  151. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +0 -0
  152. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +0 -0
  153. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +5 -0
  154. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +0 -0
  155. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +0 -0
  156. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +0 -0
  157. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +0 -0
  158. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +0 -0
  159. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  160. data/skeletons/CPU-OPENMP/common/globals.c +0 -0
  161. data/skeletons/CPU-OPENMP/common/globals_kernel.c +0 -0
  162. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  163. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  164. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  165. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  166. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  167. data/skeletons/CPU-OPENMP/common/mem_prologue.c +0 -0
  168. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  169. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -0
  170. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  171. data/skeletons/CPU-OPENMP/common/timer_2_start.c +0 -0
  172. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +0 -0
  173. data/skeletons/CPU-OPENMP/common/timer_globals.c +2 -0
  174. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +0 -0
  175. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +3 -3
  176. data/skeletons/CPU-OPENMP/kernel/default.host.c +0 -0
  177. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +0 -0
  178. data/skeletons/CPU-OPENMP/skeletons.txt +0 -0
  179. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  180. data/skeletons/GPU-CUDA/common/globals.c +0 -0
  181. data/skeletons/GPU-CUDA/common/globals_kernel.c +0 -0
  182. data/skeletons/GPU-CUDA/common/header.c +0 -0
  183. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +0 -0
  184. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +0 -0
  185. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +0 -0
  186. data/skeletons/GPU-CUDA/common/mem_async_free.c +0 -0
  187. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +0 -0
  188. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +0 -0
  189. data/skeletons/GPU-CUDA/common/mem_epilogue.c +0 -0
  190. data/skeletons/GPU-CUDA/common/mem_global.c +0 -0
  191. data/skeletons/GPU-CUDA/common/mem_prologue.c +0 -0
  192. data/skeletons/GPU-CUDA/common/prologue.c +0 -0
  193. data/skeletons/GPU-CUDA/common/scheduler.c +2 -2
  194. data/skeletons/GPU-CUDA/common/timer_1_start.c +0 -0
  195. data/skeletons/GPU-CUDA/common/timer_1_stop.c +0 -0
  196. data/skeletons/GPU-CUDA/common/timer_2_start.c +0 -0
  197. data/skeletons/GPU-CUDA/common/timer_2_stop.c +0 -0
  198. data/skeletons/GPU-CUDA/common/timer_globals.c +0 -0
  199. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +0 -0
  200. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +0 -0
  201. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +0 -0
  202. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +0 -0
  203. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +0 -0
  204. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +0 -0
  205. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +0 -0
  206. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +0 -0
  207. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +0 -0
  208. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +0 -0
  209. data/skeletons/GPU-CUDA/kernel/default.host.c +0 -0
  210. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +0 -0
  211. data/skeletons/GPU-CUDA/skeletons.txt +4 -2
  212. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +0 -0
  213. data/skeletons/GPU-OPENCL-AMD/common/globals.c +0 -0
  214. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +0 -0
  215. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  216. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
  217. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
  218. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
  219. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +0 -0
  220. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +0 -0
  221. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +0 -0
  222. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
  223. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +0 -0
  224. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
  225. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
  226. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
  227. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +0 -0
  228. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
  229. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +0 -0
  230. data/skeletons/verification/header.c +0 -0
  231. data/skeletons/verification/timer_start.c +0 -0
  232. data/skeletons/verification/timer_stop.c +0 -0
  233. data/skeletons/verification/verify_results.c +0 -0
  234. data/test/bones/test_algorithm.rb +0 -0
  235. data/test/bones/test_common.rb +0 -0
  236. data/test/bones/test_preprocessor.rb +0 -0
  237. data/test/bones/test_species.rb +0 -0
  238. data/test/bones/test_variable.rb +0 -0
  239. data/test/examples/benchmarks/PolyBench/2mm_species.c +1 -1
  240. data/test/examples/benchmarks/PolyBench/3mm_species.c +0 -0
  241. data/test/examples/benchmarks/PolyBench/adi_species.c +0 -0
  242. data/test/examples/benchmarks/PolyBench/atax_species.c +0 -0
  243. data/test/examples/benchmarks/PolyBench/bicg_species.c +0 -0
  244. data/test/examples/benchmarks/PolyBench/cholesky_species.c +0 -0
  245. data/test/examples/benchmarks/PolyBench/correlation_species.c +0 -0
  246. data/test/examples/benchmarks/PolyBench/covariance_species.c +0 -0
  247. data/test/examples/benchmarks/PolyBench/doitgen_species.c +0 -0
  248. data/test/examples/benchmarks/PolyBench/durbin_species.c +0 -0
  249. data/test/examples/benchmarks/PolyBench/dynprog_species.c +0 -0
  250. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +0 -0
  251. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +0 -0
  252. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +0 -0
  253. data/test/examples/benchmarks/PolyBench/gemm_species.c +0 -0
  254. data/test/examples/benchmarks/PolyBench/gemver_species.c +0 -0
  255. data/test/examples/benchmarks/PolyBench/gesummv_species.c +0 -0
  256. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +0 -0
  257. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +0 -0
  258. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +0 -0
  259. data/test/examples/benchmarks/PolyBench/lu_species.c +0 -0
  260. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +0 -0
  261. data/test/examples/benchmarks/PolyBench/mvt_species.c +0 -0
  262. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +0 -0
  263. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +0 -0
  264. data/test/examples/benchmarks/PolyBench/symm_species.c +0 -0
  265. data/test/examples/benchmarks/PolyBench/syr2k_species.c +0 -0
  266. data/test/examples/benchmarks/PolyBench/syrk_species.c +0 -0
  267. data/test/examples/benchmarks/PolyBench/trisolv_species.c +0 -0
  268. data/test/examples/benchmarks/PolyBench/trmm_species.c +0 -0
  269. data/test/examples/chunk/example01_species.c +3 -3
  270. data/test/examples/chunk/example02_species.c +3 -3
  271. data/test/examples/chunk/example03_species.c +3 -3
  272. data/test/examples/chunk/example04_species.c +3 -3
  273. data/test/examples/chunk/example05_species.c +3 -3
  274. data/test/examples/chunk/example06_species.c +1 -1
  275. data/test/examples/chunk/example07_species.c +3 -2
  276. data/test/examples/dependences/example01_species.c +1 -1
  277. data/test/examples/dependences/example02_species.c +1 -1
  278. data/test/examples/dependences/example03_species.c +1 -1
  279. data/test/examples/dependences/example04_species.c +1 -1
  280. data/test/examples/dependences/example05_species.c +1 -1
  281. data/test/examples/element/example01_species.c +3 -3
  282. data/test/examples/element/example02_species.c +3 -3
  283. data/test/examples/element/example03_species.c +7 -7
  284. data/test/examples/element/example04_species.c +3 -3
  285. data/test/examples/element/example05_species.c +3 -3
  286. data/test/examples/element/example06_species.c +3 -3
  287. data/test/examples/element/example07_species.c +3 -3
  288. data/test/examples/element/example08_species.c +3 -3
  289. data/test/examples/element/example09_species.c +3 -3
  290. data/test/examples/element/example10_species.c +1 -1
  291. data/test/examples/element/example11_species.c +1 -1
  292. data/test/examples/element/example12_species.c +1 -1
  293. data/test/examples/element/example13_species.c +1 -1
  294. data/test/examples/neighbourhood/example01_species.c +3 -3
  295. data/test/examples/neighbourhood/example02_species.c +3 -3
  296. data/test/examples/neighbourhood/example03_species.c +3 -3
  297. data/test/examples/neighbourhood/example04_species.c +3 -3
  298. data/test/examples/neighbourhood/example05_species.c +1 -1
  299. data/test/examples/shared/example01_species.c +3 -3
  300. data/test/examples/shared/example02_species.c +3 -3
  301. data/test/examples/shared/example03_species.c +3 -3
  302. data/test/examples/shared/example04_species.c +3 -3
  303. data/test/examples/shared/example05_species.c +3 -3
  304. data/test/test_helper.rb +2 -2
  305. metadata +266 -252
  306. checksums.yaml +0 -15
  307. data/examples/benchmarks/Rodinia/cfd.c +0 -180
@@ -8,16 +8,16 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........neighbourhood/example2.c
11
+ // Filename...........neighbourhood/example02.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
  #define A 256
18
18
  #define B 512
19
19
 
20
- // This is 'example2', demonstrating a 2D array, a 2D neighbourhood and a for-loop-less notation of the neighbourhood accesses
20
+ // This is 'example02', demonstrating a 2D array, a 2D neighbourhood and a for-loop-less notation of the neighbourhood accesses
21
21
  int main(void) {
22
22
  int i,j;
23
23
 
@@ -33,7 +33,8 @@ int main(void) {
33
33
  }
34
34
 
35
35
  // Perform the computation
36
- #pragma species kernel 0:255,0:511|neighbourhood(-1:1,-1:1) -> 0:255,0:511|element
36
+ #pragma scop
37
+ #pragma species kernel in[0:255,0:511]|neighbourhood(-1:1,-1:1) -> out[0:255,0:511]|element
37
38
  for(i=0;i<A;i++) {
38
39
  for(j=0;j<B;j++) {
39
40
  if (i >= 1 && j >= 1 && i < (A-1) && j < (B-1)) {
@@ -47,6 +48,7 @@ int main(void) {
47
48
  }
48
49
  }
49
50
  #pragma species endkernel example2
51
+ #pragma endscop
50
52
 
51
53
  // Clean-up and exit the function
52
54
  fflush(stdout);
@@ -8,9 +8,9 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........neighbourhood/example3.c
11
+ // Filename...........neighbourhood/example03.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
@@ -23,7 +23,7 @@
23
23
  float ** alloc_2D(int size1, int size2);
24
24
  void free_2D(float ** array_2D);
25
25
 
26
- // This is 'example3', demonstrating a neighbourhood with only some values used (a cross) and a math.h square root function call
26
+ // This is 'example03', demonstrating a neighbourhood with only some values used (a cross) and a math.h square root function call
27
27
  int main(void) {
28
28
  int i,j;
29
29
  int sizea = A;
@@ -41,7 +41,8 @@ int main(void) {
41
41
  }
42
42
 
43
43
  // Perform the computation
44
- #pragma species kernel 0:sizea-1,0:sizeb-1|neighbourhood(-1:1,-1:1) -> 0:sizea-1,0:sizeb-1|element
44
+ #pragma scop
45
+ #pragma species kernel in[0:sizea-1,0:sizeb-1]|neighbourhood(-1:1,-1:1) -> out[0:sizea-1,0:sizeb-1]|element
45
46
  for(i=0;i<sizea;i++) {
46
47
  for(j=0;j<sizeb;j++) {
47
48
  if (i >= 1 && j >= 1 && i < (sizea-1) && j < (sizeb-1)) {
@@ -55,6 +56,7 @@ int main(void) {
55
56
  }
56
57
  }
57
58
  #pragma species endkernel example3
59
+ #pragma endscop
58
60
 
59
61
  // Clean-up and exit the function
60
62
  free_2D(in);
@@ -8,14 +8,14 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........neighbourhood/example4.c
11
+ // Filename...........neighbourhood/example04.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
 
18
- // This is 'example4', demonstrating naming (optional) in the classification to distingish the two input arrays
18
+ // This is 'example04', demonstrating naming (optional) in the classification to distingish the two input arrays
19
19
  int main(void) {
20
20
  int i;
21
21
  float factor;
@@ -33,6 +33,7 @@ int main(void) {
33
33
  }
34
34
 
35
35
  // Perform the computation
36
+ #pragma scop
36
37
  #pragma species kernel B[0:size-1]|neighbourhood(-1:1) ^ A[0:size-1]|element -> C[0:size-1]|element
37
38
  for(i=0;i<size;i++) {
38
39
  factor = A[i]/100.0;
@@ -44,6 +45,7 @@ int main(void) {
44
45
  }
45
46
  }
46
47
  #pragma species endkernel example4
48
+ #pragma endscop
47
49
 
48
50
  // Clean-up and exit the function
49
51
  fflush(stdout);
@@ -10,7 +10,7 @@
10
10
  // == File information
11
11
  // Filename...........neighbourhood/example05.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...07-May-2013
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
@@ -31,11 +31,13 @@ int main(void) {
31
31
  }
32
32
 
33
33
  // Perform the computation
34
+ #pragma scop
34
35
  #pragma species kernel A[2:N]|neighbourhood(0:1) -> B[2:N-1]|element
35
36
  for (i=2; i<N; i++) {
36
37
  B[i] = A[i] + A[i+1];
37
38
  }
38
39
  #pragma species endkernel example05
40
+ #pragma endscop
39
41
 
40
42
  // Clean-up and exit the function
41
43
  fflush(stdout);
@@ -8,16 +8,16 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........shared/example1.c
11
+ // Filename...........shared/example01.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
  #include <stdlib.h>
18
18
  #define SIZE 512*1024
19
19
 
20
- // This is 'example1', a basic associative and commutative reduction to scalar
20
+ // This is 'example01', a basic associative and commutative reduction to scalar
21
21
  int main(void) {
22
22
  int i;
23
23
 
@@ -32,11 +32,13 @@ int main(void) {
32
32
 
33
33
  // Perform the computation
34
34
  B[0] = 0;
35
- #pragma species kernel 0:SIZE-1|element -> 0:0|shared
35
+ #pragma scop
36
+ #pragma species kernel A[0:SIZE-1]|element -> B[0:0]|shared
36
37
  for(i=0;i<SIZE;i++) {
37
38
  B[0] = B[0] + A[i];
38
39
  }
39
40
  #pragma species endkernel example1
41
+ #pragma endscop
40
42
 
41
43
  // Clean-up and exit the function
42
44
  fflush(stdout);
@@ -8,14 +8,14 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........shared/example2.c
11
+ // Filename...........shared/example02.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...09-May-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
 
18
- // This is 'example2', demonstrating a 3D input reduction to scalar
18
+ // This is 'example02', demonstrating a 3D input reduction to scalar
19
19
  int main(void) {
20
20
  int a,b,c;
21
21
 
@@ -34,7 +34,8 @@ int main(void) {
34
34
 
35
35
  // Perform the computation
36
36
  out[0] = 0;
37
- #pragma species kernel 0:7,0:15,0:31|element -> 0:0|shared
37
+ #pragma scop
38
+ #pragma species kernel in[0:7,0:15,0:31]|element -> out[0:0]|shared
38
39
  for(a=0;a<8;a++) {
39
40
  for(b=0;b<16;b++) {
40
41
  for(c=0;c<32;c++) {
@@ -43,6 +44,7 @@ int main(void) {
43
44
  }
44
45
  }
45
46
  #pragma species endkernel example2
47
+ #pragma endscop
46
48
 
47
49
  // Clean-up and exit the function
48
50
  fflush(stdout);
@@ -8,16 +8,16 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........shared/example3.c
11
+ // Filename...........shared/example03.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
  #include <stdlib.h>
18
18
  #define SIZE 1024
19
19
 
20
- // This is 'example3', demonstrating a reduction to a 2D array
20
+ // This is 'example03', demonstrating a reduction to a 2D array
21
21
  int main(void) {
22
22
  int i,p,q;
23
23
  int index1,index2;
@@ -39,13 +39,15 @@ int main(void) {
39
39
  }
40
40
 
41
41
  // Perform the computation
42
- #pragma species kernel 0:SIZE-1|element -> 0:19,0:9|shared
42
+ #pragma scop
43
+ #pragma species kernel in[0:SIZE-1]|element -> B[0:19,0:9]|shared
43
44
  for(i=0;i<SIZE;i++) {
44
45
  index1 = in[i]%20;
45
46
  index2 = in[i]%10;
46
47
  B[index1][index2] = B[index1][index2] + 1;
47
48
  }
48
49
  #pragma species endkernel example3
50
+ #pragma endscop
49
51
 
50
52
  // Clean-up and exit the function
51
53
  free(in);
@@ -8,16 +8,16 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........shared/example4.c
11
+ // Filename...........shared/example04.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...16-April-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
  #include <stdlib.h>
18
18
  #define SIZE 1024*1024
19
19
 
20
- // This is 'example4', demonstrating a basic 256-bin histogram computation
20
+ // This is 'example04', demonstrating a basic 256-bin histogram computation
21
21
  int main(void) {
22
22
  int i;
23
23
  unsigned char index;
@@ -37,12 +37,14 @@ int main(void) {
37
37
  }
38
38
 
39
39
  // Perform the computation
40
- #pragma species kernel 0:SIZE-1|element -> 0:255|shared
40
+ #pragma scop
41
+ #pragma species kernel A[0:SIZE-1]|element -> B[0:255]|shared
41
42
  for(i=0;i<SIZE;i++) {
42
43
  index = A[i];
43
44
  B[index]++;
44
45
  }
45
46
  #pragma species endkernel example4
47
+ #pragma endscop
46
48
 
47
49
  // Clean-up and exit the function
48
50
  free(A);
@@ -8,14 +8,14 @@
8
8
  // Web address........http://parse.ele.tue.nl/bones/
9
9
  //
10
10
  // == File information
11
- // Filename...........shared/example5.c
11
+ // Filename...........shared/example05.c
12
12
  // Author.............Cedric Nugteren
13
- // Last modified on...07-May-2012
13
+ // Last modified on...10-October-2014
14
14
  //
15
15
 
16
16
  #include <stdio.h>
17
17
 
18
- // This is 'example5', demonstrating an inner-loop only classification of a reduction to scalar
18
+ // This is 'example05', demonstrating an inner-loop only classification of a reduction to scalar
19
19
  int main(void) {
20
20
  int a,b,c;
21
21
 
@@ -32,14 +32,16 @@ int main(void) {
32
32
  }
33
33
 
34
34
  // Perform the computation
35
+ #pragma scop
35
36
  for(a=0;a<16;a++) {
36
- #pragma species kernel a:a,0:a|element -> 0:0|shared
37
+ #pragma species kernel in[a:a,0:a]|element -> out[0:0]|shared
37
38
  for(b=0;b<=a;b++) {
38
39
  out[0] = out[0] - in[a][b]*in[a][b];
39
40
  }
40
41
  #pragma species endkernel example5
41
42
  out[0] = 1.002;
42
43
  }
44
+ #pragma endscop
43
45
 
44
46
  // Clean-up and exit the function
45
47
  fflush(stdout);
@@ -83,16 +83,27 @@ module Adarwin
83
83
  # Parse the original source code into AST form (using CAST)
84
84
  original_ast = parser.parse(preprocessor.parsed_code)
85
85
 
86
+ # Process every SCoP, one by one
87
+ @id = 0
88
+ @result[:species_code] = preprocessor.target_code
89
+ preprocessor.scop_code.each do |scop_code|
90
+ process_scop(scop_code)
91
+ end
92
+ end
93
+
94
+ def process_scop(scop_code)
86
95
  # Create an AST of the SCoP (using CAST) and save a backup
87
- scop_ast = C::Block.parse('{'+preprocessor.scop_code+'}')
96
+ scop_ast = C::Block.parse('{'+scop_code+'}')
88
97
  original_scop_ast = scop_ast.clone
89
98
 
90
99
  # Process the scop to identify the loop nests of interest and to find the
91
100
  # corresponding species. This is the method performing most of the work.
92
101
  @nests = []
93
- @id = 0
94
102
  populate_nests(scop_ast)
95
103
 
104
+ # return if no loop nests are found in the code
105
+ return unless @nests.length > 0
106
+
96
107
  # Remove inner-loop (nested) species. This removes all species that are
97
108
  # found within another species. For completeness, this might be desired in
98
109
  # some cases.
@@ -141,7 +152,7 @@ module Adarwin
141
152
  puts modified_scop if !@options[:silent]
142
153
 
143
154
  # Store the result
144
- @result[:species_code] = preprocessor.target_code.gsub(preprocessor.scop_code,modified_scop)
155
+ @result[:species_code].gsub!(scop_code,modified_scop)
145
156
  end
146
157
 
147
158
  # This method writes the output code to a file.
@@ -149,7 +160,7 @@ module Adarwin
149
160
 
150
161
  # Populate the species file
151
162
  # TODO: The filename is fixed, make this an optional argument
152
- File.open(File.join(@options[:application].split('.').first+'_species'+'.c'),'w') do |target|
163
+ File.open(File.join(@options[:application].rpartition('.').first+'_species'+'.c'),'w') do |target|
153
164
  target.puts @result[:species_code]
154
165
  end
155
166
  end
@@ -172,9 +183,9 @@ module Adarwin
172
183
  # Only continue if the nest is an actual loop nest
173
184
  if nest.for_statement?
174
185
  @nests.push(Nest.new(new_level,nest,@id,@basename,!@options[:silent]))
186
+ @id += 1
175
187
  end
176
188
  end
177
- @id += 1
178
189
  end
179
190
 
180
191
  # Proceed to the next depth level.
@@ -2,14 +2,16 @@
2
2
 
3
3
  # Recursive copy optimisations
4
4
  def recursive_copy_optimisations(nests,options)
5
- perform_copy_optimisations1(nests,options)
6
- perform_copy_optimisations2(nests,options)
7
- nests.each do |nest|
8
- children = get_children(nest)
9
- recursive_copy_optimisations(children,options) if !children.empty?
5
+ 2.times do
6
+ perform_copy_optimisations1(nests,options)
7
+ perform_copy_optimisations2(nests,options)
8
+ nests.each do |nest|
9
+ children = get_children(nest)
10
+ recursive_copy_optimisations(children,options) if !children.empty?
11
+ end
12
+ perform_copy_optimisations3(nests,options)
13
+ perform_copy_optimisations3(nests,options)
10
14
  end
11
- perform_copy_optimisations3(nests,options)
12
- perform_copy_optimisations3(nests,options)
13
15
  end
14
16
 
15
17
  # First set of copyin/copyout optimisations (recursive)
@@ -134,8 +136,18 @@ def perform_copy_optimisations3(nests,options)
134
136
  # Move copyins to outer loops
135
137
  children.first.copyins.each do |copyin|
136
138
  to_outer_loop = true
137
- nest.outer_loops.map{ |l| l[:var] }.each do |var|
138
- to_outer_loop = false if copyin.depends_on?(var)
139
+ nest.outer_loops.map{ |l| l[:var] }.each_with_index do |var,lindex|
140
+ if copyin.depends_on?(var)
141
+ to_outer_loop = false
142
+ if copyin.tD[0].a == var && copyin.tD[0].b == var
143
+ loopinfo = nest.outer_loops[lindex]
144
+ if loopinfo[:step] == "1"
145
+ copyin.tD[0].a = loopinfo[:min]
146
+ copyin.tD[0].b = loopinfo[:max]
147
+ to_outer_loop = true
148
+ end
149
+ end
150
+ end
139
151
  end
140
152
  children.drop(1).each do |child|
141
153
  to_outer_loop = false if child.copyins.map{ |c| c.tN }.include?(copyin.tN)
@@ -47,12 +47,15 @@ module Adarwin
47
47
  @all_loops = @code.get_all_loops()
48
48
  @outer_loops = @code.get_direct_loops()
49
49
  @inner_loops = @all_loops - @outer_loops
50
+
51
+ # Get all local variable declarations
52
+ @var_declarations = @code.get_var_declarations()
50
53
 
51
54
  # Process the read/write nodes in the loop body to obtain the array
52
55
  # reference characterisations. The references also need to be aware of all
53
56
  # loop data and of any if-statements in the loop body.
54
57
  @references = @code.clone.get_accesses().map do |reference|
55
- Reference.new(reference,@id,@inner_loops,@outer_loops,@verbose)
58
+ Reference.new(reference,@id,@inner_loops,@outer_loops,@var_declarations,@verbose)
56
59
  end
57
60
 
58
61
  # Perform the dependence test. The result can be either true or false.
@@ -121,6 +124,19 @@ module Adarwin
121
124
  # Else, set the species for the individual accesses.
122
125
  read_names = (@reads.empty?) ? ['0:0|void'] : @reads.map{ |r| r.to_species }
123
126
  write_names = (@writes.empty?) ? ['0:0|void'] : @writes.map{ |r| r.to_species }
127
+
128
+ # Remove a 'full' access pattern in case there is a same 'shared' write pattern
129
+ write_names.each do |write_name|
130
+ write_parts = write_name.split(PIPE)
131
+ if write_parts.last == 'shared'
132
+ read_names.each do |read_name|
133
+ read_parts = read_name.split(PIPE)
134
+ if read_parts.last == 'full' && read_parts.first == write_parts.first
135
+ read_names.delete(read_name)
136
+ end
137
+ end
138
+ end
139
+ end
124
140
 
125
141
  # Combine the descriptions (using Reference's +to_s+ method) into species
126
142
  species_in = read_names.uniq.join(' '+WEDGE+' ')
@@ -174,6 +190,7 @@ module Adarwin
174
190
  return false if @removed
175
191
  return false if @has_dependences
176
192
  return false if @species == ''
193
+ return false if (@writes) && (@writes.select{ |a| a.pattern == 'shared' }.length > 3)
177
194
  only_full = (@reads) ? @reads.select{ |a| a.pattern != 'full' }.empty? : false
178
195
  only_shared = (@writes) ? @writes.select{ |a| a.pattern != 'shared' }.empty? : false
179
196
  return !(only_full && only_shared)