bones-compiler 1.3.1 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (307) hide show
  1. data/CHANGELOG +62 -0
  2. data/README.rdoc +14 -3
  3. data/Rakefile +13 -12
  4. data/VERSION +1 -1
  5. data/examples/applications/ffos.c +24 -8
  6. data/examples/benchmarks/PolyBench/2mm.c +0 -0
  7. data/examples/benchmarks/PolyBench/3mm.c +0 -0
  8. data/examples/benchmarks/PolyBench/adi.c +0 -0
  9. data/examples/benchmarks/PolyBench/atax.c +0 -0
  10. data/examples/benchmarks/PolyBench/bicg.c +0 -0
  11. data/examples/benchmarks/PolyBench/cholesky.c +0 -0
  12. data/examples/benchmarks/PolyBench/common.h +0 -0
  13. data/examples/benchmarks/PolyBench/correlation.c +0 -0
  14. data/examples/benchmarks/PolyBench/covariance.c +0 -0
  15. data/examples/benchmarks/PolyBench/doitgen.c +0 -0
  16. data/examples/benchmarks/PolyBench/durbin.c +0 -0
  17. data/examples/benchmarks/PolyBench/dynprog.c +0 -0
  18. data/examples/benchmarks/PolyBench/fdtd-2d-apml.c +0 -0
  19. data/examples/benchmarks/PolyBench/fdtd-2d.c +0 -0
  20. data/examples/benchmarks/PolyBench/floyd-warshall.c +0 -0
  21. data/examples/benchmarks/PolyBench/gemm.c +0 -0
  22. data/examples/benchmarks/PolyBench/gemver.c +0 -0
  23. data/examples/benchmarks/PolyBench/gesummv.c +0 -0
  24. data/examples/benchmarks/PolyBench/gramschmidt.c +0 -0
  25. data/examples/benchmarks/PolyBench/jacobi-1d-imper.c +4 -2
  26. data/examples/benchmarks/PolyBench/jacobi-2d-imper.c +1 -1
  27. data/examples/benchmarks/PolyBench/lu.c +0 -0
  28. data/examples/benchmarks/PolyBench/ludcmp.c +0 -0
  29. data/examples/benchmarks/PolyBench/mvt.c +0 -0
  30. data/examples/benchmarks/PolyBench/reg_detect.c +0 -0
  31. data/examples/benchmarks/PolyBench/seidel-2d.c +0 -0
  32. data/examples/benchmarks/PolyBench/symm.c +0 -0
  33. data/examples/benchmarks/PolyBench/syr2k.c +0 -0
  34. data/examples/benchmarks/PolyBench/syrk.c +0 -0
  35. data/examples/benchmarks/PolyBench/trisolv.c +0 -0
  36. data/examples/benchmarks/PolyBench/trmm.c +0 -0
  37. data/examples/benchmarks/Rodinia/bfs.c +143 -0
  38. data/examples/benchmarks/Rodinia/common.h +78 -0
  39. data/examples/benchmarks/Rodinia/hotspot.c +106 -126
  40. data/examples/benchmarks/Rodinia/kmeans.c +157 -164
  41. data/examples/benchmarks/Rodinia/nw.c +151 -0
  42. data/examples/benchmarks/Rodinia/pathfinder.c +88 -0
  43. data/examples/benchmarks/Rodinia/srad.c +50 -59
  44. data/examples/benchmarks/other/common.h +0 -0
  45. data/examples/benchmarks/other/dct.c +0 -0
  46. data/examples/benchmarks/other/mm.c +0 -0
  47. data/examples/benchmarks/other/saxpy.c +0 -0
  48. data/examples/chunk/example01.c +6 -4
  49. data/examples/chunk/example02.c +6 -4
  50. data/examples/chunk/example03.c +6 -4
  51. data/examples/chunk/example04.c +8 -5
  52. data/examples/chunk/example05.c +6 -4
  53. data/examples/chunk/example06.c +3 -1
  54. data/examples/chunk/example07.c +5 -2
  55. data/examples/dependences/example01.c +3 -1
  56. data/examples/dependences/example02.c +3 -1
  57. data/examples/dependences/example03.c +3 -1
  58. data/examples/dependences/example04.c +3 -1
  59. data/examples/dependences/example05.c +3 -1
  60. data/examples/element/example01.c +6 -4
  61. data/examples/element/example02.c +6 -4
  62. data/examples/element/example03.c +10 -8
  63. data/examples/element/example04.c +6 -4
  64. data/examples/element/example05.c +8 -5
  65. data/examples/element/example06.c +6 -4
  66. data/examples/element/example07.c +6 -4
  67. data/examples/element/example08.c +6 -4
  68. data/examples/element/example09.c +6 -4
  69. data/examples/element/example10.c +4 -2
  70. data/examples/element/example11.c +4 -2
  71. data/examples/element/example12.c +4 -2
  72. data/examples/element/example13.c +3 -1
  73. data/examples/fusion/example01.c +3 -12
  74. data/examples/fusion/example02.c +3 -16
  75. data/examples/fusion/example03.c +3 -1
  76. data/examples/fusion/example04.c +5 -3
  77. data/examples/fusion/example05.c +3 -1
  78. data/examples/neighbourhood/example01.c +6 -4
  79. data/examples/neighbourhood/example02.c +6 -4
  80. data/examples/neighbourhood/example03.c +6 -4
  81. data/examples/neighbourhood/example04.c +5 -3
  82. data/examples/neighbourhood/example05.c +3 -1
  83. data/examples/shared/example01.c +6 -4
  84. data/examples/shared/example02.c +6 -4
  85. data/examples/shared/example03.c +6 -4
  86. data/examples/shared/example04.c +6 -4
  87. data/examples/shared/example05.c +6 -4
  88. data/lib/adarwin/engine.rb +16 -5
  89. data/lib/adarwin/memorycopies.rb +21 -9
  90. data/lib/adarwin/nest.rb +18 -1
  91. data/lib/adarwin/preprocessor.rb +5 -2
  92. data/lib/adarwin/reference.rb +71 -6
  93. data/lib/bones/algorithm.rb +20 -5
  94. data/lib/bones/copy.rb +3 -2
  95. data/lib/bones/engine.rb +12 -9
  96. data/lib/bones/preprocessor.rb +170 -120
  97. data/lib/bones/variablelist.rb +1 -1
  98. data/lib/cast.rb +11 -0
  99. data/lib/castaddon.rb +23 -6
  100. data/lib/castaddon/node_adarwin.rb +17 -0
  101. data/lib/castaddon/node_common.rb +6 -0
  102. data/lib/castaddon/transformations.rb +13 -9
  103. data/skeletons/CPU-C/common/epilogue.c +0 -0
  104. data/skeletons/CPU-C/common/globals.c +0 -0
  105. data/skeletons/CPU-C/common/globals_kernel.c +0 -0
  106. data/skeletons/CPU-C/common/header.c +0 -0
  107. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  108. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  109. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  110. data/skeletons/CPU-C/common/mem_global.c +0 -0
  111. data/skeletons/CPU-C/common/mem_prologue.c +0 -0
  112. data/skeletons/CPU-C/common/prologue.c +0 -0
  113. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  114. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  115. data/skeletons/CPU-C/common/timer_2_start.c +0 -0
  116. data/skeletons/CPU-C/common/timer_2_stop.c +0 -0
  117. data/skeletons/CPU-C/common/timer_globals.c +5 -0
  118. data/skeletons/CPU-C/kernel/default.host.c +0 -0
  119. data/skeletons/CPU-C/kernel/default.kernel.c +0 -0
  120. data/skeletons/CPU-C/skeletons.txt +0 -0
  121. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +0 -0
  122. data/skeletons/CPU-OPENCL-AMD/common/globals.c +0 -0
  123. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +0 -0
  124. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  125. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
  126. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
  127. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
  128. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +0 -0
  129. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +0 -0
  130. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +0 -0
  131. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
  132. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +0 -0
  133. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
  134. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
  135. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
  136. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +0 -0
  137. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
  138. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +0 -0
  139. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +0 -0
  140. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +0 -0
  141. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +0 -0
  142. data/skeletons/CPU-OPENCL-INTEL/common/header.c +0 -0
  143. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +0 -0
  144. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +0 -0
  145. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +0 -0
  146. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  147. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +0 -0
  148. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +0 -0
  149. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +0 -0
  150. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +0 -0
  151. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +0 -0
  152. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +0 -0
  153. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +5 -0
  154. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +0 -0
  155. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +0 -0
  156. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +0 -0
  157. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +0 -0
  158. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +0 -0
  159. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  160. data/skeletons/CPU-OPENMP/common/globals.c +0 -0
  161. data/skeletons/CPU-OPENMP/common/globals_kernel.c +0 -0
  162. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  163. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  164. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  165. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  166. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  167. data/skeletons/CPU-OPENMP/common/mem_prologue.c +0 -0
  168. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  169. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -0
  170. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  171. data/skeletons/CPU-OPENMP/common/timer_2_start.c +0 -0
  172. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +0 -0
  173. data/skeletons/CPU-OPENMP/common/timer_globals.c +2 -0
  174. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +0 -0
  175. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +3 -3
  176. data/skeletons/CPU-OPENMP/kernel/default.host.c +0 -0
  177. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +0 -0
  178. data/skeletons/CPU-OPENMP/skeletons.txt +0 -0
  179. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  180. data/skeletons/GPU-CUDA/common/globals.c +0 -0
  181. data/skeletons/GPU-CUDA/common/globals_kernel.c +0 -0
  182. data/skeletons/GPU-CUDA/common/header.c +0 -0
  183. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +0 -0
  184. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +0 -0
  185. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +0 -0
  186. data/skeletons/GPU-CUDA/common/mem_async_free.c +0 -0
  187. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +0 -0
  188. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +0 -0
  189. data/skeletons/GPU-CUDA/common/mem_epilogue.c +0 -0
  190. data/skeletons/GPU-CUDA/common/mem_global.c +0 -0
  191. data/skeletons/GPU-CUDA/common/mem_prologue.c +0 -0
  192. data/skeletons/GPU-CUDA/common/prologue.c +0 -0
  193. data/skeletons/GPU-CUDA/common/scheduler.c +2 -2
  194. data/skeletons/GPU-CUDA/common/timer_1_start.c +0 -0
  195. data/skeletons/GPU-CUDA/common/timer_1_stop.c +0 -0
  196. data/skeletons/GPU-CUDA/common/timer_2_start.c +0 -0
  197. data/skeletons/GPU-CUDA/common/timer_2_stop.c +0 -0
  198. data/skeletons/GPU-CUDA/common/timer_globals.c +0 -0
  199. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +0 -0
  200. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +0 -0
  201. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +0 -0
  202. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +0 -0
  203. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +0 -0
  204. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +0 -0
  205. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +0 -0
  206. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +0 -0
  207. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +0 -0
  208. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +0 -0
  209. data/skeletons/GPU-CUDA/kernel/default.host.c +0 -0
  210. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +0 -0
  211. data/skeletons/GPU-CUDA/skeletons.txt +4 -2
  212. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +0 -0
  213. data/skeletons/GPU-OPENCL-AMD/common/globals.c +0 -0
  214. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +0 -0
  215. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  216. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
  217. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
  218. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
  219. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +0 -0
  220. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +0 -0
  221. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +0 -0
  222. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
  223. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +0 -0
  224. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
  225. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
  226. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
  227. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +0 -0
  228. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
  229. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +0 -0
  230. data/skeletons/verification/header.c +0 -0
  231. data/skeletons/verification/timer_start.c +0 -0
  232. data/skeletons/verification/timer_stop.c +0 -0
  233. data/skeletons/verification/verify_results.c +0 -0
  234. data/test/bones/test_algorithm.rb +0 -0
  235. data/test/bones/test_common.rb +0 -0
  236. data/test/bones/test_preprocessor.rb +0 -0
  237. data/test/bones/test_species.rb +0 -0
  238. data/test/bones/test_variable.rb +0 -0
  239. data/test/examples/benchmarks/PolyBench/2mm_species.c +1 -1
  240. data/test/examples/benchmarks/PolyBench/3mm_species.c +0 -0
  241. data/test/examples/benchmarks/PolyBench/adi_species.c +0 -0
  242. data/test/examples/benchmarks/PolyBench/atax_species.c +0 -0
  243. data/test/examples/benchmarks/PolyBench/bicg_species.c +0 -0
  244. data/test/examples/benchmarks/PolyBench/cholesky_species.c +0 -0
  245. data/test/examples/benchmarks/PolyBench/correlation_species.c +0 -0
  246. data/test/examples/benchmarks/PolyBench/covariance_species.c +0 -0
  247. data/test/examples/benchmarks/PolyBench/doitgen_species.c +0 -0
  248. data/test/examples/benchmarks/PolyBench/durbin_species.c +0 -0
  249. data/test/examples/benchmarks/PolyBench/dynprog_species.c +0 -0
  250. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +0 -0
  251. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +0 -0
  252. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +0 -0
  253. data/test/examples/benchmarks/PolyBench/gemm_species.c +0 -0
  254. data/test/examples/benchmarks/PolyBench/gemver_species.c +0 -0
  255. data/test/examples/benchmarks/PolyBench/gesummv_species.c +0 -0
  256. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +0 -0
  257. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +0 -0
  258. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +0 -0
  259. data/test/examples/benchmarks/PolyBench/lu_species.c +0 -0
  260. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +0 -0
  261. data/test/examples/benchmarks/PolyBench/mvt_species.c +0 -0
  262. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +0 -0
  263. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +0 -0
  264. data/test/examples/benchmarks/PolyBench/symm_species.c +0 -0
  265. data/test/examples/benchmarks/PolyBench/syr2k_species.c +0 -0
  266. data/test/examples/benchmarks/PolyBench/syrk_species.c +0 -0
  267. data/test/examples/benchmarks/PolyBench/trisolv_species.c +0 -0
  268. data/test/examples/benchmarks/PolyBench/trmm_species.c +0 -0
  269. data/test/examples/chunk/example01_species.c +3 -3
  270. data/test/examples/chunk/example02_species.c +3 -3
  271. data/test/examples/chunk/example03_species.c +3 -3
  272. data/test/examples/chunk/example04_species.c +3 -3
  273. data/test/examples/chunk/example05_species.c +3 -3
  274. data/test/examples/chunk/example06_species.c +1 -1
  275. data/test/examples/chunk/example07_species.c +3 -2
  276. data/test/examples/dependences/example01_species.c +1 -1
  277. data/test/examples/dependences/example02_species.c +1 -1
  278. data/test/examples/dependences/example03_species.c +1 -1
  279. data/test/examples/dependences/example04_species.c +1 -1
  280. data/test/examples/dependences/example05_species.c +1 -1
  281. data/test/examples/element/example01_species.c +3 -3
  282. data/test/examples/element/example02_species.c +3 -3
  283. data/test/examples/element/example03_species.c +7 -7
  284. data/test/examples/element/example04_species.c +3 -3
  285. data/test/examples/element/example05_species.c +3 -3
  286. data/test/examples/element/example06_species.c +3 -3
  287. data/test/examples/element/example07_species.c +3 -3
  288. data/test/examples/element/example08_species.c +3 -3
  289. data/test/examples/element/example09_species.c +3 -3
  290. data/test/examples/element/example10_species.c +1 -1
  291. data/test/examples/element/example11_species.c +1 -1
  292. data/test/examples/element/example12_species.c +1 -1
  293. data/test/examples/element/example13_species.c +1 -1
  294. data/test/examples/neighbourhood/example01_species.c +3 -3
  295. data/test/examples/neighbourhood/example02_species.c +3 -3
  296. data/test/examples/neighbourhood/example03_species.c +3 -3
  297. data/test/examples/neighbourhood/example04_species.c +3 -3
  298. data/test/examples/neighbourhood/example05_species.c +1 -1
  299. data/test/examples/shared/example01_species.c +3 -3
  300. data/test/examples/shared/example02_species.c +3 -3
  301. data/test/examples/shared/example03_species.c +3 -3
  302. data/test/examples/shared/example04_species.c +3 -3
  303. data/test/examples/shared/example05_species.c +3 -3
  304. data/test/test_helper.rb +2 -2
  305. metadata +266 -252
  306. checksums.yaml +0 -15
  307. data/examples/benchmarks/Rodinia/cfd.c +0 -180
data/CHANGELOG CHANGED
@@ -1,3 +1,65 @@
1
+ ###################
2
+ ### v1.6.0 ###
3
+ ###################
4
+
5
+ General:
6
+ - Add support for multiple scops in a file
7
+
8
+ Bug fixes:
9
+ - Fixed mismatch in kernel arguments of the OpenMP D-element-to-1 skeleton
10
+ - Fixed the empty-scop bug in A-Darwin
11
+
12
+ Miscellaneous:
13
+ - Updated the documentation to include the latest Bones publication "Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs"
14
+
15
+ ###################
16
+ ### v1.5.0c ###
17
+ ###################
18
+
19
+ Bug fixes:
20
+ - Remove CAST gem, only include fixes
21
+ - Update examples and tests
22
+
23
+ ###################
24
+ ### v1.5.0b ###
25
+ ###################
26
+
27
+ Bug fixes:
28
+ - Fix filename parsing when directory or filename contains a '.'
29
+ - Include updated CAST gem in bones which preserves literal suffixes
30
+
31
+ ###################
32
+ ### v1.5 ###
33
+ ###################
34
+
35
+ Bug fixes:
36
+ - Adjusted the examples to include pragma scop and named species (to make them work again with Bones)
37
+ - Fixed a bug where input/output variables of species with 'shared' were not properly handled
38
+ - Removed warning from Rakefile
39
+ - Fixed a bug requiring Bones to be executed from the folder containing the 'lib' directory
40
+ - Fixed a filename issue on non-Unix systems
41
+ - The pre-processor now understands block-comments
42
+
43
+ Various:
44
+ - Added .gitignore file
45
+ - Improved clarity of Rakefile stub targets
46
+ - Improved error handling of incorrect names
47
+ - Improved error handling of incorrect species
48
+
49
+ ###################
50
+ ### v1.4 ###
51
+ ###################
52
+
53
+ A-Darwin:
54
+ - Handles additional cases of copyin/out-to-outer-loop movement
55
+
56
+ Skeletons:
57
+ - Improved performance of the memory-copy thread ('scheduler')
58
+ - Minor changes to skeletons mapping file
59
+
60
+ Examples:
61
+ - Added 6 example benchmarks from the Rodinia suite
62
+
1
63
  ###################
2
64
  ### v1.3 ###
3
65
  ###################
@@ -116,7 +116,18 @@ Code documentation can be generated automatically using RDoc. Navigate to the in
116
116
  == Scientific publications
117
117
  Scientific publications related to Bones/A-Darwin can be obtained from http://www.cedricnugteren.nl/publications. Several publications are relevant:
118
118
 
119
- 1. <b>Algorithmic Species Revisited: A Program Code Classification Based on Array References</b>, which provides details on the algorithm classification (the species) and A-Darwin (the tool). When refering to the algorithm classification in scientific work, you are kindly asked to include the following citation:
119
+ 1. <b>Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs</b>, which provides details on the Bones source-to-source compiler, including optimizations in host-accelerator transfer and loop fusion in kernel code. When referring to GPU code generation using Bones, loop fusion or optimizations in host-accelerator transfer in scientific work, you are kindly asked to include the following citation:
120
+
121
+ @ARTICLE{Nugteren2015a,
122
+ author = {Cedric Nugteren and Henk Corporaal},
123
+ title = {Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs},
124
+ journal = {ACM Trans. Archit. Code Optim.},
125
+ volume = {11},
126
+ number = {4},
127
+ year = {2015},
128
+ }
129
+
130
+ 2. <b>Algorithmic Species Revisited: A Program Code Classification Based on Array References</b>, which provides details on the algorithm classification (the species) and A-Darwin (the tool). When referring to the algorithm classification in scientific work, you are kindly asked to include the following citation:
120
131
 
121
132
  @INPROCEEDINGS{Nugteren2013a,
122
133
  author = {Cedric Nugteren and Rosilde Corvino and Henk Corporaal},
@@ -125,7 +136,7 @@ Scientific publications related to Bones/A-Darwin can be obtained from http://ww
125
136
  year = {2013},
126
137
  }
127
138
 
128
- 2. <b>Automatic Skeleton-Based Compilation through Integration with an Algorithm Classification</b>, which discusses the Bones source-to-source compiler. When refering to Bones in scientific work, you are kindly asked to include the following citation:
139
+ 3. <b>Automatic Skeleton-Based Compilation through Integration with an Algorithm Classification</b>, which discusses the Bones source-to-source compiler. When referring to Bones in scientific work, you are kindly asked to include the following citation:
129
140
 
130
141
  @INPROCEEDINGS{Nugteren2013b,
131
142
  author = {Cedric Nugteren and Pieter Custers and Henk Corporaal},
@@ -148,4 +159,4 @@ With rake, A-Darwin can be tested on a set of examples '<tt>rake adarwin_test</t
148
159
 
149
160
 
150
161
  = Questions
151
- Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or http://parse.ele.tue.nl/cnugteren/ or on the project page at github.
162
+ Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or on the project page at GitHub.
data/Rakefile CHANGED
@@ -37,7 +37,7 @@ TARGET = TARGETS[0]
37
37
  MEASUREMENTS = true
38
38
  VERIFICATION = false
39
39
  MEMORY_OPTIMISATIONS = true
40
- ADARWIN_OPTIONS = MEMORY_OPTIMISATIONS ? '-r -f -b -l' : ''
40
+ ADARWIN_OPTIONS_BONES = MEMORY_OPTIMISATIONS ? '-r -f -b -l' : ''
41
41
 
42
42
  # Small helper function to display text on screen
43
43
  def display(text)
@@ -62,7 +62,7 @@ namespace :examples do
62
62
  bones_options = (MEASUREMENTS ? '-m ' : '') + (VERIFICATION ? '-c ' : '')
63
63
  args.with_defaults(:file => EXAMPLES)
64
64
  Dir[args.file].sort.each do |file|
65
- sh "bin/adarwin -a #{file} #{ADARWIN_OPTIONS}"
65
+ sh "bin/adarwin -a #{file} #{ADARWIN_OPTIONS_BONES}"
66
66
  split = file.split('.')
67
67
  file = split[0]+'_species'+'.'+split[1]
68
68
  sh "bin/bones -a #{file} -t #{TARGET} #{bones_options}"
@@ -100,15 +100,15 @@ namespace :examples do
100
100
  end
101
101
  end
102
102
 
103
- # Helper function to compile code
104
- #def compile(file,target)
105
- # (system-specific, to be filled in by the user)
106
- #end
103
+ # Helper function to compile code (NOTE: this task is a stub)
104
+ def compile(file,target)
105
+ puts "[Rake] ### Compiling the code is system-specific, to be filled in..."
106
+ end
107
107
 
108
- # Helper function to execute code
109
- #def execute(file,target)
110
- # (system-specific, to be filled in by the user)
111
- #end
108
+ # Helper function to execute code (NOTE: this task is a stub)
109
+ def execute(file,target)
110
+ puts "[Rake] ### Executing the code is system-specific, to be filled in..."
111
+ end
112
112
 
113
113
  end
114
114
  task :examples => ['examples:generate']
@@ -172,11 +172,12 @@ end
172
172
 
173
173
  # Generate HTML documentation using RDoc
174
174
  RDoc::Task.new do |rdoc|
175
- rdoc.title = 'Bones'
175
+ version = File.read('VERSION')
176
+ rdoc.title = 'Bones - %s' % version
176
177
  rdoc.options << '--line-numbers'
177
178
  rdoc.rdoc_files.include(File.join('lib','**','*.rb'))
178
179
  rdoc.rdoc_files.include('README.rdoc')
180
+ rdoc.rdoc_files.include('VERSION')
179
181
  rdoc.rdoc_dir = 'rdoc'
180
182
  rdoc.main = 'README.rdoc'
181
183
  end
182
-
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.3.1
1
+ 1.6.0
@@ -15,7 +15,7 @@
15
15
  // == File information
16
16
  // Filename...........applications/ffos.c
17
17
  // Author.............Cedric Nugteren
18
- // Last modified on...22-May-2012
18
+ // Last modified on...11-October-2014
19
19
  //
20
20
 
21
21
  //########################################################################
@@ -104,13 +104,15 @@ int main(void) {
104
104
  //########################################################################
105
105
  if (messages >= 1) { printf("### PART1: Histogramming.\n"); fflush(stdout); }
106
106
 
107
- #pragma species kernel 0:height-1,0:width-1|element -> 0:255|shared
107
+ #pragma scop
108
+ #pragma species kernel image0[0:height-1,0:width-1]|element -> hist[0:255]|shared
108
109
  for (h=0;h<height;h++) {
109
110
  for (w=0;w<width;w++) {
110
111
  hist[image0[h][w]] = hist[image0[h][w]] + 1;
111
112
  }
112
113
  }
113
114
  #pragma species endkernel histogram
115
+ #pragma endscop
114
116
 
115
117
  //########################################################################
116
118
  //### Between class variance (CPU)
@@ -168,14 +170,22 @@ int main(void) {
168
170
  //########################################################################
169
171
  if (messages >= 1) { printf("### PART4: Binarization with treshold at %d.\n",threshold); fflush(stdout); }
170
172
 
171
- #pragma species kernel 0:height-1,0:width-1|element -> 0:height-1,0:width-1|element
173
+ unsigned char val;
174
+ #pragma scop
175
+ #pragma species kernel image0[0:height-1,0:width-1]|element -> image1[0:height-1,0:width-1]|element
172
176
  for (h=0;h<height;h++) {
173
177
  for (w=0;w<width;w++) {
174
- if (image0[h][w] > threshold) { image1[h][w] = 1; }
175
- else { image1[h][w] = 0; }
178
+ if (image0[h][w] > threshold) {
179
+ val = 1;
180
+ }
181
+ else {
182
+ val = 0;
183
+ }
184
+ image1[h][w] = val;
176
185
  }
177
186
  }
178
187
  #pragma species endkernel threshold
188
+ #pragma endscop
179
189
 
180
190
  //########################################################################
181
191
  //### PART5: Erosion 7x7 (accelerated)
@@ -183,7 +193,8 @@ int main(void) {
183
193
  if (messages >= 1) { printf("### PART5: Perform the erode kernel.\n"); fflush(stdout); }
184
194
 
185
195
  int condition;
186
- #pragma species kernel 7:height-8,7:width-8|neighbourhood(-3:3,-3:3) -> 0:height-1,0:width-1|element
196
+ #pragma scop
197
+ #pragma species kernel image1[7:height-8,7:width-8]|neighbourhood(-3:3,-3:3) -> image2[0:height-1,0:width-1]|element
187
198
  for (h=0;h<height;h++) {
188
199
  for (w=0;w<width;w++) {
189
200
  if (w >= 7 && h >= 7 && w <= width-7 && h <= height-7) {
@@ -208,6 +219,7 @@ int main(void) {
208
219
  }
209
220
  }
210
221
  #pragma species endkernel erosion
222
+ #pragma endscop
211
223
 
212
224
  //########################################################################
213
225
  //### PART6: 1D erosion(7) synthetic example (accelerated)
@@ -251,7 +263,8 @@ int main(void) {
251
263
  if (messages >= 1) { printf("### PART7: Starting the Y-projection algorithm.\n"); fflush(stdout); }
252
264
 
253
265
  int result_yp;
254
- #pragma species kernel 0:height-1,0:width-1|chunk(0:height-1,0:0) -> 0:width-1|element
266
+ #pragma scop
267
+ #pragma species kernel image2[0:height-1,0:width-1]|chunk(0:height-1,0:0) -> Yvector[0:width-1]|element
255
268
  for (w=0;w<width;w++) {
256
269
  result_yp = 0;
257
270
  for (h=0;h<height;h++) {
@@ -262,6 +275,7 @@ int main(void) {
262
275
  Yvector[w] = result_yp;
263
276
  }
264
277
  #pragma species endkernel y_projection
278
+ #pragma endscop
265
279
 
266
280
  //########################################################################
267
281
  //### PART8: X-projection (accelerated)
@@ -269,7 +283,8 @@ int main(void) {
269
283
  if (messages >= 1) { printf("### PART8: Starting the X-projection algorithm.\n"); fflush(stdout); }
270
284
 
271
285
  int result_xp;
272
- #pragma species kernel 0:height-1,0:width-1|chunk(0:0,0:width-1) -> 0:height-1|element
286
+ #pragma scop
287
+ #pragma species kernel image2[0:height-1,0:width-1]|chunk(0:0,0:width-1) -> Xvector[0:height-1]|element
273
288
  for (h=0;h<height;h++) {
274
289
  result_xp = 0;
275
290
  for (w=0;w<width;w++) {
@@ -280,6 +295,7 @@ int main(void) {
280
295
  Xvector[h] = result_xp;
281
296
  }
282
297
  #pragma species endkernel x_projection
298
+ #pragma endscop
283
299
 
284
300
  //########################################################################
285
301
  //### Search for the centers of the projection vectors (CPU)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -43,8 +43,10 @@ int main(void) {
43
43
  #pragma scop
44
44
  for (t=0; t<TSTEPS; t++) {
45
45
  #pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
46
- for (i=1; i<LARGE_N-1; i++) {
47
- B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]);
46
+ for (i=0; i<LARGE_N; i++) {
47
+ if (i > 0 && i < LARGE_N-1) {
48
+ B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]);
49
+ }
48
50
  }
49
51
  #pragma species endkernel jacobi-1d-imper-part1
50
52
  #pragma species kernel 1:LARGE_N-2|element -> 1:LARGE_N-2|element
@@ -42,7 +42,7 @@ int main(void) {
42
42
  #pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
43
43
  for (i=1; i<N-1; i++) {
44
44
  for (j=1; j<N-1; j++) {
45
- if (i < N-1 && j < N-1) {
45
+ if (i > 0 && j > 0 && i < N-1 && j < N-1) {
46
46
  B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
47
47
  }
48
48
  }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,143 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // demonstrates the use of Bones for an example application: 'bfs', taken from
4
+ // the Rodinia benchmark suite. For more information on the application or on Bones
5
+ // please use the contact information below.
6
+ //
7
+ // == More information on bfs
8
+ // Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
9
+ //
10
+ // == More information on Bones
11
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
12
+ // Web address........http://parse.ele.tue.nl/bones/
13
+ //
14
+ // == File information
15
+ // Filename...........benchmarks/Rodinia/bfs.c
16
+ // Authors............Cedric Nugteren
17
+ // Last modified on...08-Jun-2014
18
+ //
19
+ //########################################################################
20
+
21
+ // Includes
22
+ #include "common.h"
23
+
24
+ //########################################################################
25
+ //### Start of the main function
26
+ //########################################################################
27
+
28
+ int main(void) {
29
+ int no_of_nodes;
30
+
31
+ // Read input data
32
+ printf("[bfs] Reading File\n");
33
+ FILE* fp = fopen(FILENAME, "r");
34
+ if (!fp) {
35
+ printf("[bfs] Error Reading graph file\n");
36
+ return 1;
37
+ }
38
+ fscanf(fp,"%d",&no_of_nodes);
39
+
40
+ // Arrays
41
+ int h_graph_nodes_start[MAX_NODES];
42
+ int h_graph_nodes_edges[MAX_NODES];
43
+ int h_graph_mask[MAX_NODES];
44
+ int h_updating_graph_mask[MAX_NODES];
45
+ int h_graph_visited[MAX_NODES];
46
+
47
+ // Initialize
48
+ int start;
49
+ int edges;
50
+ for (int i=0; i<no_of_nodes; i++) {
51
+ fscanf(fp, "%d %d", &start, &edges);
52
+ h_graph_nodes_start[i] = start;
53
+ h_graph_nodes_edges[i] = edges;
54
+ h_graph_mask[i] = 0;
55
+ h_updating_graph_mask[i] = 0;
56
+ h_graph_visited[i] = 0;
57
+ }
58
+
59
+ // Read the source node from the file
60
+ int source = 0;
61
+ fscanf(fp, "%d", &source);
62
+ source = 0;
63
+
64
+ // Set the source node as true in the mask
65
+ h_graph_mask[source] = 1;
66
+ h_graph_visited[source] = 1;
67
+
68
+ // Get the edge list
69
+ int id;
70
+ int cost;
71
+ int edge_list_size;
72
+ fscanf(fp,"%d",&edge_list_size);
73
+ int h_graph_edges[MAX_NODES];
74
+ for(int i=0; i<edge_list_size; i++) {
75
+ fscanf(fp, "%d", &id);
76
+ fscanf(fp, "%d", &cost);
77
+ h_graph_edges[i] = id;
78
+ }
79
+
80
+ // Memory for the result
81
+ int h_cost[MAX_NODES];
82
+ for(int i=0; i<MAX_NODES; i++) {
83
+ h_cost[i] = -1;
84
+ }
85
+ h_cost[source] = 0;
86
+
87
+ // Start the computation
88
+ printf("[bfs] Start traversing the tree\n");
89
+ int k = 0;
90
+ int stop[1];
91
+
92
+ // If no thread changes this value then the loop stops
93
+ stop[0] = 0;
94
+
95
+ #pragma scop
96
+ for (unsigned t=0; t<10; t++) {
97
+ //do {
98
+
99
+ // Atomic update loop
100
+ for(int tid=0; tid<no_of_nodes; tid++) {
101
+ int val1 = h_graph_mask[tid];
102
+ if (val1 == 1) {
103
+ h_graph_mask[tid] = 0;
104
+ int val2 = h_graph_nodes_start[tid];
105
+ int val3 = h_graph_nodes_edges[tid];
106
+ for (int i=val2; i<(val3 + val2); i++) {
107
+ int id = h_graph_edges[i];
108
+ int val4 = h_graph_visited[id];
109
+ if (val4 == 0) {
110
+ h_cost[id] = h_cost[tid] + 1;
111
+ h_updating_graph_mask[id] = 1;
112
+ }
113
+ }
114
+ }
115
+ }
116
+
117
+ // Atomic update loop
118
+ for (int tid=0; tid<no_of_nodes; tid++) {
119
+ int val1 = h_updating_graph_mask[tid];
120
+ if (val1 == 1) {
121
+ h_graph_mask[tid] = 1;
122
+ h_graph_visited[tid] = 1;
123
+ h_updating_graph_mask[tid] = 0;
124
+ stop[0] = 1;
125
+ }
126
+ }
127
+
128
+ // Next iteration
129
+ //k++;
130
+ //} while(stop[0] != 0);
131
+ }
132
+ #pragma endscop
133
+
134
+ // Clean-up and exit
135
+ if (fp) {
136
+ fclose(fp);
137
+ }
138
+ printf("\n[bfs] Completed\n\n"); fflush(stdout);
139
+ fflush(stdout);
140
+ return 0;
141
+ }
142
+
143
+ //########################################################################