bones-compiler 1.3.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +62 -0
- data/README.rdoc +14 -3
- data/Rakefile +13 -12
- data/VERSION +1 -1
- data/examples/applications/ffos.c +24 -8
- data/examples/benchmarks/PolyBench/2mm.c +0 -0
- data/examples/benchmarks/PolyBench/3mm.c +0 -0
- data/examples/benchmarks/PolyBench/adi.c +0 -0
- data/examples/benchmarks/PolyBench/atax.c +0 -0
- data/examples/benchmarks/PolyBench/bicg.c +0 -0
- data/examples/benchmarks/PolyBench/cholesky.c +0 -0
- data/examples/benchmarks/PolyBench/common.h +0 -0
- data/examples/benchmarks/PolyBench/correlation.c +0 -0
- data/examples/benchmarks/PolyBench/covariance.c +0 -0
- data/examples/benchmarks/PolyBench/doitgen.c +0 -0
- data/examples/benchmarks/PolyBench/durbin.c +0 -0
- data/examples/benchmarks/PolyBench/dynprog.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d-apml.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d.c +0 -0
- data/examples/benchmarks/PolyBench/floyd-warshall.c +0 -0
- data/examples/benchmarks/PolyBench/gemm.c +0 -0
- data/examples/benchmarks/PolyBench/gemver.c +0 -0
- data/examples/benchmarks/PolyBench/gesummv.c +0 -0
- data/examples/benchmarks/PolyBench/gramschmidt.c +0 -0
- data/examples/benchmarks/PolyBench/jacobi-1d-imper.c +4 -2
- data/examples/benchmarks/PolyBench/jacobi-2d-imper.c +1 -1
- data/examples/benchmarks/PolyBench/lu.c +0 -0
- data/examples/benchmarks/PolyBench/ludcmp.c +0 -0
- data/examples/benchmarks/PolyBench/mvt.c +0 -0
- data/examples/benchmarks/PolyBench/reg_detect.c +0 -0
- data/examples/benchmarks/PolyBench/seidel-2d.c +0 -0
- data/examples/benchmarks/PolyBench/symm.c +0 -0
- data/examples/benchmarks/PolyBench/syr2k.c +0 -0
- data/examples/benchmarks/PolyBench/syrk.c +0 -0
- data/examples/benchmarks/PolyBench/trisolv.c +0 -0
- data/examples/benchmarks/PolyBench/trmm.c +0 -0
- data/examples/benchmarks/Rodinia/bfs.c +143 -0
- data/examples/benchmarks/Rodinia/common.h +78 -0
- data/examples/benchmarks/Rodinia/hotspot.c +106 -126
- data/examples/benchmarks/Rodinia/kmeans.c +157 -164
- data/examples/benchmarks/Rodinia/nw.c +151 -0
- data/examples/benchmarks/Rodinia/pathfinder.c +88 -0
- data/examples/benchmarks/Rodinia/srad.c +50 -59
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +0 -0
- data/examples/benchmarks/other/mm.c +0 -0
- data/examples/benchmarks/other/saxpy.c +0 -0
- data/examples/chunk/example01.c +6 -4
- data/examples/chunk/example02.c +6 -4
- data/examples/chunk/example03.c +6 -4
- data/examples/chunk/example04.c +8 -5
- data/examples/chunk/example05.c +6 -4
- data/examples/chunk/example06.c +3 -1
- data/examples/chunk/example07.c +5 -2
- data/examples/dependences/example01.c +3 -1
- data/examples/dependences/example02.c +3 -1
- data/examples/dependences/example03.c +3 -1
- data/examples/dependences/example04.c +3 -1
- data/examples/dependences/example05.c +3 -1
- data/examples/element/example01.c +6 -4
- data/examples/element/example02.c +6 -4
- data/examples/element/example03.c +10 -8
- data/examples/element/example04.c +6 -4
- data/examples/element/example05.c +8 -5
- data/examples/element/example06.c +6 -4
- data/examples/element/example07.c +6 -4
- data/examples/element/example08.c +6 -4
- data/examples/element/example09.c +6 -4
- data/examples/element/example10.c +4 -2
- data/examples/element/example11.c +4 -2
- data/examples/element/example12.c +4 -2
- data/examples/element/example13.c +3 -1
- data/examples/fusion/example01.c +3 -12
- data/examples/fusion/example02.c +3 -16
- data/examples/fusion/example03.c +3 -1
- data/examples/fusion/example04.c +5 -3
- data/examples/fusion/example05.c +3 -1
- data/examples/neighbourhood/example01.c +6 -4
- data/examples/neighbourhood/example02.c +6 -4
- data/examples/neighbourhood/example03.c +6 -4
- data/examples/neighbourhood/example04.c +5 -3
- data/examples/neighbourhood/example05.c +3 -1
- data/examples/shared/example01.c +6 -4
- data/examples/shared/example02.c +6 -4
- data/examples/shared/example03.c +6 -4
- data/examples/shared/example04.c +6 -4
- data/examples/shared/example05.c +6 -4
- data/lib/adarwin/engine.rb +16 -5
- data/lib/adarwin/memorycopies.rb +21 -9
- data/lib/adarwin/nest.rb +18 -1
- data/lib/adarwin/preprocessor.rb +5 -2
- data/lib/adarwin/reference.rb +71 -6
- data/lib/bones/algorithm.rb +20 -5
- data/lib/bones/copy.rb +3 -2
- data/lib/bones/engine.rb +12 -9
- data/lib/bones/preprocessor.rb +170 -120
- data/lib/bones/variablelist.rb +1 -1
- data/lib/cast.rb +11 -0
- data/lib/castaddon.rb +23 -6
- data/lib/castaddon/node_adarwin.rb +17 -0
- data/lib/castaddon/node_common.rb +6 -0
- data/lib/castaddon/transformations.rb +13 -9
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +0 -0
- data/skeletons/CPU-C/common/globals_kernel.c +0 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +0 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +0 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_globals.c +5 -0
- data/skeletons/CPU-C/kernel/default.host.c +0 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-C/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +0 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_globals.c +2 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +3 -3
- data/skeletons/CPU-OPENMP/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +0 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +0 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +0 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_global.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/scheduler.c +2 -2
- data/skeletons/GPU-CUDA/common/timer_1_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_globals.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/skeletons.txt +4 -2
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/verification/header.c +0 -0
- data/skeletons/verification/timer_start.c +0 -0
- data/skeletons/verification/timer_stop.c +0 -0
- data/skeletons/verification/verify_results.c +0 -0
- data/test/bones/test_algorithm.rb +0 -0
- data/test/bones/test_common.rb +0 -0
- data/test/bones/test_preprocessor.rb +0 -0
- data/test/bones/test_species.rb +0 -0
- data/test/bones/test_variable.rb +0 -0
- data/test/examples/benchmarks/PolyBench/2mm_species.c +1 -1
- data/test/examples/benchmarks/PolyBench/3mm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +0 -0
- data/test/examples/chunk/example01_species.c +3 -3
- data/test/examples/chunk/example02_species.c +3 -3
- data/test/examples/chunk/example03_species.c +3 -3
- data/test/examples/chunk/example04_species.c +3 -3
- data/test/examples/chunk/example05_species.c +3 -3
- data/test/examples/chunk/example06_species.c +1 -1
- data/test/examples/chunk/example07_species.c +3 -2
- data/test/examples/dependences/example01_species.c +1 -1
- data/test/examples/dependences/example02_species.c +1 -1
- data/test/examples/dependences/example03_species.c +1 -1
- data/test/examples/dependences/example04_species.c +1 -1
- data/test/examples/dependences/example05_species.c +1 -1
- data/test/examples/element/example01_species.c +3 -3
- data/test/examples/element/example02_species.c +3 -3
- data/test/examples/element/example03_species.c +7 -7
- data/test/examples/element/example04_species.c +3 -3
- data/test/examples/element/example05_species.c +3 -3
- data/test/examples/element/example06_species.c +3 -3
- data/test/examples/element/example07_species.c +3 -3
- data/test/examples/element/example08_species.c +3 -3
- data/test/examples/element/example09_species.c +3 -3
- data/test/examples/element/example10_species.c +1 -1
- data/test/examples/element/example11_species.c +1 -1
- data/test/examples/element/example12_species.c +1 -1
- data/test/examples/element/example13_species.c +1 -1
- data/test/examples/neighbourhood/example01_species.c +3 -3
- data/test/examples/neighbourhood/example02_species.c +3 -3
- data/test/examples/neighbourhood/example03_species.c +3 -3
- data/test/examples/neighbourhood/example04_species.c +3 -3
- data/test/examples/neighbourhood/example05_species.c +1 -1
- data/test/examples/shared/example01_species.c +3 -3
- data/test/examples/shared/example02_species.c +3 -3
- data/test/examples/shared/example03_species.c +3 -3
- data/test/examples/shared/example04_species.c +3 -3
- data/test/examples/shared/example05_species.c +3 -3
- data/test/test_helper.rb +2 -2
- metadata +266 -252
- checksums.yaml +0 -15
- data/examples/benchmarks/Rodinia/cfd.c +0 -180
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,65 @@
|
|
|
1
|
+
###################
|
|
2
|
+
### v1.6.0 ###
|
|
3
|
+
###################
|
|
4
|
+
|
|
5
|
+
General:
|
|
6
|
+
- Add support for multiple scops in a file
|
|
7
|
+
|
|
8
|
+
Bug fixes:
|
|
9
|
+
- Fixed mismatch in kernel arguments of the OpenMP D-element-to-1 skeleton
|
|
10
|
+
- Fixed the empty-scop bug in A-Darwin
|
|
11
|
+
|
|
12
|
+
Miscellaneous:
|
|
13
|
+
- Updated the documentation to include the latest Bones publication "Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs"
|
|
14
|
+
|
|
15
|
+
###################
|
|
16
|
+
### v1.5.0c ###
|
|
17
|
+
###################
|
|
18
|
+
|
|
19
|
+
Bug fixes:
|
|
20
|
+
- Remove CAST gem, only include fixes
|
|
21
|
+
- Update examples and tests
|
|
22
|
+
|
|
23
|
+
###################
|
|
24
|
+
### v1.5.0b ###
|
|
25
|
+
###################
|
|
26
|
+
|
|
27
|
+
Bug fixes:
|
|
28
|
+
- Fix filename parsing when directory or filename contains a '.'
|
|
29
|
+
- Include updated CAST gem in bones which preserves literal suffixes
|
|
30
|
+
|
|
31
|
+
###################
|
|
32
|
+
### v1.5 ###
|
|
33
|
+
###################
|
|
34
|
+
|
|
35
|
+
Bug fixes:
|
|
36
|
+
- Adjusted the examples to include pragma scop and named species (to make them work again with Bones)
|
|
37
|
+
- Fixed a bug where input/output variables of species with 'shared' were not properly handled
|
|
38
|
+
- Removed warning from Rakefile
|
|
39
|
+
- Fixed a bug requiring Bones to be executed from the folder containing the 'lib' directory
|
|
40
|
+
- Fixed a filename issue on non-Unix systems
|
|
41
|
+
- The pre-processor now understands block-comments
|
|
42
|
+
|
|
43
|
+
Various:
|
|
44
|
+
- Added .gitignore file
|
|
45
|
+
- Improved clarity of Rakefile stub targets
|
|
46
|
+
- Improved error handling of incorrect names
|
|
47
|
+
- Improved error handling of incorrect species
|
|
48
|
+
|
|
49
|
+
###################
|
|
50
|
+
### v1.4 ###
|
|
51
|
+
###################
|
|
52
|
+
|
|
53
|
+
A-Darwin:
|
|
54
|
+
- Handles additional cases of copyin/out-to-outer-loop movement
|
|
55
|
+
|
|
56
|
+
Skeletons:
|
|
57
|
+
- Improved performance of the memory-copy thread ('scheduler')
|
|
58
|
+
- Minor changes to skeletons mapping file
|
|
59
|
+
|
|
60
|
+
Examples:
|
|
61
|
+
- Added 6 example benchmarks from the Rodinia suite
|
|
62
|
+
|
|
1
63
|
###################
|
|
2
64
|
### v1.3 ###
|
|
3
65
|
###################
|
data/README.rdoc
CHANGED
|
@@ -116,7 +116,18 @@ Code documentation can be generated automatically using RDoc. Navigate to the in
|
|
|
116
116
|
== Scientific publications
|
|
117
117
|
Scientific publications related to Bones/A-Darwin can be obtained from http://www.cedricnugteren.nl/publications. Several publications are relevant:
|
|
118
118
|
|
|
119
|
-
1. <b>
|
|
119
|
+
1. <b>Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs</b>, which provides details on the Bones source-to-source compiler, including optimizations in host-accelerator transfer and loop fusion in kernel code. When referring to GPU code generation using Bones, loop fusion or optimizations in host-accelerator transfer in scientific work, you are kindly asked to include the following citation:
|
|
120
|
+
|
|
121
|
+
@ARTICLE{Nugteren2015a,
|
|
122
|
+
author = {Cedric Nugteren and Henk Corporaal},
|
|
123
|
+
title = {Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs},
|
|
124
|
+
journal = {ACM Trans. Archit. Code Optim.},
|
|
125
|
+
volume = {11},
|
|
126
|
+
number = {4},
|
|
127
|
+
year = {2015},
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
2. <b>Algorithmic Species Revisited: A Program Code Classification Based on Array References</b>, which provides details on the algorithm classification (the species) and A-Darwin (the tool). When referring to the algorithm classification in scientific work, you are kindly asked to include the following citation:
|
|
120
131
|
|
|
121
132
|
@INPROCEEDINGS{Nugteren2013a,
|
|
122
133
|
author = {Cedric Nugteren and Rosilde Corvino and Henk Corporaal},
|
|
@@ -125,7 +136,7 @@ Scientific publications related to Bones/A-Darwin can be obtained from http://ww
|
|
|
125
136
|
year = {2013},
|
|
126
137
|
}
|
|
127
138
|
|
|
128
|
-
|
|
139
|
+
3. <b>Automatic Skeleton-Based Compilation through Integration with an Algorithm Classification</b>, which discusses the Bones source-to-source compiler. When referring to Bones in scientific work, you are kindly asked to include the following citation:
|
|
129
140
|
|
|
130
141
|
@INPROCEEDINGS{Nugteren2013b,
|
|
131
142
|
author = {Cedric Nugteren and Pieter Custers and Henk Corporaal},
|
|
@@ -148,4 +159,4 @@ With rake, A-Darwin can be tested on a set of examples '<tt>rake adarwin_test</t
|
|
|
148
159
|
|
|
149
160
|
|
|
150
161
|
= Questions
|
|
151
|
-
Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or
|
|
162
|
+
Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or on the project page at GitHub.
|
data/Rakefile
CHANGED
|
@@ -37,7 +37,7 @@ TARGET = TARGETS[0]
|
|
|
37
37
|
MEASUREMENTS = true
|
|
38
38
|
VERIFICATION = false
|
|
39
39
|
MEMORY_OPTIMISATIONS = true
|
|
40
|
-
|
|
40
|
+
ADARWIN_OPTIONS_BONES = MEMORY_OPTIMISATIONS ? '-r -f -b -l' : ''
|
|
41
41
|
|
|
42
42
|
# Small helper function to display text on screen
|
|
43
43
|
def display(text)
|
|
@@ -62,7 +62,7 @@ namespace :examples do
|
|
|
62
62
|
bones_options = (MEASUREMENTS ? '-m ' : '') + (VERIFICATION ? '-c ' : '')
|
|
63
63
|
args.with_defaults(:file => EXAMPLES)
|
|
64
64
|
Dir[args.file].sort.each do |file|
|
|
65
|
-
sh "bin/adarwin -a #{file} #{
|
|
65
|
+
sh "bin/adarwin -a #{file} #{ADARWIN_OPTIONS_BONES}"
|
|
66
66
|
split = file.split('.')
|
|
67
67
|
file = split[0]+'_species'+'.'+split[1]
|
|
68
68
|
sh "bin/bones -a #{file} -t #{TARGET} #{bones_options}"
|
|
@@ -100,15 +100,15 @@ namespace :examples do
|
|
|
100
100
|
end
|
|
101
101
|
end
|
|
102
102
|
|
|
103
|
-
# Helper function to compile code
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
103
|
+
# Helper function to compile code (NOTE: this task is a stub)
|
|
104
|
+
def compile(file,target)
|
|
105
|
+
puts "[Rake] ### Compiling the code is system-specific, to be filled in..."
|
|
106
|
+
end
|
|
107
107
|
|
|
108
|
-
# Helper function to execute code
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
108
|
+
# Helper function to execute code (NOTE: this task is a stub)
|
|
109
|
+
def execute(file,target)
|
|
110
|
+
puts "[Rake] ### Executing the code is system-specific, to be filled in..."
|
|
111
|
+
end
|
|
112
112
|
|
|
113
113
|
end
|
|
114
114
|
task :examples => ['examples:generate']
|
|
@@ -172,11 +172,12 @@ end
|
|
|
172
172
|
|
|
173
173
|
# Generate HTML documentation using RDoc
|
|
174
174
|
RDoc::Task.new do |rdoc|
|
|
175
|
-
|
|
175
|
+
version = File.read('VERSION')
|
|
176
|
+
rdoc.title = 'Bones - %s' % version
|
|
176
177
|
rdoc.options << '--line-numbers'
|
|
177
178
|
rdoc.rdoc_files.include(File.join('lib','**','*.rb'))
|
|
178
179
|
rdoc.rdoc_files.include('README.rdoc')
|
|
180
|
+
rdoc.rdoc_files.include('VERSION')
|
|
179
181
|
rdoc.rdoc_dir = 'rdoc'
|
|
180
182
|
rdoc.main = 'README.rdoc'
|
|
181
183
|
end
|
|
182
|
-
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
1.
|
|
1
|
+
1.6.0
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
// == File information
|
|
16
16
|
// Filename...........applications/ffos.c
|
|
17
17
|
// Author.............Cedric Nugteren
|
|
18
|
-
// Last modified on...
|
|
18
|
+
// Last modified on...11-October-2014
|
|
19
19
|
//
|
|
20
20
|
|
|
21
21
|
//########################################################################
|
|
@@ -104,13 +104,15 @@ int main(void) {
|
|
|
104
104
|
//########################################################################
|
|
105
105
|
if (messages >= 1) { printf("### PART1: Histogramming.\n"); fflush(stdout); }
|
|
106
106
|
|
|
107
|
-
#pragma
|
|
107
|
+
#pragma scop
|
|
108
|
+
#pragma species kernel image0[0:height-1,0:width-1]|element -> hist[0:255]|shared
|
|
108
109
|
for (h=0;h<height;h++) {
|
|
109
110
|
for (w=0;w<width;w++) {
|
|
110
111
|
hist[image0[h][w]] = hist[image0[h][w]] + 1;
|
|
111
112
|
}
|
|
112
113
|
}
|
|
113
114
|
#pragma species endkernel histogram
|
|
115
|
+
#pragma endscop
|
|
114
116
|
|
|
115
117
|
//########################################################################
|
|
116
118
|
//### Between class variance (CPU)
|
|
@@ -168,14 +170,22 @@ int main(void) {
|
|
|
168
170
|
//########################################################################
|
|
169
171
|
if (messages >= 1) { printf("### PART4: Binarization with treshold at %d.\n",threshold); fflush(stdout); }
|
|
170
172
|
|
|
171
|
-
|
|
173
|
+
unsigned char val;
|
|
174
|
+
#pragma scop
|
|
175
|
+
#pragma species kernel image0[0:height-1,0:width-1]|element -> image1[0:height-1,0:width-1]|element
|
|
172
176
|
for (h=0;h<height;h++) {
|
|
173
177
|
for (w=0;w<width;w++) {
|
|
174
|
-
if (image0[h][w] > threshold) {
|
|
175
|
-
|
|
178
|
+
if (image0[h][w] > threshold) {
|
|
179
|
+
val = 1;
|
|
180
|
+
}
|
|
181
|
+
else {
|
|
182
|
+
val = 0;
|
|
183
|
+
}
|
|
184
|
+
image1[h][w] = val;
|
|
176
185
|
}
|
|
177
186
|
}
|
|
178
187
|
#pragma species endkernel threshold
|
|
188
|
+
#pragma endscop
|
|
179
189
|
|
|
180
190
|
//########################################################################
|
|
181
191
|
//### PART5: Erosion 7x7 (accelerated)
|
|
@@ -183,7 +193,8 @@ int main(void) {
|
|
|
183
193
|
if (messages >= 1) { printf("### PART5: Perform the erode kernel.\n"); fflush(stdout); }
|
|
184
194
|
|
|
185
195
|
int condition;
|
|
186
|
-
#pragma
|
|
196
|
+
#pragma scop
|
|
197
|
+
#pragma species kernel image1[7:height-8,7:width-8]|neighbourhood(-3:3,-3:3) -> image2[0:height-1,0:width-1]|element
|
|
187
198
|
for (h=0;h<height;h++) {
|
|
188
199
|
for (w=0;w<width;w++) {
|
|
189
200
|
if (w >= 7 && h >= 7 && w <= width-7 && h <= height-7) {
|
|
@@ -208,6 +219,7 @@ int main(void) {
|
|
|
208
219
|
}
|
|
209
220
|
}
|
|
210
221
|
#pragma species endkernel erosion
|
|
222
|
+
#pragma endscop
|
|
211
223
|
|
|
212
224
|
//########################################################################
|
|
213
225
|
//### PART6: 1D erosion(7) synthetic example (accelerated)
|
|
@@ -251,7 +263,8 @@ int main(void) {
|
|
|
251
263
|
if (messages >= 1) { printf("### PART7: Starting the Y-projection algorithm.\n"); fflush(stdout); }
|
|
252
264
|
|
|
253
265
|
int result_yp;
|
|
254
|
-
#pragma
|
|
266
|
+
#pragma scop
|
|
267
|
+
#pragma species kernel image2[0:height-1,0:width-1]|chunk(0:height-1,0:0) -> Yvector[0:width-1]|element
|
|
255
268
|
for (w=0;w<width;w++) {
|
|
256
269
|
result_yp = 0;
|
|
257
270
|
for (h=0;h<height;h++) {
|
|
@@ -262,6 +275,7 @@ int main(void) {
|
|
|
262
275
|
Yvector[w] = result_yp;
|
|
263
276
|
}
|
|
264
277
|
#pragma species endkernel y_projection
|
|
278
|
+
#pragma endscop
|
|
265
279
|
|
|
266
280
|
//########################################################################
|
|
267
281
|
//### PART8: X-projection (accelerated)
|
|
@@ -269,7 +283,8 @@ int main(void) {
|
|
|
269
283
|
if (messages >= 1) { printf("### PART8: Starting the X-projection algorithm.\n"); fflush(stdout); }
|
|
270
284
|
|
|
271
285
|
int result_xp;
|
|
272
|
-
#pragma
|
|
286
|
+
#pragma scop
|
|
287
|
+
#pragma species kernel image2[0:height-1,0:width-1]|chunk(0:0,0:width-1) -> Xvector[0:height-1]|element
|
|
273
288
|
for (h=0;h<height;h++) {
|
|
274
289
|
result_xp = 0;
|
|
275
290
|
for (w=0;w<width;w++) {
|
|
@@ -280,6 +295,7 @@ int main(void) {
|
|
|
280
295
|
Xvector[h] = result_xp;
|
|
281
296
|
}
|
|
282
297
|
#pragma species endkernel x_projection
|
|
298
|
+
#pragma endscop
|
|
283
299
|
|
|
284
300
|
//########################################################################
|
|
285
301
|
//### Search for the centers of the projection vectors (CPU)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -43,8 +43,10 @@ int main(void) {
|
|
|
43
43
|
#pragma scop
|
|
44
44
|
for (t=0; t<TSTEPS; t++) {
|
|
45
45
|
#pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
|
|
46
|
-
for (i=
|
|
47
|
-
|
|
46
|
+
for (i=0; i<LARGE_N; i++) {
|
|
47
|
+
if (i > 0 && i < LARGE_N-1) {
|
|
48
|
+
B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]);
|
|
49
|
+
}
|
|
48
50
|
}
|
|
49
51
|
#pragma species endkernel jacobi-1d-imper-part1
|
|
50
52
|
#pragma species kernel 1:LARGE_N-2|element -> 1:LARGE_N-2|element
|
|
@@ -42,7 +42,7 @@ int main(void) {
|
|
|
42
42
|
#pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
|
|
43
43
|
for (i=1; i<N-1; i++) {
|
|
44
44
|
for (j=1; j<N-1; j++) {
|
|
45
|
-
if (i < N-1 && j < N-1) {
|
|
45
|
+
if (i > 0 && j > 0 && i < N-1 && j < N-1) {
|
|
46
46
|
B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
|
|
47
47
|
}
|
|
48
48
|
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
//
|
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
|
3
|
+
// demonstrates the use of Bones for an example application: 'bfs', taken from
|
|
4
|
+
// the Rodinia benchmark suite. For more information on the application or on Bones
|
|
5
|
+
// please use the contact information below.
|
|
6
|
+
//
|
|
7
|
+
// == More information on bfs
|
|
8
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
|
9
|
+
//
|
|
10
|
+
// == More information on Bones
|
|
11
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
|
12
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
|
13
|
+
//
|
|
14
|
+
// == File information
|
|
15
|
+
// Filename...........applications/bfs.c
|
|
16
|
+
// Authors............Cedric Nugteren
|
|
17
|
+
// Last modified on...08-Jun-2014
|
|
18
|
+
//
|
|
19
|
+
//########################################################################
|
|
20
|
+
|
|
21
|
+
// Includes
|
|
22
|
+
#include "common.h"
|
|
23
|
+
|
|
24
|
+
//########################################################################
|
|
25
|
+
//### Start of the main function
|
|
26
|
+
//########################################################################
|
|
27
|
+
|
|
28
|
+
int main(void) {
|
|
29
|
+
int no_of_nodes;
|
|
30
|
+
|
|
31
|
+
// Read input data
|
|
32
|
+
printf("[bfs] Reading File\n");
|
|
33
|
+
FILE* fp = fopen(FILENAME, "r");
|
|
34
|
+
if (!fp) {
|
|
35
|
+
printf("[bfs] Error Reading graph file\n");
|
|
36
|
+
return 1;
|
|
37
|
+
}
|
|
38
|
+
fscanf(fp,"%d",&no_of_nodes);
|
|
39
|
+
|
|
40
|
+
// Arrays
|
|
41
|
+
int h_graph_nodes_start[MAX_NODES];
|
|
42
|
+
int h_graph_nodes_edges[MAX_NODES];
|
|
43
|
+
int h_graph_mask[MAX_NODES];
|
|
44
|
+
int h_updating_graph_mask[MAX_NODES];
|
|
45
|
+
int h_graph_visited[MAX_NODES];
|
|
46
|
+
|
|
47
|
+
// Initialize
|
|
48
|
+
int start;
|
|
49
|
+
int edges;
|
|
50
|
+
for (int i=0; i<no_of_nodes; i++) {
|
|
51
|
+
fscanf(fp, "%d %d", &start, &edges);
|
|
52
|
+
h_graph_nodes_start[i] = start;
|
|
53
|
+
h_graph_nodes_edges[i] = edges;
|
|
54
|
+
h_graph_mask[i] = 0;
|
|
55
|
+
h_updating_graph_mask[i] = 0;
|
|
56
|
+
h_graph_visited[i] = 0;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// Read the source node from the file
|
|
60
|
+
int source = 0;
|
|
61
|
+
fscanf(fp, "%d", &source);
|
|
62
|
+
source = 0;
|
|
63
|
+
|
|
64
|
+
// Set the source node as true in the mask
|
|
65
|
+
h_graph_mask[source] = 1;
|
|
66
|
+
h_graph_visited[source] = 1;
|
|
67
|
+
|
|
68
|
+
// Get the edge list
|
|
69
|
+
int id;
|
|
70
|
+
int cost;
|
|
71
|
+
int edge_list_size;
|
|
72
|
+
fscanf(fp,"%d",&edge_list_size);
|
|
73
|
+
int h_graph_edges[MAX_NODES];
|
|
74
|
+
for(int i=0; i<edge_list_size; i++) {
|
|
75
|
+
fscanf(fp, "%d", &id);
|
|
76
|
+
fscanf(fp, "%d", &cost);
|
|
77
|
+
h_graph_edges[i] = id;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Memory for the result
|
|
81
|
+
int h_cost[MAX_NODES];
|
|
82
|
+
for(int i=0; i<MAX_NODES; i++) {
|
|
83
|
+
h_cost[i] = -1;
|
|
84
|
+
}
|
|
85
|
+
h_cost[source] = 0;
|
|
86
|
+
|
|
87
|
+
// Start the computation
|
|
88
|
+
printf("[bfs] Start traversing the tree\n");
|
|
89
|
+
int k = 0;
|
|
90
|
+
int stop[1];
|
|
91
|
+
|
|
92
|
+
// If no thread changes this value then the loop stops
|
|
93
|
+
stop[0] = 0;
|
|
94
|
+
|
|
95
|
+
#pragma scop
|
|
96
|
+
for (unsigned t=0; t<10; t++) {
|
|
97
|
+
//do {
|
|
98
|
+
|
|
99
|
+
// Atomic update loop
|
|
100
|
+
for(int tid=0; tid<no_of_nodes; tid++) {
|
|
101
|
+
int val1 = h_graph_mask[tid];
|
|
102
|
+
if (val1 == 1) {
|
|
103
|
+
h_graph_mask[tid] = 0;
|
|
104
|
+
int val2 = h_graph_nodes_start[tid];
|
|
105
|
+
int val3 = h_graph_nodes_edges[tid];
|
|
106
|
+
for (int i=val2; i<(val3 + val2); i++) {
|
|
107
|
+
int id = h_graph_edges[i];
|
|
108
|
+
int val4 = h_graph_visited[id];
|
|
109
|
+
if (val4 == 0) {
|
|
110
|
+
h_cost[id] = h_cost[tid] + 1;
|
|
111
|
+
h_updating_graph_mask[id] = 1;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Atomic update loop
|
|
118
|
+
for (int tid=0; tid<no_of_nodes; tid++) {
|
|
119
|
+
int val1 = h_updating_graph_mask[tid];
|
|
120
|
+
if (val1 == 1) {
|
|
121
|
+
h_graph_mask[tid] = 1;
|
|
122
|
+
h_graph_visited[tid] = 1;
|
|
123
|
+
h_updating_graph_mask[tid] = 0;
|
|
124
|
+
stop[0] = 1;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Next iteration
|
|
129
|
+
//k++;
|
|
130
|
+
//} while(stop[0] != 0);
|
|
131
|
+
}
|
|
132
|
+
#pragma endscop
|
|
133
|
+
|
|
134
|
+
// Clean-up and exit
|
|
135
|
+
if (fp) {
|
|
136
|
+
fclose(fp);
|
|
137
|
+
}
|
|
138
|
+
printf("\n[bfs] Completed\n\n"); fflush(stdout);
|
|
139
|
+
fflush(stdout);
|
|
140
|
+
return 0;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
//########################################################################
|