bones-compiler 1.3.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +62 -0
- data/README.rdoc +14 -3
- data/Rakefile +13 -12
- data/VERSION +1 -1
- data/examples/applications/ffos.c +24 -8
- data/examples/benchmarks/PolyBench/2mm.c +0 -0
- data/examples/benchmarks/PolyBench/3mm.c +0 -0
- data/examples/benchmarks/PolyBench/adi.c +0 -0
- data/examples/benchmarks/PolyBench/atax.c +0 -0
- data/examples/benchmarks/PolyBench/bicg.c +0 -0
- data/examples/benchmarks/PolyBench/cholesky.c +0 -0
- data/examples/benchmarks/PolyBench/common.h +0 -0
- data/examples/benchmarks/PolyBench/correlation.c +0 -0
- data/examples/benchmarks/PolyBench/covariance.c +0 -0
- data/examples/benchmarks/PolyBench/doitgen.c +0 -0
- data/examples/benchmarks/PolyBench/durbin.c +0 -0
- data/examples/benchmarks/PolyBench/dynprog.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d-apml.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d.c +0 -0
- data/examples/benchmarks/PolyBench/floyd-warshall.c +0 -0
- data/examples/benchmarks/PolyBench/gemm.c +0 -0
- data/examples/benchmarks/PolyBench/gemver.c +0 -0
- data/examples/benchmarks/PolyBench/gesummv.c +0 -0
- data/examples/benchmarks/PolyBench/gramschmidt.c +0 -0
- data/examples/benchmarks/PolyBench/jacobi-1d-imper.c +4 -2
- data/examples/benchmarks/PolyBench/jacobi-2d-imper.c +1 -1
- data/examples/benchmarks/PolyBench/lu.c +0 -0
- data/examples/benchmarks/PolyBench/ludcmp.c +0 -0
- data/examples/benchmarks/PolyBench/mvt.c +0 -0
- data/examples/benchmarks/PolyBench/reg_detect.c +0 -0
- data/examples/benchmarks/PolyBench/seidel-2d.c +0 -0
- data/examples/benchmarks/PolyBench/symm.c +0 -0
- data/examples/benchmarks/PolyBench/syr2k.c +0 -0
- data/examples/benchmarks/PolyBench/syrk.c +0 -0
- data/examples/benchmarks/PolyBench/trisolv.c +0 -0
- data/examples/benchmarks/PolyBench/trmm.c +0 -0
- data/examples/benchmarks/Rodinia/bfs.c +143 -0
- data/examples/benchmarks/Rodinia/common.h +78 -0
- data/examples/benchmarks/Rodinia/hotspot.c +106 -126
- data/examples/benchmarks/Rodinia/kmeans.c +157 -164
- data/examples/benchmarks/Rodinia/nw.c +151 -0
- data/examples/benchmarks/Rodinia/pathfinder.c +88 -0
- data/examples/benchmarks/Rodinia/srad.c +50 -59
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +0 -0
- data/examples/benchmarks/other/mm.c +0 -0
- data/examples/benchmarks/other/saxpy.c +0 -0
- data/examples/chunk/example01.c +6 -4
- data/examples/chunk/example02.c +6 -4
- data/examples/chunk/example03.c +6 -4
- data/examples/chunk/example04.c +8 -5
- data/examples/chunk/example05.c +6 -4
- data/examples/chunk/example06.c +3 -1
- data/examples/chunk/example07.c +5 -2
- data/examples/dependences/example01.c +3 -1
- data/examples/dependences/example02.c +3 -1
- data/examples/dependences/example03.c +3 -1
- data/examples/dependences/example04.c +3 -1
- data/examples/dependences/example05.c +3 -1
- data/examples/element/example01.c +6 -4
- data/examples/element/example02.c +6 -4
- data/examples/element/example03.c +10 -8
- data/examples/element/example04.c +6 -4
- data/examples/element/example05.c +8 -5
- data/examples/element/example06.c +6 -4
- data/examples/element/example07.c +6 -4
- data/examples/element/example08.c +6 -4
- data/examples/element/example09.c +6 -4
- data/examples/element/example10.c +4 -2
- data/examples/element/example11.c +4 -2
- data/examples/element/example12.c +4 -2
- data/examples/element/example13.c +3 -1
- data/examples/fusion/example01.c +3 -12
- data/examples/fusion/example02.c +3 -16
- data/examples/fusion/example03.c +3 -1
- data/examples/fusion/example04.c +5 -3
- data/examples/fusion/example05.c +3 -1
- data/examples/neighbourhood/example01.c +6 -4
- data/examples/neighbourhood/example02.c +6 -4
- data/examples/neighbourhood/example03.c +6 -4
- data/examples/neighbourhood/example04.c +5 -3
- data/examples/neighbourhood/example05.c +3 -1
- data/examples/shared/example01.c +6 -4
- data/examples/shared/example02.c +6 -4
- data/examples/shared/example03.c +6 -4
- data/examples/shared/example04.c +6 -4
- data/examples/shared/example05.c +6 -4
- data/lib/adarwin/engine.rb +16 -5
- data/lib/adarwin/memorycopies.rb +21 -9
- data/lib/adarwin/nest.rb +18 -1
- data/lib/adarwin/preprocessor.rb +5 -2
- data/lib/adarwin/reference.rb +71 -6
- data/lib/bones/algorithm.rb +20 -5
- data/lib/bones/copy.rb +3 -2
- data/lib/bones/engine.rb +12 -9
- data/lib/bones/preprocessor.rb +170 -120
- data/lib/bones/variablelist.rb +1 -1
- data/lib/cast.rb +11 -0
- data/lib/castaddon.rb +23 -6
- data/lib/castaddon/node_adarwin.rb +17 -0
- data/lib/castaddon/node_common.rb +6 -0
- data/lib/castaddon/transformations.rb +13 -9
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +0 -0
- data/skeletons/CPU-C/common/globals_kernel.c +0 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +0 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +0 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_globals.c +5 -0
- data/skeletons/CPU-C/kernel/default.host.c +0 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-C/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +0 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_globals.c +2 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +3 -3
- data/skeletons/CPU-OPENMP/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +0 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +0 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +0 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_global.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/scheduler.c +2 -2
- data/skeletons/GPU-CUDA/common/timer_1_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_globals.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/skeletons.txt +4 -2
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/verification/header.c +0 -0
- data/skeletons/verification/timer_start.c +0 -0
- data/skeletons/verification/timer_stop.c +0 -0
- data/skeletons/verification/verify_results.c +0 -0
- data/test/bones/test_algorithm.rb +0 -0
- data/test/bones/test_common.rb +0 -0
- data/test/bones/test_preprocessor.rb +0 -0
- data/test/bones/test_species.rb +0 -0
- data/test/bones/test_variable.rb +0 -0
- data/test/examples/benchmarks/PolyBench/2mm_species.c +1 -1
- data/test/examples/benchmarks/PolyBench/3mm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +0 -0
- data/test/examples/chunk/example01_species.c +3 -3
- data/test/examples/chunk/example02_species.c +3 -3
- data/test/examples/chunk/example03_species.c +3 -3
- data/test/examples/chunk/example04_species.c +3 -3
- data/test/examples/chunk/example05_species.c +3 -3
- data/test/examples/chunk/example06_species.c +1 -1
- data/test/examples/chunk/example07_species.c +3 -2
- data/test/examples/dependences/example01_species.c +1 -1
- data/test/examples/dependences/example02_species.c +1 -1
- data/test/examples/dependences/example03_species.c +1 -1
- data/test/examples/dependences/example04_species.c +1 -1
- data/test/examples/dependences/example05_species.c +1 -1
- data/test/examples/element/example01_species.c +3 -3
- data/test/examples/element/example02_species.c +3 -3
- data/test/examples/element/example03_species.c +7 -7
- data/test/examples/element/example04_species.c +3 -3
- data/test/examples/element/example05_species.c +3 -3
- data/test/examples/element/example06_species.c +3 -3
- data/test/examples/element/example07_species.c +3 -3
- data/test/examples/element/example08_species.c +3 -3
- data/test/examples/element/example09_species.c +3 -3
- data/test/examples/element/example10_species.c +1 -1
- data/test/examples/element/example11_species.c +1 -1
- data/test/examples/element/example12_species.c +1 -1
- data/test/examples/element/example13_species.c +1 -1
- data/test/examples/neighbourhood/example01_species.c +3 -3
- data/test/examples/neighbourhood/example02_species.c +3 -3
- data/test/examples/neighbourhood/example03_species.c +3 -3
- data/test/examples/neighbourhood/example04_species.c +3 -3
- data/test/examples/neighbourhood/example05_species.c +1 -1
- data/test/examples/shared/example01_species.c +3 -3
- data/test/examples/shared/example02_species.c +3 -3
- data/test/examples/shared/example03_species.c +3 -3
- data/test/examples/shared/example04_species.c +3 -3
- data/test/examples/shared/example05_species.c +3 -3
- data/test/test_helper.rb +2 -2
- metadata +266 -252
- checksums.yaml +0 -15
- data/examples/benchmarks/Rodinia/cfd.c +0 -180
data/CHANGELOG
CHANGED
@@ -1,3 +1,65 @@
|
|
1
|
+
###################
|
2
|
+
### v1.6.0 ###
|
3
|
+
###################
|
4
|
+
|
5
|
+
General:
|
6
|
+
- Add support for multiple scops in a file
|
7
|
+
|
8
|
+
Bug fixes:
|
9
|
+
- Fixed mismatch in kernel arguments of the OpenMP D-element-to-1 skeleton
|
10
|
+
- Fixed the empty-scop bug in A-Darwin
|
11
|
+
|
12
|
+
Miscellaneous:
|
13
|
+
- Updated the documentation to include the latest Bones publication "Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs"
|
14
|
+
|
15
|
+
###################
|
16
|
+
### v1.5.0c ###
|
17
|
+
###################
|
18
|
+
|
19
|
+
Bug fixes:
|
20
|
+
- Remove CAST gem, only include fixes
|
21
|
+
- Update examples and tests
|
22
|
+
|
23
|
+
###################
|
24
|
+
### v1.5.0b ###
|
25
|
+
###################
|
26
|
+
|
27
|
+
Bug fixes:
|
28
|
+
- Fix filename parsing when directory or filename contains a '.'
|
29
|
+
- Include updated CAST gem in bones which preserves literal suffixes
|
30
|
+
|
31
|
+
###################
|
32
|
+
### v1.5 ###
|
33
|
+
###################
|
34
|
+
|
35
|
+
Bug fixes:
|
36
|
+
- Adjusted the examples to include pragma scop and named species (to make them work again with Bones)
|
37
|
+
- Fixed a bug where input/output variables of species with 'shared' were not properly handled
|
38
|
+
- Removed warning from Rakefile
|
39
|
+
- Fixed a bug requiring Bones to be executed from the folder containing the 'lib' directory
|
40
|
+
- Fixed a filename issue on non-Unix systems
|
41
|
+
- The pre-processor now understands block-comments
|
42
|
+
|
43
|
+
Various:
|
44
|
+
- Added .gitignore file
|
45
|
+
- Improved clarity of Rakefile stub targets
|
46
|
+
- Improved error handling of incorrect names
|
47
|
+
- Improved error handling of incorrect species
|
48
|
+
|
49
|
+
###################
|
50
|
+
### v1.4 ###
|
51
|
+
###################
|
52
|
+
|
53
|
+
A-Darwin:
|
54
|
+
- Handles additional cases of copyin/out-to-outer-loop movement
|
55
|
+
|
56
|
+
Skeletons:
|
57
|
+
- Improved performance of the memory-copy thread ('scheduler')
|
58
|
+
- Minor changes to skeletons mapping file
|
59
|
+
|
60
|
+
Examples:
|
61
|
+
- Added 6 example benchmarks from the Rodinia suite
|
62
|
+
|
1
63
|
###################
|
2
64
|
### v1.3 ###
|
3
65
|
###################
|
data/README.rdoc
CHANGED
@@ -116,7 +116,18 @@ Code documentation can be generated automatically using RDoc. Navigate to the in
|
|
116
116
|
== Scientific publications
|
117
117
|
Scientific publications related to Bones/A-Darwin can be obtained from http://www.cedricnugteren.nl/publications. Several publications are relevant:
|
118
118
|
|
119
|
-
1. <b>
|
119
|
+
1. <b>Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs</b>, which provides details on the Bones source-to-source compiler, including optimizations in host-accelerator transfer and loop fusion in kernel code. When referring to GPU code generation using Bones, loop fusion or optimizations in host-accelerator transfer in scientific work, you are kindly asked to include the following citation:
|
120
|
+
|
121
|
+
@ARTICLE{Nugteren2015a,
|
122
|
+
author = {Cedric Nugteren and Henk Corporaal},
|
123
|
+
title = {Bones: An Automatic Skeleton-Based C-to-CUDA Compiler for GPUs},
|
124
|
+
journal = {ACM Trans. Archit. Code Optim.},
|
125
|
+
volume = {11},
|
126
|
+
number = {4},
|
127
|
+
year = {2015},
|
128
|
+
}
|
129
|
+
|
130
|
+
2. <b>Algorithmic Species Revisited: A Program Code Classification Based on Array References</b>, which provides details on the algorithm classification (the species) and A-Darwin (the tool). When referring to the algorithm classification in scientific work, you are kindly asked to include the following citation:
|
120
131
|
|
121
132
|
@INPROCEEDINGS{Nugteren2013a,
|
122
133
|
author = {Cedric Nugteren and Rosilde Corvino and Henk Corporaal},
|
@@ -125,7 +136,7 @@ Scientific publications related to Bones/A-Darwin can be obtained from http://ww
|
|
125
136
|
year = {2013},
|
126
137
|
}
|
127
138
|
|
128
|
-
|
139
|
+
3. <b>Automatic Skeleton-Based Compilation through Integration with an Algorithm Classification</b>, which discusses the Bones source-to-source compiler. When referring to Bones in scientific work, you are kindly asked to include the following citation:
|
129
140
|
|
130
141
|
@INPROCEEDINGS{Nugteren2013b,
|
131
142
|
author = {Cedric Nugteren and Pieter Custers and Henk Corporaal},
|
@@ -148,4 +159,4 @@ With rake, A-Darwin can be tested on a set of examples '<tt>rake adarwin_test</t
|
|
148
159
|
|
149
160
|
|
150
161
|
= Questions
|
151
|
-
Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or
|
162
|
+
Questions can be directed by email. You can find contact details on the personal page of the author at http://www.cedricnugteren.nl/ or on the project page at GitHub.
|
data/Rakefile
CHANGED
@@ -37,7 +37,7 @@ TARGET = TARGETS[0]
|
|
37
37
|
MEASUREMENTS = true
|
38
38
|
VERIFICATION = false
|
39
39
|
MEMORY_OPTIMISATIONS = true
|
40
|
-
|
40
|
+
ADARWIN_OPTIONS_BONES = MEMORY_OPTIMISATIONS ? '-r -f -b -l' : ''
|
41
41
|
|
42
42
|
# Small helper function to display text on screen
|
43
43
|
def display(text)
|
@@ -62,7 +62,7 @@ namespace :examples do
|
|
62
62
|
bones_options = (MEASUREMENTS ? '-m ' : '') + (VERIFICATION ? '-c ' : '')
|
63
63
|
args.with_defaults(:file => EXAMPLES)
|
64
64
|
Dir[args.file].sort.each do |file|
|
65
|
-
sh "bin/adarwin -a #{file} #{
|
65
|
+
sh "bin/adarwin -a #{file} #{ADARWIN_OPTIONS_BONES}"
|
66
66
|
split = file.split('.')
|
67
67
|
file = split[0]+'_species'+'.'+split[1]
|
68
68
|
sh "bin/bones -a #{file} -t #{TARGET} #{bones_options}"
|
@@ -100,15 +100,15 @@ namespace :examples do
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
|
-
# Helper function to compile code
|
104
|
-
|
105
|
-
|
106
|
-
|
103
|
+
# Helper function to compile code (NOTE: this task is a stub)
|
104
|
+
def compile(file,target)
|
105
|
+
puts "[Rake] ### Compiling the code is system-specific, to be filled in..."
|
106
|
+
end
|
107
107
|
|
108
|
-
# Helper function to execute code
|
109
|
-
|
110
|
-
|
111
|
-
|
108
|
+
# Helper function to execute code (NOTE: this task is a stub)
|
109
|
+
def execute(file,target)
|
110
|
+
puts "[Rake] ### Executing the code is system-specific, to be filled in..."
|
111
|
+
end
|
112
112
|
|
113
113
|
end
|
114
114
|
task :examples => ['examples:generate']
|
@@ -172,11 +172,12 @@ end
|
|
172
172
|
|
173
173
|
# Generate HTML documentation using RDoc
|
174
174
|
RDoc::Task.new do |rdoc|
|
175
|
-
|
175
|
+
version = File.read('VERSION')
|
176
|
+
rdoc.title = 'Bones - %s' % version
|
176
177
|
rdoc.options << '--line-numbers'
|
177
178
|
rdoc.rdoc_files.include(File.join('lib','**','*.rb'))
|
178
179
|
rdoc.rdoc_files.include('README.rdoc')
|
180
|
+
rdoc.rdoc_files.include('VERSION')
|
179
181
|
rdoc.rdoc_dir = 'rdoc'
|
180
182
|
rdoc.main = 'README.rdoc'
|
181
183
|
end
|
182
|
-
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.
|
1
|
+
1.6.0
|
@@ -15,7 +15,7 @@
|
|
15
15
|
// == File information
|
16
16
|
// Filename...........applications/ffos.c
|
17
17
|
// Author.............Cedric Nugteren
|
18
|
-
// Last modified on...
|
18
|
+
// Last modified on...11-October-2014
|
19
19
|
//
|
20
20
|
|
21
21
|
//########################################################################
|
@@ -104,13 +104,15 @@ int main(void) {
|
|
104
104
|
//########################################################################
|
105
105
|
if (messages >= 1) { printf("### PART1: Histogramming.\n"); fflush(stdout); }
|
106
106
|
|
107
|
-
#pragma
|
107
|
+
#pragma scop
|
108
|
+
#pragma species kernel image0[0:height-1,0:width-1]|element -> hist[0:255]|shared
|
108
109
|
for (h=0;h<height;h++) {
|
109
110
|
for (w=0;w<width;w++) {
|
110
111
|
hist[image0[h][w]] = hist[image0[h][w]] + 1;
|
111
112
|
}
|
112
113
|
}
|
113
114
|
#pragma species endkernel histogram
|
115
|
+
#pragma endscop
|
114
116
|
|
115
117
|
//########################################################################
|
116
118
|
//### Between class variance (CPU)
|
@@ -168,14 +170,22 @@ int main(void) {
|
|
168
170
|
//########################################################################
|
169
171
|
if (messages >= 1) { printf("### PART4: Binarization with treshold at %d.\n",threshold); fflush(stdout); }
|
170
172
|
|
171
|
-
|
173
|
+
unsigned char val;
|
174
|
+
#pragma scop
|
175
|
+
#pragma species kernel image0[0:height-1,0:width-1]|element -> image1[0:height-1,0:width-1]|element
|
172
176
|
for (h=0;h<height;h++) {
|
173
177
|
for (w=0;w<width;w++) {
|
174
|
-
if (image0[h][w] > threshold) {
|
175
|
-
|
178
|
+
if (image0[h][w] > threshold) {
|
179
|
+
val = 1;
|
180
|
+
}
|
181
|
+
else {
|
182
|
+
val = 0;
|
183
|
+
}
|
184
|
+
image1[h][w] = val;
|
176
185
|
}
|
177
186
|
}
|
178
187
|
#pragma species endkernel threshold
|
188
|
+
#pragma endscop
|
179
189
|
|
180
190
|
//########################################################################
|
181
191
|
//### PART5: Erosion 7x7 (accelerated)
|
@@ -183,7 +193,8 @@ int main(void) {
|
|
183
193
|
if (messages >= 1) { printf("### PART5: Perform the erode kernel.\n"); fflush(stdout); }
|
184
194
|
|
185
195
|
int condition;
|
186
|
-
#pragma
|
196
|
+
#pragma scop
|
197
|
+
#pragma species kernel image1[7:height-8,7:width-8]|neighbourhood(-3:3,-3:3) -> image2[0:height-1,0:width-1]|element
|
187
198
|
for (h=0;h<height;h++) {
|
188
199
|
for (w=0;w<width;w++) {
|
189
200
|
if (w >= 7 && h >= 7 && w <= width-7 && h <= height-7) {
|
@@ -208,6 +219,7 @@ int main(void) {
|
|
208
219
|
}
|
209
220
|
}
|
210
221
|
#pragma species endkernel erosion
|
222
|
+
#pragma endscop
|
211
223
|
|
212
224
|
//########################################################################
|
213
225
|
//### PART6: 1D erosion(7) synthetic example (accelerated)
|
@@ -251,7 +263,8 @@ int main(void) {
|
|
251
263
|
if (messages >= 1) { printf("### PART7: Starting the Y-projection algorithm.\n"); fflush(stdout); }
|
252
264
|
|
253
265
|
int result_yp;
|
254
|
-
#pragma
|
266
|
+
#pragma scop
|
267
|
+
#pragma species kernel image2[0:height-1,0:width-1]|chunk(0:height-1,0:0) -> Yvector[0:width-1]|element
|
255
268
|
for (w=0;w<width;w++) {
|
256
269
|
result_yp = 0;
|
257
270
|
for (h=0;h<height;h++) {
|
@@ -262,6 +275,7 @@ int main(void) {
|
|
262
275
|
Yvector[w] = result_yp;
|
263
276
|
}
|
264
277
|
#pragma species endkernel y_projection
|
278
|
+
#pragma endscop
|
265
279
|
|
266
280
|
//########################################################################
|
267
281
|
//### PART8: X-projection (accelerated)
|
@@ -269,7 +283,8 @@ int main(void) {
|
|
269
283
|
if (messages >= 1) { printf("### PART8: Starting the X-projection algorithm.\n"); fflush(stdout); }
|
270
284
|
|
271
285
|
int result_xp;
|
272
|
-
#pragma
|
286
|
+
#pragma scop
|
287
|
+
#pragma species kernel image2[0:height-1,0:width-1]|chunk(0:0,0:width-1) -> Xvector[0:height-1]|element
|
273
288
|
for (h=0;h<height;h++) {
|
274
289
|
result_xp = 0;
|
275
290
|
for (w=0;w<width;w++) {
|
@@ -280,6 +295,7 @@ int main(void) {
|
|
280
295
|
Xvector[h] = result_xp;
|
281
296
|
}
|
282
297
|
#pragma species endkernel x_projection
|
298
|
+
#pragma endscop
|
283
299
|
|
284
300
|
//########################################################################
|
285
301
|
//### Search for the centers of the projection vectors (CPU)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -43,8 +43,10 @@ int main(void) {
|
|
43
43
|
#pragma scop
|
44
44
|
for (t=0; t<TSTEPS; t++) {
|
45
45
|
#pragma species kernel 1:LARGE_N-2|neighbourhood(-1:1) -> 1:LARGE_N-2|element
|
46
|
-
for (i=
|
47
|
-
|
46
|
+
for (i=0; i<LARGE_N; i++) {
|
47
|
+
if (i > 0 && i < LARGE_N-1) {
|
48
|
+
B[i] = 0.33333 * (A[i-1] + A[i] + A[i+1]);
|
49
|
+
}
|
48
50
|
}
|
49
51
|
#pragma species endkernel jacobi-1d-imper-part1
|
50
52
|
#pragma species kernel 1:LARGE_N-2|element -> 1:LARGE_N-2|element
|
@@ -42,7 +42,7 @@ int main(void) {
|
|
42
42
|
#pragma species kernel 1:N-2,1:N-2|neighbourhood(-1:1,-1:1) -> 1:N-2,1:N-2|element
|
43
43
|
for (i=1; i<N-1; i++) {
|
44
44
|
for (j=1; j<N-1; j++) {
|
45
|
-
if (i < N-1 && j < N-1) {
|
45
|
+
if (i > 0 && j > 0 && i < N-1 && j < N-1) {
|
46
46
|
B[i][j] = 0.2 * (A[i][j] + A[i][j-1] + A[i][1+j] + A[1+i][j] + A[i-1][j]);
|
47
47
|
}
|
48
48
|
}
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,143 @@
|
|
1
|
+
//
|
2
|
+
// This file is part of the Bones source-to-source compiler examples. This C-code
|
3
|
+
// demonstrates the use of Bones for an example application: 'bfs', taken from
|
4
|
+
// the Rodinia benchmark suite. For more information on the application or on Bones
|
5
|
+
// please use the contact information below.
|
6
|
+
//
|
7
|
+
// == More information on bfs
|
8
|
+
// Original code......https://www.cs.virginia.edu/~skadron/wiki/rodinia/
|
9
|
+
//
|
10
|
+
// == More information on Bones
|
11
|
+
// Contact............Cedric Nugteren <c.nugteren@tue.nl>
|
12
|
+
// Web address........http://parse.ele.tue.nl/bones/
|
13
|
+
//
|
14
|
+
// == File information
|
15
|
+
// Filename...........applications/bfs.c
|
16
|
+
// Authors............Cedric Nugteren
|
17
|
+
// Last modified on...08-Jun-2014
|
18
|
+
//
|
19
|
+
//########################################################################
|
20
|
+
|
21
|
+
// Includes
|
22
|
+
#include "common.h"
|
23
|
+
|
24
|
+
//########################################################################
|
25
|
+
//### Start of the main function
|
26
|
+
//########################################################################
|
27
|
+
|
28
|
+
int main(void) {
|
29
|
+
int no_of_nodes;
|
30
|
+
|
31
|
+
// Read input data
|
32
|
+
printf("[bfs] Reading File\n");
|
33
|
+
FILE* fp = fopen(FILENAME, "r");
|
34
|
+
if (!fp) {
|
35
|
+
printf("[bfs] Error Reading graph file\n");
|
36
|
+
return 1;
|
37
|
+
}
|
38
|
+
fscanf(fp,"%d",&no_of_nodes);
|
39
|
+
|
40
|
+
// Arrays
|
41
|
+
int h_graph_nodes_start[MAX_NODES];
|
42
|
+
int h_graph_nodes_edges[MAX_NODES];
|
43
|
+
int h_graph_mask[MAX_NODES];
|
44
|
+
int h_updating_graph_mask[MAX_NODES];
|
45
|
+
int h_graph_visited[MAX_NODES];
|
46
|
+
|
47
|
+
// Initialize
|
48
|
+
int start;
|
49
|
+
int edges;
|
50
|
+
for (int i=0; i<no_of_nodes; i++) {
|
51
|
+
fscanf(fp, "%d %d", &start, &edges);
|
52
|
+
h_graph_nodes_start[i] = start;
|
53
|
+
h_graph_nodes_edges[i] = edges;
|
54
|
+
h_graph_mask[i] = 0;
|
55
|
+
h_updating_graph_mask[i] = 0;
|
56
|
+
h_graph_visited[i] = 0;
|
57
|
+
}
|
58
|
+
|
59
|
+
// Read the source node from the file
|
60
|
+
int source = 0;
|
61
|
+
fscanf(fp, "%d", &source);
|
62
|
+
source = 0;
|
63
|
+
|
64
|
+
// Set the source node as true in the mask
|
65
|
+
h_graph_mask[source] = 1;
|
66
|
+
h_graph_visited[source] = 1;
|
67
|
+
|
68
|
+
// Get the edge list
|
69
|
+
int id;
|
70
|
+
int cost;
|
71
|
+
int edge_list_size;
|
72
|
+
fscanf(fp,"%d",&edge_list_size);
|
73
|
+
int h_graph_edges[MAX_NODES];
|
74
|
+
for(int i=0; i<edge_list_size; i++) {
|
75
|
+
fscanf(fp, "%d", &id);
|
76
|
+
fscanf(fp, "%d", &cost);
|
77
|
+
h_graph_edges[i] = id;
|
78
|
+
}
|
79
|
+
|
80
|
+
// Memory for the result
|
81
|
+
int h_cost[MAX_NODES];
|
82
|
+
for(int i=0; i<MAX_NODES; i++) {
|
83
|
+
h_cost[i] = -1;
|
84
|
+
}
|
85
|
+
h_cost[source] = 0;
|
86
|
+
|
87
|
+
// Start the computation
|
88
|
+
printf("[bfs] Start traversing the tree\n");
|
89
|
+
int k = 0;
|
90
|
+
int stop[1];
|
91
|
+
|
92
|
+
// If no thread changes this value then the loop stops
|
93
|
+
stop[0] = 0;
|
94
|
+
|
95
|
+
#pragma scop
|
96
|
+
for (unsigned t=0; t<10; t++) {
|
97
|
+
//do {
|
98
|
+
|
99
|
+
// Atomic update loop
|
100
|
+
for(int tid=0; tid<no_of_nodes; tid++) {
|
101
|
+
int val1 = h_graph_mask[tid];
|
102
|
+
if (val1 == 1) {
|
103
|
+
h_graph_mask[tid] = 0;
|
104
|
+
int val2 = h_graph_nodes_start[tid];
|
105
|
+
int val3 = h_graph_nodes_edges[tid];
|
106
|
+
for (int i=val2; i<(val3 + val2); i++) {
|
107
|
+
int id = h_graph_edges[i];
|
108
|
+
int val4 = h_graph_visited[id];
|
109
|
+
if (val4 == 0) {
|
110
|
+
h_cost[id] = h_cost[tid] + 1;
|
111
|
+
h_updating_graph_mask[id] = 1;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
// Atomic update loop
|
118
|
+
for (int tid=0; tid<no_of_nodes; tid++) {
|
119
|
+
int val1 = h_updating_graph_mask[tid];
|
120
|
+
if (val1 == 1) {
|
121
|
+
h_graph_mask[tid] = 1;
|
122
|
+
h_graph_visited[tid] = 1;
|
123
|
+
h_updating_graph_mask[tid] = 0;
|
124
|
+
stop[0] = 1;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
// Next iteration
|
129
|
+
//k++;
|
130
|
+
//} while(stop[0] != 0);
|
131
|
+
}
|
132
|
+
#pragma endscop
|
133
|
+
|
134
|
+
// Clean-up and exit
|
135
|
+
if (fp) {
|
136
|
+
fclose(fp);
|
137
|
+
}
|
138
|
+
printf("\n[bfs] Completed\n\n"); fflush(stdout);
|
139
|
+
fflush(stdout);
|
140
|
+
return 0;
|
141
|
+
}
|
142
|
+
|
143
|
+
//########################################################################
|