bones-compiler 1.3.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +62 -0
- data/README.rdoc +14 -3
- data/Rakefile +13 -12
- data/VERSION +1 -1
- data/examples/applications/ffos.c +24 -8
- data/examples/benchmarks/PolyBench/2mm.c +0 -0
- data/examples/benchmarks/PolyBench/3mm.c +0 -0
- data/examples/benchmarks/PolyBench/adi.c +0 -0
- data/examples/benchmarks/PolyBench/atax.c +0 -0
- data/examples/benchmarks/PolyBench/bicg.c +0 -0
- data/examples/benchmarks/PolyBench/cholesky.c +0 -0
- data/examples/benchmarks/PolyBench/common.h +0 -0
- data/examples/benchmarks/PolyBench/correlation.c +0 -0
- data/examples/benchmarks/PolyBench/covariance.c +0 -0
- data/examples/benchmarks/PolyBench/doitgen.c +0 -0
- data/examples/benchmarks/PolyBench/durbin.c +0 -0
- data/examples/benchmarks/PolyBench/dynprog.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d-apml.c +0 -0
- data/examples/benchmarks/PolyBench/fdtd-2d.c +0 -0
- data/examples/benchmarks/PolyBench/floyd-warshall.c +0 -0
- data/examples/benchmarks/PolyBench/gemm.c +0 -0
- data/examples/benchmarks/PolyBench/gemver.c +0 -0
- data/examples/benchmarks/PolyBench/gesummv.c +0 -0
- data/examples/benchmarks/PolyBench/gramschmidt.c +0 -0
- data/examples/benchmarks/PolyBench/jacobi-1d-imper.c +4 -2
- data/examples/benchmarks/PolyBench/jacobi-2d-imper.c +1 -1
- data/examples/benchmarks/PolyBench/lu.c +0 -0
- data/examples/benchmarks/PolyBench/ludcmp.c +0 -0
- data/examples/benchmarks/PolyBench/mvt.c +0 -0
- data/examples/benchmarks/PolyBench/reg_detect.c +0 -0
- data/examples/benchmarks/PolyBench/seidel-2d.c +0 -0
- data/examples/benchmarks/PolyBench/symm.c +0 -0
- data/examples/benchmarks/PolyBench/syr2k.c +0 -0
- data/examples/benchmarks/PolyBench/syrk.c +0 -0
- data/examples/benchmarks/PolyBench/trisolv.c +0 -0
- data/examples/benchmarks/PolyBench/trmm.c +0 -0
- data/examples/benchmarks/Rodinia/bfs.c +143 -0
- data/examples/benchmarks/Rodinia/common.h +78 -0
- data/examples/benchmarks/Rodinia/hotspot.c +106 -126
- data/examples/benchmarks/Rodinia/kmeans.c +157 -164
- data/examples/benchmarks/Rodinia/nw.c +151 -0
- data/examples/benchmarks/Rodinia/pathfinder.c +88 -0
- data/examples/benchmarks/Rodinia/srad.c +50 -59
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +0 -0
- data/examples/benchmarks/other/mm.c +0 -0
- data/examples/benchmarks/other/saxpy.c +0 -0
- data/examples/chunk/example01.c +6 -4
- data/examples/chunk/example02.c +6 -4
- data/examples/chunk/example03.c +6 -4
- data/examples/chunk/example04.c +8 -5
- data/examples/chunk/example05.c +6 -4
- data/examples/chunk/example06.c +3 -1
- data/examples/chunk/example07.c +5 -2
- data/examples/dependences/example01.c +3 -1
- data/examples/dependences/example02.c +3 -1
- data/examples/dependences/example03.c +3 -1
- data/examples/dependences/example04.c +3 -1
- data/examples/dependences/example05.c +3 -1
- data/examples/element/example01.c +6 -4
- data/examples/element/example02.c +6 -4
- data/examples/element/example03.c +10 -8
- data/examples/element/example04.c +6 -4
- data/examples/element/example05.c +8 -5
- data/examples/element/example06.c +6 -4
- data/examples/element/example07.c +6 -4
- data/examples/element/example08.c +6 -4
- data/examples/element/example09.c +6 -4
- data/examples/element/example10.c +4 -2
- data/examples/element/example11.c +4 -2
- data/examples/element/example12.c +4 -2
- data/examples/element/example13.c +3 -1
- data/examples/fusion/example01.c +3 -12
- data/examples/fusion/example02.c +3 -16
- data/examples/fusion/example03.c +3 -1
- data/examples/fusion/example04.c +5 -3
- data/examples/fusion/example05.c +3 -1
- data/examples/neighbourhood/example01.c +6 -4
- data/examples/neighbourhood/example02.c +6 -4
- data/examples/neighbourhood/example03.c +6 -4
- data/examples/neighbourhood/example04.c +5 -3
- data/examples/neighbourhood/example05.c +3 -1
- data/examples/shared/example01.c +6 -4
- data/examples/shared/example02.c +6 -4
- data/examples/shared/example03.c +6 -4
- data/examples/shared/example04.c +6 -4
- data/examples/shared/example05.c +6 -4
- data/lib/adarwin/engine.rb +16 -5
- data/lib/adarwin/memorycopies.rb +21 -9
- data/lib/adarwin/nest.rb +18 -1
- data/lib/adarwin/preprocessor.rb +5 -2
- data/lib/adarwin/reference.rb +71 -6
- data/lib/bones/algorithm.rb +20 -5
- data/lib/bones/copy.rb +3 -2
- data/lib/bones/engine.rb +12 -9
- data/lib/bones/preprocessor.rb +170 -120
- data/lib/bones/variablelist.rb +1 -1
- data/lib/cast.rb +11 -0
- data/lib/castaddon.rb +23 -6
- data/lib/castaddon/node_adarwin.rb +17 -0
- data/lib/castaddon/node_common.rb +6 -0
- data/lib/castaddon/transformations.rb +13 -9
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +0 -0
- data/skeletons/CPU-C/common/globals_kernel.c +0 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +0 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +0 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_globals.c +5 -0
- data/skeletons/CPU-C/kernel/default.host.c +0 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-C/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +0 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +0 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +0 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_globals.c +2 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +3 -3
- data/skeletons/CPU-OPENMP/kernel/default.host.c +0 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +0 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +0 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +0 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +0 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_global.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/prologue.c +0 -0
- data/skeletons/GPU-CUDA/common/scheduler.c +2 -2
- data/skeletons/GPU-CUDA/common/timer_1_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-CUDA/common/timer_globals.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +0 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +0 -0
- data/skeletons/GPU-CUDA/skeletons.txt +4 -2
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +0 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +0 -0
- data/skeletons/verification/header.c +0 -0
- data/skeletons/verification/timer_start.c +0 -0
- data/skeletons/verification/timer_stop.c +0 -0
- data/skeletons/verification/verify_results.c +0 -0
- data/test/bones/test_algorithm.rb +0 -0
- data/test/bones/test_common.rb +0 -0
- data/test/bones/test_preprocessor.rb +0 -0
- data/test/bones/test_species.rb +0 -0
- data/test/bones/test_variable.rb +0 -0
- data/test/examples/benchmarks/PolyBench/2mm_species.c +1 -1
- data/test/examples/benchmarks/PolyBench/3mm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +0 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +0 -0
- data/test/examples/chunk/example01_species.c +3 -3
- data/test/examples/chunk/example02_species.c +3 -3
- data/test/examples/chunk/example03_species.c +3 -3
- data/test/examples/chunk/example04_species.c +3 -3
- data/test/examples/chunk/example05_species.c +3 -3
- data/test/examples/chunk/example06_species.c +1 -1
- data/test/examples/chunk/example07_species.c +3 -2
- data/test/examples/dependences/example01_species.c +1 -1
- data/test/examples/dependences/example02_species.c +1 -1
- data/test/examples/dependences/example03_species.c +1 -1
- data/test/examples/dependences/example04_species.c +1 -1
- data/test/examples/dependences/example05_species.c +1 -1
- data/test/examples/element/example01_species.c +3 -3
- data/test/examples/element/example02_species.c +3 -3
- data/test/examples/element/example03_species.c +7 -7
- data/test/examples/element/example04_species.c +3 -3
- data/test/examples/element/example05_species.c +3 -3
- data/test/examples/element/example06_species.c +3 -3
- data/test/examples/element/example07_species.c +3 -3
- data/test/examples/element/example08_species.c +3 -3
- data/test/examples/element/example09_species.c +3 -3
- data/test/examples/element/example10_species.c +1 -1
- data/test/examples/element/example11_species.c +1 -1
- data/test/examples/element/example12_species.c +1 -1
- data/test/examples/element/example13_species.c +1 -1
- data/test/examples/neighbourhood/example01_species.c +3 -3
- data/test/examples/neighbourhood/example02_species.c +3 -3
- data/test/examples/neighbourhood/example03_species.c +3 -3
- data/test/examples/neighbourhood/example04_species.c +3 -3
- data/test/examples/neighbourhood/example05_species.c +1 -1
- data/test/examples/shared/example01_species.c +3 -3
- data/test/examples/shared/example02_species.c +3 -3
- data/test/examples/shared/example03_species.c +3 -3
- data/test/examples/shared/example04_species.c +3 -3
- data/test/examples/shared/example05_species.c +3 -3
- data/test/test_helper.rb +2 -2
- metadata +266 -252
- checksums.yaml +0 -15
- data/examples/benchmarks/Rodinia/cfd.c +0 -180
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........neighbourhood/
|
|
11
|
+
// Filename...........neighbourhood/example02.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
#define A 256
|
|
18
18
|
#define B 512
|
|
19
19
|
|
|
20
|
-
// This is '
|
|
20
|
+
// This is 'example02', demonstrating a 2D array, a 2D neighbourhood and a for-loop-less notation of the neighbourhood accesses
|
|
21
21
|
int main(void) {
|
|
22
22
|
int i,j;
|
|
23
23
|
|
|
@@ -33,7 +33,8 @@ int main(void) {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
// Perform the computation
|
|
36
|
-
#pragma
|
|
36
|
+
#pragma scop
|
|
37
|
+
#pragma species kernel in[0:255,0:511]|neighbourhood(-1:1,-1:1) -> out[0:255,0:511]|element
|
|
37
38
|
for(i=0;i<A;i++) {
|
|
38
39
|
for(j=0;j<B;j++) {
|
|
39
40
|
if (i >= 1 && j >= 1 && i < (A-1) && j < (B-1)) {
|
|
@@ -47,6 +48,7 @@ int main(void) {
|
|
|
47
48
|
}
|
|
48
49
|
}
|
|
49
50
|
#pragma species endkernel example2
|
|
51
|
+
#pragma endscop
|
|
50
52
|
|
|
51
53
|
// Clean-up and exit the function
|
|
52
54
|
fflush(stdout);
|
|
@@ -8,9 +8,9 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........neighbourhood/
|
|
11
|
+
// Filename...........neighbourhood/example03.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
float ** alloc_2D(int size1, int size2);
|
|
24
24
|
void free_2D(float ** array_2D);
|
|
25
25
|
|
|
26
|
-
// This is '
|
|
26
|
+
// This is 'example03', demonstrating a neighbourhood with only some values used (a cross) and a math.h square root function call
|
|
27
27
|
int main(void) {
|
|
28
28
|
int i,j;
|
|
29
29
|
int sizea = A;
|
|
@@ -41,7 +41,8 @@ int main(void) {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
// Perform the computation
|
|
44
|
-
#pragma
|
|
44
|
+
#pragma scop
|
|
45
|
+
#pragma species kernel in[0:sizea-1,0:sizeb-1]|neighbourhood(-1:1,-1:1) -> out[0:sizea-1,0:sizeb-1]|element
|
|
45
46
|
for(i=0;i<sizea;i++) {
|
|
46
47
|
for(j=0;j<sizeb;j++) {
|
|
47
48
|
if (i >= 1 && j >= 1 && i < (sizea-1) && j < (sizeb-1)) {
|
|
@@ -55,6 +56,7 @@ int main(void) {
|
|
|
55
56
|
}
|
|
56
57
|
}
|
|
57
58
|
#pragma species endkernel example3
|
|
59
|
+
#pragma endscop
|
|
58
60
|
|
|
59
61
|
// Clean-up and exit the function
|
|
60
62
|
free_2D(in);
|
|
@@ -8,14 +8,14 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........neighbourhood/
|
|
11
|
+
// Filename...........neighbourhood/example04.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
|
|
18
|
-
// This is '
|
|
18
|
+
// This is 'example04', demonstrating naming (optional) in the classification to distinguish the two input arrays
|
|
19
19
|
int main(void) {
|
|
20
20
|
int i;
|
|
21
21
|
float factor;
|
|
@@ -33,6 +33,7 @@ int main(void) {
|
|
|
33
33
|
}
|
|
34
34
|
|
|
35
35
|
// Perform the computation
|
|
36
|
+
#pragma scop
|
|
36
37
|
#pragma species kernel B[0:size-1]|neighbourhood(-1:1) ^ A[0:size-1]|element -> C[0:size-1]|element
|
|
37
38
|
for(i=0;i<size;i++) {
|
|
38
39
|
factor = A[i]/100.0;
|
|
@@ -44,6 +45,7 @@ int main(void) {
|
|
|
44
45
|
}
|
|
45
46
|
}
|
|
46
47
|
#pragma species endkernel example4
|
|
48
|
+
#pragma endscop
|
|
47
49
|
|
|
48
50
|
// Clean-up and exit the function
|
|
49
51
|
fflush(stdout);
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
// == File information
|
|
11
11
|
// Filename...........neighbourhood/example05.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
@@ -31,11 +31,13 @@ int main(void) {
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
// Perform the computation
|
|
34
|
+
#pragma scop
|
|
34
35
|
#pragma species kernel A[2:N]|neighbourhood(0:1) -> B[2:N-1]|element
|
|
35
36
|
for (i=2; i<N; i++) {
|
|
36
37
|
B[i] = A[i] + A[i+1];
|
|
37
38
|
}
|
|
38
39
|
#pragma species endkernel example05
|
|
40
|
+
#pragma endscop
|
|
39
41
|
|
|
40
42
|
// Clean-up and exit the function
|
|
41
43
|
fflush(stdout);
|
data/examples/shared/example01.c
CHANGED
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........shared/
|
|
11
|
+
// Filename...........shared/example01.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
#include <stdlib.h>
|
|
18
18
|
#define SIZE 512*1024
|
|
19
19
|
|
|
20
|
-
// This is '
|
|
20
|
+
// This is 'example01', a basic associative and commutative reduction to scalar
|
|
21
21
|
int main(void) {
|
|
22
22
|
int i;
|
|
23
23
|
|
|
@@ -32,11 +32,13 @@ int main(void) {
|
|
|
32
32
|
|
|
33
33
|
// Perform the computation
|
|
34
34
|
B[0] = 0;
|
|
35
|
-
#pragma
|
|
35
|
+
#pragma scop
|
|
36
|
+
#pragma species kernel A[0:SIZE-1]|element -> B[0:0]|shared
|
|
36
37
|
for(i=0;i<SIZE;i++) {
|
|
37
38
|
B[0] = B[0] + A[i];
|
|
38
39
|
}
|
|
39
40
|
#pragma species endkernel example1
|
|
41
|
+
#pragma endscop
|
|
40
42
|
|
|
41
43
|
// Clean-up and exit the function
|
|
42
44
|
fflush(stdout);
|
data/examples/shared/example02.c
CHANGED
|
@@ -8,14 +8,14 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........shared/
|
|
11
|
+
// Filename...........shared/example02.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
|
|
18
|
-
// This is '
|
|
18
|
+
// This is 'example02', demonstrating a 3D input reduction to scalar
|
|
19
19
|
int main(void) {
|
|
20
20
|
int a,b,c;
|
|
21
21
|
|
|
@@ -34,7 +34,8 @@ int main(void) {
|
|
|
34
34
|
|
|
35
35
|
// Perform the computation
|
|
36
36
|
out[0] = 0;
|
|
37
|
-
#pragma
|
|
37
|
+
#pragma scop
|
|
38
|
+
#pragma species kernel in[0:7,0:15,0:31]|element -> out[0:0]|shared
|
|
38
39
|
for(a=0;a<8;a++) {
|
|
39
40
|
for(b=0;b<16;b++) {
|
|
40
41
|
for(c=0;c<32;c++) {
|
|
@@ -43,6 +44,7 @@ int main(void) {
|
|
|
43
44
|
}
|
|
44
45
|
}
|
|
45
46
|
#pragma species endkernel example2
|
|
47
|
+
#pragma endscop
|
|
46
48
|
|
|
47
49
|
// Clean-up and exit the function
|
|
48
50
|
fflush(stdout);
|
data/examples/shared/example03.c
CHANGED
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........shared/
|
|
11
|
+
// Filename...........shared/example03.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
#include <stdlib.h>
|
|
18
18
|
#define SIZE 1024
|
|
19
19
|
|
|
20
|
-
// This is '
|
|
20
|
+
// This is 'example03', demonstrating a reduction to a 2D array
|
|
21
21
|
int main(void) {
|
|
22
22
|
int i,p,q;
|
|
23
23
|
int index1,index2;
|
|
@@ -39,13 +39,15 @@ int main(void) {
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
// Perform the computation
|
|
42
|
-
#pragma
|
|
42
|
+
#pragma scop
|
|
43
|
+
#pragma species kernel in[0:SIZE-1]|element -> B[0:19,0:9]|shared
|
|
43
44
|
for(i=0;i<SIZE;i++) {
|
|
44
45
|
index1 = in[i]%20;
|
|
45
46
|
index2 = in[i]%10;
|
|
46
47
|
B[index1][index2] = B[index1][index2] + 1;
|
|
47
48
|
}
|
|
48
49
|
#pragma species endkernel example3
|
|
50
|
+
#pragma endscop
|
|
49
51
|
|
|
50
52
|
// Clean-up and exit the function
|
|
51
53
|
free(in);
|
data/examples/shared/example04.c
CHANGED
|
@@ -8,16 +8,16 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........shared/
|
|
11
|
+
// Filename...........shared/example04.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
#include <stdlib.h>
|
|
18
18
|
#define SIZE 1024*1024
|
|
19
19
|
|
|
20
|
-
// This is '
|
|
20
|
+
// This is 'example04', demonstrating a basic 256-bin histogram computation
|
|
21
21
|
int main(void) {
|
|
22
22
|
int i;
|
|
23
23
|
unsigned char index;
|
|
@@ -37,12 +37,14 @@ int main(void) {
|
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
// Perform the computation
|
|
40
|
-
#pragma
|
|
40
|
+
#pragma scop
|
|
41
|
+
#pragma species kernel A[0:SIZE-1]|element -> B[0:255]|shared
|
|
41
42
|
for(i=0;i<SIZE;i++) {
|
|
42
43
|
index = A[i];
|
|
43
44
|
B[index]++;
|
|
44
45
|
}
|
|
45
46
|
#pragma species endkernel example4
|
|
47
|
+
#pragma endscop
|
|
46
48
|
|
|
47
49
|
// Clean-up and exit the function
|
|
48
50
|
free(A);
|
data/examples/shared/example05.c
CHANGED
|
@@ -8,14 +8,14 @@
|
|
|
8
8
|
// Web address........http://parse.ele.tue.nl/bones/
|
|
9
9
|
//
|
|
10
10
|
// == File information
|
|
11
|
-
// Filename...........shared/
|
|
11
|
+
// Filename...........shared/example05.c
|
|
12
12
|
// Author.............Cedric Nugteren
|
|
13
|
-
// Last modified on...
|
|
13
|
+
// Last modified on...10-October-2014
|
|
14
14
|
//
|
|
15
15
|
|
|
16
16
|
#include <stdio.h>
|
|
17
17
|
|
|
18
|
-
// This is '
|
|
18
|
+
// This is 'example05', demonstrating an inner-loop only classification of a reduction to scalar
|
|
19
19
|
int main(void) {
|
|
20
20
|
int a,b,c;
|
|
21
21
|
|
|
@@ -32,14 +32,16 @@ int main(void) {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
// Perform the computation
|
|
35
|
+
#pragma scop
|
|
35
36
|
for(a=0;a<16;a++) {
|
|
36
|
-
#pragma species kernel a:a,0:a|element -> 0:0|shared
|
|
37
|
+
#pragma species kernel in[a:a,0:a]|element -> out[0:0]|shared
|
|
37
38
|
for(b=0;b<=a;b++) {
|
|
38
39
|
out[0] = out[0] - in[a][b]*in[a][b];
|
|
39
40
|
}
|
|
40
41
|
#pragma species endkernel example5
|
|
41
42
|
out[0] = 1.002;
|
|
42
43
|
}
|
|
44
|
+
#pragma endscop
|
|
43
45
|
|
|
44
46
|
// Clean-up and exit the function
|
|
45
47
|
fflush(stdout);
|
data/lib/adarwin/engine.rb
CHANGED
|
@@ -83,16 +83,27 @@ module Adarwin
|
|
|
83
83
|
# Parse the original source code into AST form (using CAST)
|
|
84
84
|
original_ast = parser.parse(preprocessor.parsed_code)
|
|
85
85
|
|
|
86
|
+
# Process every SCoP, one by one
|
|
87
|
+
@id = 0
|
|
88
|
+
@result[:species_code] = preprocessor.target_code
|
|
89
|
+
preprocessor.scop_code.each do |scop_code|
|
|
90
|
+
process_scop(scop_code)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def process_scop(scop_code)
|
|
86
95
|
# Create an AST of the SCoP (using CAST) and save a backup
|
|
87
|
-
scop_ast = C::Block.parse('{'+
|
|
96
|
+
scop_ast = C::Block.parse('{'+scop_code+'}')
|
|
88
97
|
original_scop_ast = scop_ast.clone
|
|
89
98
|
|
|
90
99
|
# Process the scop to identify the loop nests of interest and to find the
|
|
91
100
|
# corresponding species. This is the method performing most of the work.
|
|
92
101
|
@nests = []
|
|
93
|
-
@id = 0
|
|
94
102
|
populate_nests(scop_ast)
|
|
95
103
|
|
|
104
|
+
# return if no loop nests are found in the code
|
|
105
|
+
return unless @nests.length > 0
|
|
106
|
+
|
|
96
107
|
# Remove inner-loop (nested) species. This removes all species that are
|
|
97
108
|
# found within another species. For completeness, this might be desired in
|
|
98
109
|
# some cases.
|
|
@@ -141,7 +152,7 @@ module Adarwin
|
|
|
141
152
|
puts modified_scop if !@options[:silent]
|
|
142
153
|
|
|
143
154
|
# Store the result
|
|
144
|
-
@result[:species_code]
|
|
155
|
+
@result[:species_code].gsub!(scop_code,modified_scop)
|
|
145
156
|
end
|
|
146
157
|
|
|
147
158
|
# This method writes the output code to a file.
|
|
@@ -149,7 +160,7 @@ module Adarwin
|
|
|
149
160
|
|
|
150
161
|
# Populate the species file
|
|
151
162
|
# TODO: The filename is fixed, make this an optional argument
|
|
152
|
-
File.open(File.join(@options[:application].
|
|
163
|
+
File.open(File.join(@options[:application].rpartition('.').first+'_species'+'.c'),'w') do |target|
|
|
153
164
|
target.puts @result[:species_code]
|
|
154
165
|
end
|
|
155
166
|
end
|
|
@@ -172,9 +183,9 @@ module Adarwin
|
|
|
172
183
|
# Only continue if the nest is an actual loop nest
|
|
173
184
|
if nest.for_statement?
|
|
174
185
|
@nests.push(Nest.new(new_level,nest,@id,@basename,!@options[:silent]))
|
|
186
|
+
@id += 1
|
|
175
187
|
end
|
|
176
188
|
end
|
|
177
|
-
@id += 1
|
|
178
189
|
end
|
|
179
190
|
|
|
180
191
|
# Proceed to the next depth level.
|
data/lib/adarwin/memorycopies.rb
CHANGED
|
@@ -2,14 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
# Recursive copy optimisations
|
|
4
4
|
def recursive_copy_optimisations(nests,options)
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
2.times do
|
|
6
|
+
perform_copy_optimisations1(nests,options)
|
|
7
|
+
perform_copy_optimisations2(nests,options)
|
|
8
|
+
nests.each do |nest|
|
|
9
|
+
children = get_children(nest)
|
|
10
|
+
recursive_copy_optimisations(children,options) if !children.empty?
|
|
11
|
+
end
|
|
12
|
+
perform_copy_optimisations3(nests,options)
|
|
13
|
+
perform_copy_optimisations3(nests,options)
|
|
10
14
|
end
|
|
11
|
-
perform_copy_optimisations3(nests,options)
|
|
12
|
-
perform_copy_optimisations3(nests,options)
|
|
13
15
|
end
|
|
14
16
|
|
|
15
17
|
# First set of copyin/copyout optimisations (recursive)
|
|
@@ -134,8 +136,18 @@ def perform_copy_optimisations3(nests,options)
|
|
|
134
136
|
# Move copyins to outer loops
|
|
135
137
|
children.first.copyins.each do |copyin|
|
|
136
138
|
to_outer_loop = true
|
|
137
|
-
nest.outer_loops.map{ |l| l[:var] }.
|
|
138
|
-
|
|
139
|
+
nest.outer_loops.map{ |l| l[:var] }.each_with_index do |var,lindex|
|
|
140
|
+
if copyin.depends_on?(var)
|
|
141
|
+
to_outer_loop = false
|
|
142
|
+
if copyin.tD[0].a == var && copyin.tD[0].b == var
|
|
143
|
+
loopinfo = nest.outer_loops[lindex]
|
|
144
|
+
if loopinfo[:step] == "1"
|
|
145
|
+
copyin.tD[0].a = loopinfo[:min]
|
|
146
|
+
copyin.tD[0].b = loopinfo[:max]
|
|
147
|
+
to_outer_loop = true
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
139
151
|
end
|
|
140
152
|
children.drop(1).each do |child|
|
|
141
153
|
to_outer_loop = false if child.copyins.map{ |c| c.tN }.include?(copyin.tN)
|
data/lib/adarwin/nest.rb
CHANGED
|
@@ -47,12 +47,15 @@ module Adarwin
|
|
|
47
47
|
@all_loops = @code.get_all_loops()
|
|
48
48
|
@outer_loops = @code.get_direct_loops()
|
|
49
49
|
@inner_loops = @all_loops - @outer_loops
|
|
50
|
+
|
|
51
|
+
# Get all local variable declarations
|
|
52
|
+
@var_declarations = @code.get_var_declarations()
|
|
50
53
|
|
|
51
54
|
# Process the read/write nodes in the loop body to obtain the array
|
|
52
55
|
# reference characterisations. The references also need to be aware of all
|
|
53
56
|
# loop data and of any if-statements in the loop body.
|
|
54
57
|
@references = @code.clone.get_accesses().map do |reference|
|
|
55
|
-
Reference.new(reference,@id,@inner_loops,@outer_loops,@verbose)
|
|
58
|
+
Reference.new(reference,@id,@inner_loops,@outer_loops,@var_declarations,@verbose)
|
|
56
59
|
end
|
|
57
60
|
|
|
58
61
|
# Perform the dependence test. The result can be either true or false.
|
|
@@ -121,6 +124,19 @@ module Adarwin
|
|
|
121
124
|
# Else, set the species for the individual accesses.
|
|
122
125
|
read_names = (@reads.empty?) ? ['0:0|void'] : @reads.map{ |r| r.to_species }
|
|
123
126
|
write_names = (@writes.empty?) ? ['0:0|void'] : @writes.map{ |r| r.to_species }
|
|
127
|
+
|
|
128
|
+
# Remove a 'full' access pattern in case there is a same 'shared' write pattern
|
|
129
|
+
write_names.each do |write_name|
|
|
130
|
+
write_parts = write_name.split(PIPE)
|
|
131
|
+
if write_parts.last == 'shared'
|
|
132
|
+
read_names.each do |read_name|
|
|
133
|
+
read_parts = read_name.split(PIPE)
|
|
134
|
+
if read_parts.last == 'full' && read_parts.first == write_parts.first
|
|
135
|
+
read_names.delete(read_name)
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
124
140
|
|
|
125
141
|
# Combine the descriptions (using Reference's +to_s+ method) into species
|
|
126
142
|
species_in = read_names.uniq.join(' '+WEDGE+' ')
|
|
@@ -174,6 +190,7 @@ module Adarwin
|
|
|
174
190
|
return false if @removed
|
|
175
191
|
return false if @has_dependences
|
|
176
192
|
return false if @species == ''
|
|
193
|
+
return false if (@writes) && (@writes.select{ |a| a.pattern == 'shared' }.length > 3)
|
|
177
194
|
only_full = (@reads) ? @reads.select{ |a| a.pattern != 'full' }.empty? : false
|
|
178
195
|
only_shared = (@writes) ? @writes.select{ |a| a.pattern != 'shared' }.empty? : false
|
|
179
196
|
return !(only_full && only_shared)
|