bones-compiler 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
|
|
2
|
+
// Store the initial value
|
|
3
|
+
cl_mem bones_initial_value = clCreateBuffer(bones_context,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(<out0_type>),<out0_name>,&bones_errors); error_check(bones_errors);
|
|
4
|
+
|
|
5
|
+
// Create the kernels
|
|
6
|
+
cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
|
|
7
|
+
cl_kernel bones_kernel_<algorithm_name>_1 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_1", &bones_errors); error_check(bones_errors);
|
|
8
|
+
cl_kernel bones_kernel_<algorithm_name>_2 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_2", &bones_errors); error_check(bones_errors);
|
|
9
|
+
|
|
10
|
+
// Run either one kernel or multiple kernels
|
|
11
|
+
if (<in0_dimensions> <= 512) {
|
|
12
|
+
|
|
13
|
+
// Set all the arguments to the kernel function
|
|
14
|
+
int bones_num_args = 3;
|
|
15
|
+
int bones_dimensions = <in0_dimensions>;
|
|
16
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
|
|
17
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
|
|
18
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
|
19
|
+
<kernel_argument_list_constants>
|
|
20
|
+
// Start only one kernel
|
|
21
|
+
const int bones_num_threads = DIV_CEIL(<in0_dimensions>,2);
|
|
22
|
+
size_t bones_local_worksize1[] = {bones_num_threads};
|
|
23
|
+
size_t bones_global_worksize1[] = {bones_num_threads};
|
|
24
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
|
|
25
|
+
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
|
|
29
|
+
// Allocate space for an intermediate array
|
|
30
|
+
cl_mem bones_device_temp = clCreateBuffer(bones_context,CL_MEM_READ_WRITE,128*sizeof(<out0_type>),NULL,&bones_errors); error_check(bones_errors);
|
|
31
|
+
|
|
32
|
+
// Set all the arguments to the kernel function
|
|
33
|
+
int bones_num_args = 3;
|
|
34
|
+
int bones_dimensions = <in0_dimensions>;
|
|
35
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
|
|
36
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
|
|
37
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(bones_device_temp),(void*)&bones_device_temp);
|
|
38
|
+
<kernel_argument_list_constants>
|
|
39
|
+
// Start the first kernel
|
|
40
|
+
size_t bones_local_worksize1[] = {256};
|
|
41
|
+
size_t bones_global_worksize1[] = {256*128};
|
|
42
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
|
|
43
|
+
|
|
44
|
+
// Set all the arguments to the kernel function
|
|
45
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_1,0,sizeof(bones_device_temp),(void*)&bones_device_temp);
|
|
46
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_1,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
|
47
|
+
// Start the second kernel
|
|
48
|
+
size_t bones_local_worksize2[] = {128};
|
|
49
|
+
size_t bones_global_worksize2[] = {128};
|
|
50
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_1,1,NULL,bones_global_worksize2,bones_local_worksize2,0,NULL,&bones_event); error_check(bones_errors);
|
|
51
|
+
clReleaseMemObject(bones_device_temp);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Set all the arguments to the kernel function
|
|
55
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_2,0,sizeof(bones_initial_value),(void*)&bones_initial_value);
|
|
56
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_2,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
|
57
|
+
// Perform the last computation (only needed if there is an initial value)
|
|
58
|
+
size_t bones_local_worksize3[] = {1};
|
|
59
|
+
size_t bones_global_worksize3[] = {1};
|
|
60
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_2,1,NULL,bones_global_worksize3,bones_local_worksize3,0,NULL,&bones_event); error_check(bones_errors);
|
|
61
|
+
clReleaseMemObject(bones_initial_value);
|
|
62
|
+
|
|
63
|
+
// Synchronize and clean-up the kernels
|
|
64
|
+
clFinish(bones_queue);
|
|
65
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_0);
|
|
66
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_1);
|
|
67
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_2);
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
|
|
2
|
+
// Start of the <algorithm_name> kernel (main, not unrolled kernel)
|
|
3
|
+
__kernel void bones_kernel_<algorithm_name>_0(int bones_input_size, __global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>, <argument_definition>) {
|
|
4
|
+
const int bones_threadblock_work = DIV_CEIL(bones_input_size,get_num_groups(0));
|
|
5
|
+
const int bones_parallel_work = BONES_MIN(get_local_size(0),bones_threadblock_work);
|
|
6
|
+
const int bones_sequential_work = DIV_CEIL(bones_threadblock_work,bones_parallel_work);
|
|
7
|
+
const int bones_local_id = get_local_id(0);
|
|
8
|
+
const int bones_global_id = get_global_id(0);
|
|
9
|
+
<ids>
|
|
10
|
+
int bones_iter_id = <in0_flatindex>;
|
|
11
|
+
|
|
12
|
+
// Load data into thread private memory and perform the first computation(s) sequentially
|
|
13
|
+
<in0_type> bones_temporary = <in0_name>[bones_iter_id];
|
|
14
|
+
<in0_type> bones_private_memory = <algorithm_code3>;
|
|
15
|
+
for(int c=1; c<bones_sequential_work; c++) {
|
|
16
|
+
bones_iter_id = bones_iter_id + bones_parallel_work*get_num_groups(0)<factors>;
|
|
17
|
+
if (bones_iter_id <= <in0_to>) {
|
|
18
|
+
bones_temporary = <in0_name>[bones_iter_id];
|
|
19
|
+
bones_private_memory = <algorithm_code1>;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
// Initialize the local memory
|
|
23
|
+
volatile __local <in0_type> bones_local_memory[256];
|
|
24
|
+
bones_local_memory[bones_local_id] = bones_private_memory;
|
|
25
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
|
26
|
+
|
|
27
|
+
// Perform the remainder of the computations in parallel using a parallel reduction tree
|
|
28
|
+
int bones_offset_id;
|
|
29
|
+
for (int c=256; c>=2; c=c>>1) {
|
|
30
|
+
if ((2*bones_parallel_work > c) && (get_local_id(0) < c/2)) {
|
|
31
|
+
bones_offset_id = get_local_id(0)+c/2;
|
|
32
|
+
if (bones_offset_id < bones_parallel_work) {
|
|
33
|
+
bones_local_memory[bones_local_id] = <algorithm_code2>;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Write the final result back to the global memory
|
|
40
|
+
if (get_local_id(0) == 0) { <out0_name>[get_group_id(0)] = bones_local_memory[0]; }
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Start of the <algorithm_name> kernel (secondary, not unrolled kernel)
|
|
44
|
+
__kernel void bones_kernel_<algorithm_name>_1(__global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>) {
|
|
45
|
+
const int bones_local_id = get_local_id(0);
|
|
46
|
+
const int bones_global_id = get_local_id(0);
|
|
47
|
+
|
|
48
|
+
// Initialize the local memory
|
|
49
|
+
volatile __local <in0_type> bones_local_memory[128];
|
|
50
|
+
bones_local_memory[bones_local_id] = <in0_name>[bones_global_id];
|
|
51
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
|
52
|
+
|
|
53
|
+
// Perform reduction using a parallel reduction tree
|
|
54
|
+
int bones_offset_id;
|
|
55
|
+
for (int c=128; c>=2; c=c>>1) {
|
|
56
|
+
if (get_local_id(0) < c/2) {
|
|
57
|
+
bones_offset_id = get_local_id(0)+c/2;
|
|
58
|
+
bones_local_memory[bones_local_id] = <algorithm_code2>;
|
|
59
|
+
}
|
|
60
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Write the final result back to the global memory
|
|
64
|
+
if (get_local_id(0) == 0) { <out0_name>[0] = bones_local_memory[0]; }
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Start of the <algorithm_name> kernel (final, initial value kernel)
|
|
68
|
+
__kernel void bones_kernel_<algorithm_name>_2(__global <out0_type><out0_devicepointer> bones_initial_value, __global <out0_type><out0_devicepointer> <out0_name>) {
|
|
69
|
+
<out0_type> bones_private_memory = <out0_name>[0];
|
|
70
|
+
<out0_type> bones_temporary = bones_initial_value[0];
|
|
71
|
+
<out0_name>[0] = <algorithm_code4>;
|
|
72
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
|
|
2
|
+
// Create the kernel
|
|
3
|
+
cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
|
|
4
|
+
|
|
5
|
+
// Set all the arguments to the kernel function
|
|
6
|
+
int bones_num_args = 0;
|
|
7
|
+
<kernel_argument_list>
|
|
8
|
+
// Start the kernel
|
|
9
|
+
size_t bones_global_worksize[] = {<parallelism>};
|
|
10
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize,NULL,0,NULL,&bones_event); error_check(bones_errors);
|
|
11
|
+
|
|
12
|
+
// Synchronize and clean-up the kernel
|
|
13
|
+
clFinish(bones_queue);
|
|
14
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_0);
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
|
|
2
|
+
// Start of the <algorithm_name> kernel
|
|
3
|
+
__kernel void bones_kernel_<algorithm_name>_0(<devicedefinitionsopencl>, <argument_definition>) {
|
|
4
|
+
const int bones_global_id = get_global_id(0);
|
|
5
|
+
if (bones_global_id < (<parallelism>)) {
|
|
6
|
+
|
|
7
|
+
// Calculate the global ID(s) based on the thread id
|
|
8
|
+
<ids>
|
|
9
|
+
|
|
10
|
+
// Start the computation
|
|
11
|
+
<algorithm_code1>
|
|
12
|
+
}
|
|
13
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
###################################################################
|
|
2
|
+
# Each line holds one mapping from species to skeleton
|
|
3
|
+
# The ordering is always ['chunk','neighbourhood','element','shared','void']
|
|
4
|
+
# The pattern 'full' is omitted from matching (will thus always match)
|
|
5
|
+
# 'D' denotes any number of ranges (e.g. D|element can be any dimension)
|
|
6
|
+
# 'N' denotes exactly one range of any size (e.g. N,N|element must be 2D)
|
|
7
|
+
# '+' denotes one or more of these patterns
|
|
8
|
+
###################################################################
|
|
9
|
+
D|chunk(D)+ -> D|chunk(D)+ :default :00
|
|
10
|
+
D|chunk(D)+ -> D|chunk(D)+ ^ D|element+ :default :00
|
|
11
|
+
D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ :default :00
|
|
12
|
+
D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ ^ D|element+ :default :00
|
|
13
|
+
D|chunk(D)+ -> D|element+ :default :00
|
|
14
|
+
D|chunk(D)+ ^ D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
|
|
15
|
+
D|chunk(D)+ ^ D|element+ -> D|element+ :default :00
|
|
16
|
+
N|neighbourhood(N)+ -> N|element+ :default :00
|
|
17
|
+
D|neighbourhood(D)+ -> D|element+ :default :00
|
|
18
|
+
D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
|
|
19
|
+
D|element+ -> D|chunk(D)+ :default :00
|
|
20
|
+
D|element+ -> D|element+ :default :00
|
|
21
|
+
D|element -> 1|shared :D-element-to-1-shared :02 03 04 05
|
|
22
|
+
D|void -> D|element+ :default :00
|
|
23
|
+
|
|
24
|
+
#D|element+ -> D|shared+ :default :09
|
|
25
|
+
#D|element+ -> D|element+ ^ D|shared+ :default :09
|
|
26
|
+
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
|
|
2
|
+
// Stop the timer for the measurement of the original code's execution time
|
|
3
|
+
struct timeval bones_end_time;
|
|
4
|
+
gettimeofday(&bones_end_time, NULL);
|
|
5
|
+
float bones_timer = 0.001 * (1000000*(bones_end_time.tv_sec-bones_start_time.tv_sec)+bones_end_time.tv_usec-bones_start_time.tv_usec);
|
|
6
|
+
printf(">>>\t\t\t Execution time [original ]: %.3lf ms.\n", bones_timer);
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/* STARTDEF
|
|
2
|
+
void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>);
|
|
3
|
+
ENDDEF */
|
|
4
|
+
void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>) {
|
|
5
|
+
long bones_m=0;
|
|
6
|
+
long bones_e=0;
|
|
7
|
+
for (int bones_global_id=0; bones_global_id<<dimensions>; bones_global_id++) {
|
|
8
|
+
<verifyids>
|
|
9
|
+
int bones_id = <flatindex>;
|
|
10
|
+
if (fabs(bones_a[bones_id]) > 0.000000001 ) {
|
|
11
|
+
if ((fabs((bones_b[bones_id]/bones_a[bones_id])-1) < 0.001)) { bones_m++; } else { bones_e++; }
|
|
12
|
+
} else {
|
|
13
|
+
if (fabs(bones_a[bones_id]-bones_b[bones_id]) < 0.001) { bones_m++; } else { bones_e++; }
|
|
14
|
+
}
|
|
15
|
+
//printf("%.3lf versus %.3lf\n",bones_a[bones_id],bones_b[bones_id]);
|
|
16
|
+
//printf("%d versus %d\n",bones_a[bones_id],bones_b[bones_id]);
|
|
17
|
+
}
|
|
18
|
+
printf("*** Verification ");
|
|
19
|
+
if (bones_e == 0) { printf("complete: no errors found.\n"); }
|
|
20
|
+
else { printf("warning: found %li (%.1lf%%) error(s).\n", bones_e, (bones_e*100.0)/(bones_e+bones_m)); }
|
|
21
|
+
|
|
22
|
+
}
|
|
23
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Include the test helper
|
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
|
3
|
+
|
|
4
|
+
# Test class for the algorithm class. At the moment it only runs the shared
# set-up code; the single test method contains no assertions.
class TestAlgorithm < Test::Unit::TestCase

  # Build the fixtures: the example species, the reference algorithms with
  # their arrays, and the defines found by preprocessing each code example.
  def setup

    # Obtain the known example species from the test helper
    @examples = setup_species[:examples]
    @defines = []

    # Obtain the corresponding algorithms, original code and arrays
    @primitives_list, original_code_list, @arrays_list = setup_algorithms(@examples)

    # Run the preprocessor on each code example, record its defines and parse
    # the preprocessed code with the 'CAST' gem. The resulting ASTs are
    # collected but not used any further.
    original_ast_list = []
    original_code_list.each do |original_code|
      preprocessor = Bones::Preprocessor.new(original_code, '', '')
      preprocessor.process
      @defines << preprocessor.defines
      original_ast_list << C.parse(preprocessor.target_code)
    end

    # Populate the lists of every algorithm (populate_hash remains disabled)
    @primitives_list.each do |primitives|
      primitives.each do |algorithm|
        algorithm.populate_lists
        #algorithm.populate_hash
      end
    end
  end

  # Empty test method: it makes no assertions of its own.
  def test_nothing
  end

end
|
|
40
|
+
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Include the test helper
|
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
|
3
|
+
|
|
4
|
+
# Test class for the common class: every test feeds expressions to
# Bones::Common#simplify and compares the outcome with a reference string.
class TestCommon < Test::Unit::TestCase

  # Create the object under test.
  def setup
    @common = Bones::Common.new
  end

  # Removal of superfluous brackets.
  def test_brackets
    check_simplifications([
      ['(4)',         '4'          ],
      ['(var_16)',    'var_16'     ],
      ['a+(5)',       'a+5'        ],
      ['b1+(var*16)', 'b1+(var*16)'],
      ['a-(-4)',      'a+4'        ]
    ])
  end

  # Evaluation of constant arithmetic.
  def test_alu_constants
    check_simplifications([
      ['4+1',       '5'    ],
      ['4*(4+3)',   '28'   ],
      ['a+5',       'a+5'  ],
      ['b1+(3*11)', 'b1+33'],
      ['(6-12)-2',  '-8'   ],
      ['(12-6)*3',  '18'   ],
      ['-2-2-2',    '-6'   ],
      ['a-a',       '0'    ],
      ['a-b',       'a-b'  ]
    ])
  end

  # Divisions that must stay and divisions that can be removed.
  def test_division_removal
    check_simplifications([
      ['2/10',    '2/10'],
      ['4*(2/1)', '8'   ],
      ['2/(1*4)', '2/4' ]
    ])
  end

  # Divisions combined with other operations.
  def test_division
    check_simplifications([
      ['(2048/2)-1',        '1023'        ],
      ['4*(2/1)',           '8'           ],
      ['2/2',               '1'           ],
      ['2/(1*4)',           '2/4'         ],
      ['var+(13/3)+(12/3)', 'var+(13/3)+4']
    ])
  end

  # A mix of brackets, constants, divisions and modulo operations.
  def test_general
    check_simplifications([
      ['((3)-(2)+1)+0',        '2'       ],
      ['((2+0)-(1)+1)',        '2'       ],
      ['(((id/(1))%(2/1)))+2', '(id%2)+2'],
      ['(0+id/(2))+1',         '(id/2)+1']
    ])
  end

  private

  # Assert, in the given order, that every [input, expected] pair holds when
  # the input is passed through the simplifier.
  def check_simplifications(pairs)
    pairs.each do |input, expected|
      assert_equal(expected, @common.simplify(input))
    end
  end

end
|
|
54
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Include the test helper
|
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
|
3
|
+
|
|
4
|
+
# Test class for the preprocessor class: the algorithms found by the
# preprocessor are compared against the reference algorithms of the helper.
class TestPreprocessor < Test::Unit::TestCase

  # Create the reference data and run one preprocessor per code example.
  def setup

    # Obtain the known example species from the test helper
    @examples = setup_species[:examples]

    # Obtain the corresponding reference algorithms and their code
    @algorithms_list, code_list = setup_algorithms(@examples)

    # Create and execute one preprocessor for every piece of code
    @preprocessors = []
    code_list.each do |code|
      preprocessor = Bones::Preprocessor.new(code, '', '')
      preprocessor.process
      @preprocessors << preprocessor
    end
  end

  # The species prefix of every found algorithm must match its reference.
  def test_algorithms_species
    each_algorithm_pair do |reference, found|
      assert_equal(reference.species.prefix, found.species.prefix)
    end
  end

  # The code of every found algorithm must match its reference.
  def test_algorithms_code
    each_algorithm_pair do |reference, found|
      assert_equal(reference.code, found.code)
    end
  end

  private

  # Walk over all preprocessors and their algorithms, yielding the reference
  # algorithm at the same position together with the algorithm that was found.
  def each_algorithm_pair
    @preprocessors.each_with_index do |preprocessor, example_index|
      reference_algorithms = @algorithms_list[example_index]
      preprocessor.algorithms.each_with_index do |found, algorithm_index|
        yield reference_algorithms[algorithm_index], found
      end
    end
  end

end
|
|
46
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Include the test helper
|
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
|
3
|
+
|
|
4
|
+
# Test class for the species class. At the moment it only loads the fixtures;
# the single test method contains no assertions.
class TestSpecies < Test::Unit::TestCase

  # Copy each entry of the species list from the test helper into an
  # instance variable of the same name.
  def setup
    list = setup_species
    [:dimensions, :inputs, :outputs, :patterns, :prefixes, :species].each do |key|
      instance_variable_set("@#{key}", list[key])
    end
  end

  # Empty test method: it makes no assertions of its own.
  def test_nothing
  end

end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Include the test helper
|
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
|
3
|
+
|
|
4
|
+
# Test class for the variable class: variables are created from small
# generated C programs and their derived properties are checked.
class TestVariable < Test::Unit::TestCase

  # Name given to the variable in every generated code example.
  NAME = 'example'

  # Generate the code examples, parse them and create one variable per example.
  def setup
    @variables = []
    @dimensions = []
    parser = C::Parser.new
    opening = 'void main() {'
    closing = '}'

    # Every prefix/suffix combination is stored twice, because two code
    # examples are generated per combination below. This keeps @types and
    # @dimensions aligned index-by-index with @variables.
    code_examples = []
    @types = []
    typeprefixes = ['int','float','int *','int **','int ***','unsigned char *']
    typesuffixes = ['','[10]','[N]','[10][10]']
    typeprefixes.each do |typeprefix|
      typesuffixes.each do |typesuffix|
        2.times { @types << [typeprefix, typesuffix] }
      end
    end

    # On the second entry of each pair, create one example with an
    # initialised definition and one where the variable is only referenced.
    # The dimension count (pointer stars plus array brackets) is recorded
    # for every entry.
    @types.each_with_index do |(typeprefix, typesuffix), index|
      if index.odd?
        definition = "#{typeprefix} #{NAME}#{typesuffix}"
        code_examples << parser.parse([opening, "#{definition} = 3;", closing].join("\n"))
        code_examples << parser.parse([opening, "#{definition};", "int a = #{NAME};", closing].join("\n"))
      end
      @dimensions << (typeprefix.count('*') + typesuffix.count('['))
    end

    # Create the variables from the parsed code
    code_examples.each do |code|
      type = code.variable_type(NAME)
      size = code.size(NAME)
      @variables << Bones::Variable.new(NAME, type, size, Bones::INPUT, '0', false)
    end
  end

  # The type name must equal the type prefix without pointer stars.
  def test_typename
    @variables.each_with_index do |variable, index|
      expected_result = @types[index][0].gsub('*','').strip
      assert_equal(expected_result, variable.type_name)
    end
  end

  # The device pointer must be empty for scalars and a star otherwise.
  def test_device_pointer
    @variables.each_with_index do |variable, index|
      expected_result =
        if @dimensions[index] == 0
          ''
        else
          '*'
        end
      assert_equal(expected_result, variable.device_pointer)
    end
  end

  # The number of dimensions must equal the number counted during set-up.
  def test_dimension
    @variables.each_with_index do |variable, index|
      assert_equal(@dimensions[index], variable.dimensions)
    end
  end

  # For variables with more than one dimension, the flattened form must hold
  # one '[0]' for every dimension but one.
  def test_flatten
    @variables.each_with_index do |variable, index|
      next unless variable.dimensions > 1
      expected_result = '[0]' * (@dimensions[index] - 1)
      assert_equal(expected_result, variable.flatten)
    end
  end

  # The definition must equal the type prefix, the name and the type suffix.
  def test_definition
    @variables.each_with_index do |variable, index|
      typeprefix, typesuffix = @types[index]
      expected_result = "#{typeprefix} #{NAME}#{typesuffix}"
      assert_equal(expected_result, variable.definition)
    end
  end

end
|