bones-compiler 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +117 -0
- data/LICENSE +9 -0
- data/README.rdoc +126 -0
- data/Rakefile +107 -0
- data/VERSION +1 -0
- data/bin/bones +20 -0
- data/examples/applications/ffos.c +552 -0
- data/examples/benchmarks/2mm.c +70 -0
- data/examples/benchmarks/3mm.c +81 -0
- data/examples/benchmarks/adi.c +81 -0
- data/examples/benchmarks/atax.c +65 -0
- data/examples/benchmarks/bicg.c +67 -0
- data/examples/benchmarks/cholesky.c +64 -0
- data/examples/benchmarks/common.h +168 -0
- data/examples/benchmarks/correlation.c +97 -0
- data/examples/benchmarks/covariance.c +77 -0
- data/examples/benchmarks/doitgen.c +63 -0
- data/examples/benchmarks/durbin.c +76 -0
- data/examples/benchmarks/dynprog.c +67 -0
- data/examples/benchmarks/fdtd-2d-apml.c +114 -0
- data/examples/benchmarks/fdtd-2d.c +74 -0
- data/examples/benchmarks/floyd-warshall.c +50 -0
- data/examples/benchmarks/gemm.c +69 -0
- data/examples/benchmarks/gemver.c +89 -0
- data/examples/benchmarks/gesummv.c +64 -0
- data/examples/benchmarks/gramschmidt.c +84 -0
- data/examples/benchmarks/jacobi-1d-imper.c +55 -0
- data/examples/benchmarks/jacobi-2d-imper.c +61 -0
- data/examples/benchmarks/lu.c +57 -0
- data/examples/benchmarks/ludcmp.c +91 -0
- data/examples/benchmarks/mvt.c +65 -0
- data/examples/benchmarks/overview.txt +38 -0
- data/examples/benchmarks/reg_detect.c +82 -0
- data/examples/benchmarks/saxpy.c +45 -0
- data/examples/benchmarks/seidel-2d.c +51 -0
- data/examples/benchmarks/symm.c +74 -0
- data/examples/benchmarks/syr2k.c +65 -0
- data/examples/benchmarks/syrk.c +62 -0
- data/examples/benchmarks/trisolv.c +57 -0
- data/examples/benchmarks/trmm.c +57 -0
- data/examples/chunk/example1.c +54 -0
- data/examples/chunk/example2.c +44 -0
- data/examples/chunk/example3.c +59 -0
- data/examples/chunk/example4.c +55 -0
- data/examples/chunk/example5.c +52 -0
- data/examples/element/example1.c +46 -0
- data/examples/element/example10.c +50 -0
- data/examples/element/example11.c +47 -0
- data/examples/element/example12.c +56 -0
- data/examples/element/example2.c +46 -0
- data/examples/element/example3.c +58 -0
- data/examples/element/example4.c +49 -0
- data/examples/element/example5.c +56 -0
- data/examples/element/example6.c +46 -0
- data/examples/element/example7.c +54 -0
- data/examples/element/example8.c +45 -0
- data/examples/element/example9.c +48 -0
- data/examples/neighbourhood/example1.c +54 -0
- data/examples/neighbourhood/example2.c +55 -0
- data/examples/neighbourhood/example3.c +82 -0
- data/examples/neighbourhood/example4.c +52 -0
- data/examples/shared/example1.c +45 -0
- data/examples/shared/example2.c +51 -0
- data/examples/shared/example3.c +55 -0
- data/examples/shared/example4.c +52 -0
- data/examples/shared/example5.c +48 -0
- data/lib/bones.rb +266 -0
- data/lib/bones/algorithm.rb +541 -0
- data/lib/bones/engine.rb +386 -0
- data/lib/bones/preprocessor.rb +161 -0
- data/lib/bones/species.rb +196 -0
- data/lib/bones/structure.rb +94 -0
- data/lib/bones/variable.rb +169 -0
- data/lib/bones/variablelist.rb +72 -0
- data/lib/castaddon.rb +27 -0
- data/lib/castaddon/index.rb +40 -0
- data/lib/castaddon/node.rb +753 -0
- data/lib/castaddon/type.rb +37 -0
- data/skeletons/CPU-C/common/epilogue.c +0 -0
- data/skeletons/CPU-C/common/globals.c +17 -0
- data/skeletons/CPU-C/common/globals_kernel.c +1 -0
- data/skeletons/CPU-C/common/header.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-C/common/mem_prologue.c +3 -0
- data/skeletons/CPU-C/common/prologue.c +0 -0
- data/skeletons/CPU-C/common/timer_1_start.c +0 -0
- data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +20 -0
- data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-C/kernel/default.host.c +3 -0
- data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
- data/skeletons/CPU-C/skeletons.txt +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
- data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
- data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
- data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
- data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
- data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/globals.c +37 -0
- data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
- data/skeletons/CPU-OPENMP/common/header.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
- data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
- data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
- data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
- data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
- data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
- data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
- data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
- data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
- data/skeletons/GPU-CUDA/common/globals.c +31 -0
- data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
- data/skeletons/GPU-CUDA/common/header.c +0 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
- data/skeletons/GPU-CUDA/common/prologue.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
- data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
- data/skeletons/GPU-CUDA/skeletons.txt +30 -0
- data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
- data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
- data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
- data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
- data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
- data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
- data/skeletons/verification/header.c +2 -0
- data/skeletons/verification/timer_start.c +4 -0
- data/skeletons/verification/timer_stop.c +6 -0
- data/skeletons/verification/verify_results.c +23 -0
- data/test/bones/test_algorithm.rb +40 -0
- data/test/bones/test_common.rb +54 -0
- data/test/bones/test_preprocessor.rb +46 -0
- data/test/bones/test_species.rb +21 -0
- data/test/bones/test_variable.rb +84 -0
- data/test/test_helper.rb +106 -0
- metadata +303 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
|
2
|
+
// Store the initial value
|
3
|
+
cl_mem bones_initial_value = clCreateBuffer(bones_context,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(<out0_type>),<out0_name>,&bones_errors); error_check(bones_errors);
|
4
|
+
|
5
|
+
// Create the kernels
|
6
|
+
cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
|
7
|
+
cl_kernel bones_kernel_<algorithm_name>_1 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_1", &bones_errors); error_check(bones_errors);
|
8
|
+
cl_kernel bones_kernel_<algorithm_name>_2 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_2", &bones_errors); error_check(bones_errors);
|
9
|
+
|
10
|
+
// Run either one kernel or multiple kernels
|
11
|
+
if (<in0_dimensions> <= 512) {
|
12
|
+
|
13
|
+
// Set all the arguments to the kernel function
|
14
|
+
int bones_num_args = 3;
|
15
|
+
int bones_dimensions = <in0_dimensions>;
|
16
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
|
17
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
|
18
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
19
|
+
<kernel_argument_list_constants>
|
20
|
+
// Start only one kernel
|
21
|
+
const int bones_num_threads = DIV_CEIL(<in0_dimensions>,2);
|
22
|
+
size_t bones_local_worksize1[] = {bones_num_threads};
|
23
|
+
size_t bones_global_worksize1[] = {bones_num_threads};
|
24
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
|
25
|
+
|
26
|
+
}
|
27
|
+
else {
|
28
|
+
|
29
|
+
// Allocate space for an intermediate array
|
30
|
+
cl_mem bones_device_temp = clCreateBuffer(bones_context,CL_MEM_READ_WRITE,128*sizeof(<out0_type>),NULL,&bones_errors); error_check(bones_errors);
|
31
|
+
|
32
|
+
// Set all the arguments to the kernel function
|
33
|
+
int bones_num_args = 3;
|
34
|
+
int bones_dimensions = <in0_dimensions>;
|
35
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
|
36
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
|
37
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(bones_device_temp),(void*)&bones_device_temp);
|
38
|
+
<kernel_argument_list_constants>
|
39
|
+
// Start the first kernel
|
40
|
+
size_t bones_local_worksize1[] = {256};
|
41
|
+
size_t bones_global_worksize1[] = {256*128};
|
42
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
|
43
|
+
|
44
|
+
// Set all the arguments to the kernel function
|
45
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_1,0,sizeof(bones_device_temp),(void*)&bones_device_temp);
|
46
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_1,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
47
|
+
// Start the second kernel
|
48
|
+
size_t bones_local_worksize2[] = {128};
|
49
|
+
size_t bones_global_worksize2[] = {128};
|
50
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_1,1,NULL,bones_global_worksize2,bones_local_worksize2,0,NULL,&bones_event); error_check(bones_errors);
|
51
|
+
clReleaseMemObject(bones_device_temp);
|
52
|
+
}
|
53
|
+
|
54
|
+
// Set all the arguments to the kernel function
|
55
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_2,0,sizeof(bones_initial_value),(void*)&bones_initial_value);
|
56
|
+
clSetKernelArg(bones_kernel_<algorithm_name>_2,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
|
57
|
+
// Perform the last computation (only needed if there is an initial value)
|
58
|
+
size_t bones_local_worksize3[] = {1};
|
59
|
+
size_t bones_global_worksize3[] = {1};
|
60
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_2,1,NULL,bones_global_worksize3,bones_local_worksize3,0,NULL,&bones_event); error_check(bones_errors);
|
61
|
+
clReleaseMemObject(bones_initial_value);
|
62
|
+
|
63
|
+
// Synchronize and clean-up the kernels
|
64
|
+
clFinish(bones_queue);
|
65
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_0);
|
66
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_1);
|
67
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_2);
|
@@ -0,0 +1,72 @@
|
|
1
|
+
|
2
|
+
// Start of the <algorithm_name> kernel (main, not unrolled kernel)
|
3
|
+
__kernel void bones_kernel_<algorithm_name>_0(int bones_input_size, __global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>, <argument_definition>) {
|
4
|
+
const int bones_threadblock_work = DIV_CEIL(bones_input_size,get_num_groups(0));
|
5
|
+
const int bones_parallel_work = BONES_MIN(get_local_size(0),bones_threadblock_work);
|
6
|
+
const int bones_sequential_work = DIV_CEIL(bones_threadblock_work,bones_parallel_work);
|
7
|
+
const int bones_local_id = get_local_id(0);
|
8
|
+
const int bones_global_id = get_global_id(0);
|
9
|
+
<ids>
|
10
|
+
int bones_iter_id = <in0_flatindex>;
|
11
|
+
|
12
|
+
// Load data into thread private memory and perform the first computation(s) sequentially
|
13
|
+
<in0_type> bones_temporary = <in0_name>[bones_iter_id];
|
14
|
+
<in0_type> bones_private_memory = <algorithm_code3>;
|
15
|
+
for(int c=1; c<bones_sequential_work; c++) {
|
16
|
+
bones_iter_id = bones_iter_id + bones_parallel_work*get_num_groups(0)<factors>;
|
17
|
+
if (bones_iter_id <= <in0_to>) {
|
18
|
+
bones_temporary = <in0_name>[bones_iter_id];
|
19
|
+
bones_private_memory = <algorithm_code1>;
|
20
|
+
}
|
21
|
+
}
|
22
|
+
// Initialize the local memory
|
23
|
+
volatile __local <in0_type> bones_local_memory[256];
|
24
|
+
bones_local_memory[bones_local_id] = bones_private_memory;
|
25
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
26
|
+
|
27
|
+
// Perform the remainder of the computations in parallel using a parallel reduction tree
|
28
|
+
int bones_offset_id;
|
29
|
+
for (int c=256; c>=2; c=c>>1) {
|
30
|
+
if ((2*bones_parallel_work > c) && (get_local_id(0) < c/2)) {
|
31
|
+
bones_offset_id = get_local_id(0)+c/2;
|
32
|
+
if (bones_offset_id < bones_parallel_work) {
|
33
|
+
bones_local_memory[bones_local_id] = <algorithm_code2>;
|
34
|
+
}
|
35
|
+
}
|
36
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
37
|
+
}
|
38
|
+
|
39
|
+
// Write the final result back to the global memory
|
40
|
+
if (get_local_id(0) == 0) { <out0_name>[get_group_id(0)] = bones_local_memory[0]; }
|
41
|
+
}
|
42
|
+
|
43
|
+
// Start of the <algorithm_name> kernel (secondary, not unrolled kernel)
|
44
|
+
__kernel void bones_kernel_<algorithm_name>_1(__global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>) {
|
45
|
+
const int bones_local_id = get_local_id(0);
|
46
|
+
const int bones_global_id = get_local_id(0);
|
47
|
+
|
48
|
+
// Initialize the local memory
|
49
|
+
volatile __local <in0_type> bones_local_memory[128];
|
50
|
+
bones_local_memory[bones_local_id] = <in0_name>[bones_global_id];
|
51
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
52
|
+
|
53
|
+
// Perform reduction using a parallel reduction tree
|
54
|
+
int bones_offset_id;
|
55
|
+
for (int c=128; c>=2; c=c>>1) {
|
56
|
+
if (get_local_id(0) < c/2) {
|
57
|
+
bones_offset_id = get_local_id(0)+c/2;
|
58
|
+
bones_local_memory[bones_local_id] = <algorithm_code2>;
|
59
|
+
}
|
60
|
+
barrier(CLK_LOCAL_MEM_FENCE);
|
61
|
+
}
|
62
|
+
|
63
|
+
// Write the final result back to the global memory
|
64
|
+
if (get_local_id(0) == 0) { <out0_name>[0] = bones_local_memory[0]; }
|
65
|
+
}
|
66
|
+
|
67
|
+
// Start of the <algorithm_name> kernel (final, initial value kernel)
|
68
|
+
__kernel void bones_kernel_<algorithm_name>_2(__global <out0_type><out0_devicepointer> bones_initial_value, __global <out0_type><out0_devicepointer> <out0_name>) {
|
69
|
+
<out0_type> bones_private_memory = <out0_name>[0];
|
70
|
+
<out0_type> bones_temporary = bones_initial_value[0];
|
71
|
+
<out0_name>[0] = <algorithm_code4>;
|
72
|
+
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
|
2
|
+
// Create the kernel
|
3
|
+
cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
|
4
|
+
|
5
|
+
// Set all the arguments to the kernel function
|
6
|
+
int bones_num_args = 0;
|
7
|
+
<kernel_argument_list>
|
8
|
+
// Start the kernel
|
9
|
+
size_t bones_global_worksize[] = {<parallelism>};
|
10
|
+
bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize,NULL,0,NULL,&bones_event); error_check(bones_errors);
|
11
|
+
|
12
|
+
// Synchronize and clean-up the kernel
|
13
|
+
clFinish(bones_queue);
|
14
|
+
clReleaseKernel(bones_kernel_<algorithm_name>_0);
|
@@ -0,0 +1,13 @@
|
|
1
|
+
|
2
|
+
// Start of the <algorithm_name> kernel
|
3
|
+
__kernel void bones_kernel_<algorithm_name>_0(<devicedefinitionsopencl>, <argument_definition>) {
|
4
|
+
const int bones_global_id = get_global_id(0);
|
5
|
+
if (bones_global_id < (<parallelism>)) {
|
6
|
+
|
7
|
+
// Calculate the global ID(s) based on the thread id
|
8
|
+
<ids>
|
9
|
+
|
10
|
+
// Start the computation
|
11
|
+
<algorithm_code1>
|
12
|
+
}
|
13
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
###################################################################
|
2
|
+
# Each line holds one mapping from species to skeleton
|
3
|
+
# The ordering is always ['chunk','neighbourhood','element','shared','void']
|
4
|
+
# The pattern 'full' is omitted from matching (will thus always match)
|
5
|
+
# 'D' denotes any number of ranges (e.g. D|element can be any dimension)
|
6
|
+
# 'N' denotes exactly one range (e.g. N,N|element must be 2D)
|
7
|
+
# '+' denotes one or more of these patterns
|
8
|
+
###################################################################
|
9
|
+
D|chunk(D)+ -> D|chunk(D)+ :default :00
|
10
|
+
D|chunk(D)+ -> D|chunk(D)+ ^ D|element+ :default :00
|
11
|
+
D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ :default :00
|
12
|
+
D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ ^ D|element+ :default :00
|
13
|
+
D|chunk(D)+ -> D|element+ :default :00
|
14
|
+
D|chunk(D)+ ^ D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
|
15
|
+
D|chunk(D)+ ^ D|element+ -> D|element+ :default :00
|
16
|
+
N|neighbourhood(N)+ -> N|element+ :default :00
|
17
|
+
D|neighbourhood(D)+ -> D|element+ :default :00
|
18
|
+
D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
|
19
|
+
D|element+ -> D|chunk(D)+ :default :00
|
20
|
+
D|element+ -> D|element+ :default :00
|
21
|
+
D|element -> 1|shared :D-element-to-1-shared :02 03 04 05
|
22
|
+
D|void -> D|element+ :default :00
|
23
|
+
|
24
|
+
#D|element+ -> D|shared+ :default :09
|
25
|
+
#D|element+ -> D|element+ ^ D|shared+ :default :09
|
26
|
+
|
@@ -0,0 +1,6 @@
|
|
1
|
+
|
2
|
+
// Stop the timer for the measurement of the original code's execution time
|
3
|
+
struct timeval bones_end_time;
|
4
|
+
gettimeofday(&bones_end_time, NULL);
|
5
|
+
float bones_timer = 0.001 * (1000000*(bones_end_time.tv_sec-bones_start_time.tv_sec)+bones_end_time.tv_usec-bones_start_time.tv_usec);
|
6
|
+
printf(">>>\t\t\t Execution time [original ]: %.3lf ms.\n", bones_timer);
|
@@ -0,0 +1,23 @@
|
|
1
|
+
/* STARTDEF
|
2
|
+
void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>);
|
3
|
+
ENDDEF */
|
4
|
+
void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>) {
|
5
|
+
long bones_m=0;
|
6
|
+
long bones_e=0;
|
7
|
+
for (int bones_global_id=0; bones_global_id<<dimensions>; bones_global_id++) {
|
8
|
+
<verifyids>
|
9
|
+
int bones_id = <flatindex>;
|
10
|
+
if (fabs(bones_a[bones_id]) > 0.000000001 ) {
|
11
|
+
if ((fabs((bones_b[bones_id]/bones_a[bones_id])-1) < 0.001)) { bones_m++; } else { bones_e++; }
|
12
|
+
} else {
|
13
|
+
if (fabs(bones_a[bones_id]-bones_b[bones_id]) < 0.001) { bones_m++; } else { bones_e++; }
|
14
|
+
}
|
15
|
+
//printf("%.3lf versus %.3lf\n",bones_a[bones_id],bones_b[bones_id]);
|
16
|
+
//printf("%d versus %d\n",bones_a[bones_id],bones_b[bones_id]);
|
17
|
+
}
|
18
|
+
printf("*** Verification ");
|
19
|
+
if (bones_e == 0) { printf("complete: no errors found.\n"); }
|
20
|
+
else { printf("warning: found %li (%.1lf%%) error(s).\n", bones_e, (bones_e*100.0)/(bones_e+bones_m)); }
|
21
|
+
|
22
|
+
}
|
23
|
+
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Include the test helper
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
3
|
+
|
4
|
+
# Test class for the algorithm class.
|
5
|
+
class TestAlgorithm < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Create a list of known examples and the results.
|
8
|
+
def setup
|
9
|
+
|
10
|
+
# Create a comprehensive list of known species
|
11
|
+
list = setup_species
|
12
|
+
@examples = list[:examples]
|
13
|
+
@defines = []
|
14
|
+
|
15
|
+
# Create lists of the corresponding algorithms, original code, and arrays
|
16
|
+
@primitives_list, original_code_list, @arrays_list = setup_algorithms(@examples)
|
17
|
+
|
18
|
+
# Use the preprocessor and the 'CAST' gem to create an AST of the original code
|
19
|
+
original_ast_list = []
|
20
|
+
original_code_list.each do |original_code|
|
21
|
+
preprocessor = Bones::Preprocessor.new(original_code,'','')
|
22
|
+
preprocessor.process
|
23
|
+
@defines.push(preprocessor.defines)
|
24
|
+
original_ast_list.push(C.parse(preprocessor.target_code))
|
25
|
+
end
|
26
|
+
|
27
|
+
# Populate the contents of the primitives
|
28
|
+
@primitives_list.each_with_index do |primitives,index|
|
29
|
+
primitives.each do |algorithm|
|
30
|
+
algorithm.populate_lists()
|
31
|
+
#algorithm.populate_hash()
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_nothing
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Include the test helper
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
3
|
+
|
4
|
+
# Test class for the common class (expression simplification).
|
5
|
+
class TestCommon < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Set the test up.
|
8
|
+
def setup
|
9
|
+
@common = Bones::Common.new
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_brackets
|
13
|
+
tests = ['(4)','(var_16)','a+(5)','b1+(var*16)','a-(-4)']
|
14
|
+
results = ['4' ,'var_16' ,'a+5' ,'b1+(var*16)','a+4' ]
|
15
|
+
tests.each_with_index do |test,index|
|
16
|
+
assert_equal(results[index], @common.simplify(test))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_alu_constants
|
21
|
+
tests = ['4+1','4*(4+3)','a+5','b1+(3*11)','(6-12)-2','(12-6)*3','-2-2-2','a-a','a-b']
|
22
|
+
results = ['5' ,'28' ,'a+5','b1+33' ,'-8' ,'18' ,'-6' ,'0' ,'a-b']
|
23
|
+
tests.each_with_index do |test,index|
|
24
|
+
assert_equal(results[index], @common.simplify(test))
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_division_removal
|
29
|
+
tests = ['2/10','4*(2/1)','2/(1*4)']
|
30
|
+
results = ['2/10','8' ,'2/4' ]
|
31
|
+
tests.each_with_index do |test,index|
|
32
|
+
assert_equal(results[index], @common.simplify(test))
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_division
|
37
|
+
tests = ['(2048/2)-1','4*(2/1)','2/2','2/(1*4)','var+(13/3)+(12/3)']
|
38
|
+
results = ['1023' ,'8' ,'1' ,'2/4' ,'var+(13/3)+4' ]
|
39
|
+
tests.each_with_index do |test,index|
|
40
|
+
assert_equal(results[index], @common.simplify(test))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
def test_general
|
46
|
+
tests = ['((3)-(2)+1)+0','((2+0)-(1)+1)','(((id/(1))%(2/1)))+2','(0+id/(2))+1']
|
47
|
+
results = ['2' ,'2' ,'(id%2)+2' ,'(id/2)+1']
|
48
|
+
tests.each_with_index do |test,index|
|
49
|
+
assert_equal(results[index], @common.simplify(test))
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Include the test helper
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
3
|
+
|
4
|
+
# Test class for the preprocessor class.
|
5
|
+
class TestPreprocessor < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Create a list of known examples and reference results.
|
8
|
+
def setup
|
9
|
+
|
10
|
+
# Create a comprehensive list of known species
|
11
|
+
list = setup_species
|
12
|
+
@examples = list[:examples]
|
13
|
+
|
14
|
+
# Create a list of corresponding algorithms and code
|
15
|
+
@algorithms_list, code_list = setup_algorithms(@examples)
|
16
|
+
|
17
|
+
# Create and execute the preprocessors
|
18
|
+
@preprocessors = []
|
19
|
+
code_list.each_index do |index|
|
20
|
+
preprocessor = Bones::Preprocessor.new(code_list[index],'','')
|
21
|
+
preprocessor.process
|
22
|
+
@preprocessors.push(preprocessor)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Method to test the found algorithms (species part).
|
27
|
+
def test_algorithms_species
|
28
|
+
@preprocessors.each_with_index do |preprocessor,index1|
|
29
|
+
reference_algorithms = @algorithms_list[index1]
|
30
|
+
preprocessor.algorithms.each_with_index do |algorithm,index2|
|
31
|
+
assert_equal(reference_algorithms[index2].species.prefix,algorithm.species.prefix)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# Method to test the found algorithms (code part).
|
37
|
+
def test_algorithms_code
|
38
|
+
@preprocessors.each_with_index do |preprocessor,index1|
|
39
|
+
reference_algorithms = @algorithms_list[index1]
|
40
|
+
preprocessor.algorithms.each_with_index do |algorithm,index2|
|
41
|
+
assert_equal(reference_algorithms[index2].code,algorithm.code)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Include the test helper
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
3
|
+
|
4
|
+
# Test class for the species class
|
5
|
+
class TestSpecies < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Create a comprehensive list of known species.
|
8
|
+
def setup
|
9
|
+
list = setup_species
|
10
|
+
@dimensions = list[:dimensions]
|
11
|
+
@inputs = list[:inputs]
|
12
|
+
@outputs = list[:outputs]
|
13
|
+
@patterns = list[:patterns]
|
14
|
+
@prefixes = list[:prefixes]
|
15
|
+
@species = list[:species]
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_nothing
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# Include the test helper
|
2
|
+
require File.dirname(__FILE__) + '/../test_helper'
|
3
|
+
|
4
|
+
# Test class for the variable class
|
5
|
+
class TestVariable < Test::Unit::TestCase
|
6
|
+
|
7
|
+
# Some constants to test against.
|
8
|
+
NAME = 'example'
|
9
|
+
|
10
|
+
# Method to create variable examples from code examples.
|
11
|
+
def setup
|
12
|
+
@variables = []
|
13
|
+
@dimensions = []
|
14
|
+
parser = C::Parser.new
|
15
|
+
prefix = 'void main() {'
|
16
|
+
suffix = '}'
|
17
|
+
|
18
|
+
# Create code examples
|
19
|
+
code_examples = []
|
20
|
+
@types = []
|
21
|
+
typeprefixes = ['int','float','int *','int **','int ***','unsigned char *']
|
22
|
+
typesuffixes = ['','[10]','[N]','[10][10]']
|
23
|
+
typeprefixes.each do |typeprefix|
|
24
|
+
typesuffixes.each do |typesuffix|
|
25
|
+
@types.push([typeprefix,typesuffix])
|
26
|
+
@types.push([typeprefix,typesuffix])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
@types.each_with_index do |type,index|
|
30
|
+
if index.odd?
|
31
|
+
definition = type[0]+' '+NAME+type[1]
|
32
|
+
code_examples.push(parser.parse([prefix,definition+' = 3;',suffix].join("\n")))
|
33
|
+
code_examples.push(parser.parse([prefix,definition+';','int a = '+NAME+';',suffix].join("\n")))
|
34
|
+
end
|
35
|
+
@dimensions.push(type[0].scan('*').length + type[1].scan('[').length)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Create variables
|
39
|
+
code_examples.each do |code|
|
40
|
+
@variables.push(Bones::Variable.new(NAME,code.variable_type(NAME),code.size(NAME),Bones::INPUT,'0',false))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
# Test whether the typename of the variable is recognized correctly.
|
45
|
+
def test_typename
|
46
|
+
@variables.each_index do |index|
|
47
|
+
assert_equal(@types[index][0].gsub('*','').strip,@variables[index].type_name)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Test whether the device pointer is obtained correctly.
|
52
|
+
def test_device_pointer
|
53
|
+
@variables.each_index do |index|
|
54
|
+
expected_result = (@dimensions[index] == 0) ? '' : '*'
|
55
|
+
assert_equal(expected_result,@variables[index].device_pointer)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Test whether the dimension of a variable is obtained correctly.
|
60
|
+
def test_dimension
|
61
|
+
@variables.each_index do |index|
|
62
|
+
assert_equal(@dimensions[index],@variables[index].dimensions)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Test to see if the flattened array is obtained correctly.
|
67
|
+
def test_flatten
|
68
|
+
@variables.each_index do |index|
|
69
|
+
if @variables[index].dimensions > 1
|
70
|
+
expected_result = ''+'[0]'*(@dimensions[index]-1)
|
71
|
+
assert_equal(expected_result,@variables[index].flatten)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
# Test whether the variable definition is obtained correctly.
|
77
|
+
def test_definition
|
78
|
+
@variables.each_index do |index|
|
79
|
+
expected_result = @types[index][0]+' '+NAME+@types[index][1]
|
80
|
+
assert_equal(expected_result,@variables[index].definition)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|