bones-compiler 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/CHANGELOG +117 -0
  2. data/LICENSE +9 -0
  3. data/README.rdoc +126 -0
  4. data/Rakefile +107 -0
  5. data/VERSION +1 -0
  6. data/bin/bones +20 -0
  7. data/examples/applications/ffos.c +552 -0
  8. data/examples/benchmarks/2mm.c +70 -0
  9. data/examples/benchmarks/3mm.c +81 -0
  10. data/examples/benchmarks/adi.c +81 -0
  11. data/examples/benchmarks/atax.c +65 -0
  12. data/examples/benchmarks/bicg.c +67 -0
  13. data/examples/benchmarks/cholesky.c +64 -0
  14. data/examples/benchmarks/common.h +168 -0
  15. data/examples/benchmarks/correlation.c +97 -0
  16. data/examples/benchmarks/covariance.c +77 -0
  17. data/examples/benchmarks/doitgen.c +63 -0
  18. data/examples/benchmarks/durbin.c +76 -0
  19. data/examples/benchmarks/dynprog.c +67 -0
  20. data/examples/benchmarks/fdtd-2d-apml.c +114 -0
  21. data/examples/benchmarks/fdtd-2d.c +74 -0
  22. data/examples/benchmarks/floyd-warshall.c +50 -0
  23. data/examples/benchmarks/gemm.c +69 -0
  24. data/examples/benchmarks/gemver.c +89 -0
  25. data/examples/benchmarks/gesummv.c +64 -0
  26. data/examples/benchmarks/gramschmidt.c +84 -0
  27. data/examples/benchmarks/jacobi-1d-imper.c +55 -0
  28. data/examples/benchmarks/jacobi-2d-imper.c +61 -0
  29. data/examples/benchmarks/lu.c +57 -0
  30. data/examples/benchmarks/ludcmp.c +91 -0
  31. data/examples/benchmarks/mvt.c +65 -0
  32. data/examples/benchmarks/overview.txt +38 -0
  33. data/examples/benchmarks/reg_detect.c +82 -0
  34. data/examples/benchmarks/saxpy.c +45 -0
  35. data/examples/benchmarks/seidel-2d.c +51 -0
  36. data/examples/benchmarks/symm.c +74 -0
  37. data/examples/benchmarks/syr2k.c +65 -0
  38. data/examples/benchmarks/syrk.c +62 -0
  39. data/examples/benchmarks/trisolv.c +57 -0
  40. data/examples/benchmarks/trmm.c +57 -0
  41. data/examples/chunk/example1.c +54 -0
  42. data/examples/chunk/example2.c +44 -0
  43. data/examples/chunk/example3.c +59 -0
  44. data/examples/chunk/example4.c +55 -0
  45. data/examples/chunk/example5.c +52 -0
  46. data/examples/element/example1.c +46 -0
  47. data/examples/element/example10.c +50 -0
  48. data/examples/element/example11.c +47 -0
  49. data/examples/element/example12.c +56 -0
  50. data/examples/element/example2.c +46 -0
  51. data/examples/element/example3.c +58 -0
  52. data/examples/element/example4.c +49 -0
  53. data/examples/element/example5.c +56 -0
  54. data/examples/element/example6.c +46 -0
  55. data/examples/element/example7.c +54 -0
  56. data/examples/element/example8.c +45 -0
  57. data/examples/element/example9.c +48 -0
  58. data/examples/neighbourhood/example1.c +54 -0
  59. data/examples/neighbourhood/example2.c +55 -0
  60. data/examples/neighbourhood/example3.c +82 -0
  61. data/examples/neighbourhood/example4.c +52 -0
  62. data/examples/shared/example1.c +45 -0
  63. data/examples/shared/example2.c +51 -0
  64. data/examples/shared/example3.c +55 -0
  65. data/examples/shared/example4.c +52 -0
  66. data/examples/shared/example5.c +48 -0
  67. data/lib/bones.rb +266 -0
  68. data/lib/bones/algorithm.rb +541 -0
  69. data/lib/bones/engine.rb +386 -0
  70. data/lib/bones/preprocessor.rb +161 -0
  71. data/lib/bones/species.rb +196 -0
  72. data/lib/bones/structure.rb +94 -0
  73. data/lib/bones/variable.rb +169 -0
  74. data/lib/bones/variablelist.rb +72 -0
  75. data/lib/castaddon.rb +27 -0
  76. data/lib/castaddon/index.rb +40 -0
  77. data/lib/castaddon/node.rb +753 -0
  78. data/lib/castaddon/type.rb +37 -0
  79. data/skeletons/CPU-C/common/epilogue.c +0 -0
  80. data/skeletons/CPU-C/common/globals.c +17 -0
  81. data/skeletons/CPU-C/common/globals_kernel.c +1 -0
  82. data/skeletons/CPU-C/common/header.c +0 -0
  83. data/skeletons/CPU-C/common/mem_copy_D2H.c +0 -0
  84. data/skeletons/CPU-C/common/mem_copy_H2D.c +0 -0
  85. data/skeletons/CPU-C/common/mem_epilogue.c +0 -0
  86. data/skeletons/CPU-C/common/mem_prologue.c +3 -0
  87. data/skeletons/CPU-C/common/prologue.c +0 -0
  88. data/skeletons/CPU-C/common/timer_1_start.c +0 -0
  89. data/skeletons/CPU-C/common/timer_1_stop.c +0 -0
  90. data/skeletons/CPU-C/common/timer_2_start.c +20 -0
  91. data/skeletons/CPU-C/common/timer_2_stop.c +8 -0
  92. data/skeletons/CPU-C/kernel/default.host.c +3 -0
  93. data/skeletons/CPU-C/kernel/default.kernel.c +15 -0
  94. data/skeletons/CPU-C/skeletons.txt +24 -0
  95. data/skeletons/CPU-OPENCL-AMD/common/epilogue.c +6 -0
  96. data/skeletons/CPU-OPENCL-AMD/common/globals.c +155 -0
  97. data/skeletons/CPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  98. data/skeletons/CPU-OPENCL-AMD/common/header.c +0 -0
  99. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_D2H.c +8 -0
  100. data/skeletons/CPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  101. data/skeletons/CPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  102. data/skeletons/CPU-OPENCL-AMD/common/mem_prologue.c +6 -0
  103. data/skeletons/CPU-OPENCL-AMD/common/prologue.c +24 -0
  104. data/skeletons/CPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  105. data/skeletons/CPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  106. data/skeletons/CPU-OPENCL-AMD/common/timer_2_start.c +16 -0
  107. data/skeletons/CPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  108. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  109. data/skeletons/CPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  110. data/skeletons/CPU-OPENCL-AMD/kernel/default.host.c +14 -0
  111. data/skeletons/CPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  112. data/skeletons/CPU-OPENCL-AMD/skeletons.txt +26 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/epilogue.c +3 -0
  114. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +154 -0
  115. data/skeletons/CPU-OPENCL-INTEL/common/globals_kernel.c +4 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/header.c +31 -0
  117. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +5 -0
  118. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +3 -0
  119. data/skeletons/CPU-OPENCL-INTEL/common/mem_epilogue.c +3 -0
  120. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +4 -0
  121. data/skeletons/CPU-OPENCL-INTEL/common/prologue.c +24 -0
  122. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_start.c +5 -0
  123. data/skeletons/CPU-OPENCL-INTEL/common/timer_1_stop.c +9 -0
  124. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_start.c +16 -0
  125. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +11 -0
  126. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.host.c +67 -0
  127. data/skeletons/CPU-OPENCL-INTEL/kernel/D-element-to-1-shared.kernel.cl +72 -0
  128. data/skeletons/CPU-OPENCL-INTEL/kernel/default.host.c +14 -0
  129. data/skeletons/CPU-OPENCL-INTEL/kernel/default.kernel.cl +13 -0
  130. data/skeletons/CPU-OPENCL-INTEL/skeletons.txt +26 -0
  131. data/skeletons/CPU-OPENMP/common/epilogue.c +0 -0
  132. data/skeletons/CPU-OPENMP/common/globals.c +37 -0
  133. data/skeletons/CPU-OPENMP/common/globals_kernel.c +6 -0
  134. data/skeletons/CPU-OPENMP/common/header.c +0 -0
  135. data/skeletons/CPU-OPENMP/common/mem_copy_D2H.c +0 -0
  136. data/skeletons/CPU-OPENMP/common/mem_copy_H2D.c +0 -0
  137. data/skeletons/CPU-OPENMP/common/mem_epilogue.c +0 -0
  138. data/skeletons/CPU-OPENMP/common/mem_prologue.c +3 -0
  139. data/skeletons/CPU-OPENMP/common/prologue.c +0 -0
  140. data/skeletons/CPU-OPENMP/common/timer_1_start.c +12 -0
  141. data/skeletons/CPU-OPENMP/common/timer_1_stop.c +0 -0
  142. data/skeletons/CPU-OPENMP/common/timer_2_start.c +18 -0
  143. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +8 -0
  144. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.host.c +27 -0
  145. data/skeletons/CPU-OPENMP/kernel/D-element-to-1-shared.kernel.c +46 -0
  146. data/skeletons/CPU-OPENMP/kernel/default.host.c +11 -0
  147. data/skeletons/CPU-OPENMP/kernel/default.kernel.c +18 -0
  148. data/skeletons/CPU-OPENMP/skeletons.txt +26 -0
  149. data/skeletons/GPU-CUDA/common/epilogue.c +0 -0
  150. data/skeletons/GPU-CUDA/common/globals.c +31 -0
  151. data/skeletons/GPU-CUDA/common/globals_kernel.c +4 -0
  152. data/skeletons/GPU-CUDA/common/header.c +0 -0
  153. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +3 -0
  154. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +3 -0
  155. data/skeletons/GPU-CUDA/common/mem_epilogue.c +3 -0
  156. data/skeletons/GPU-CUDA/common/mem_prologue.c +5 -0
  157. data/skeletons/GPU-CUDA/common/prologue.c +6 -0
  158. data/skeletons/GPU-CUDA/common/timer_1_start.c +6 -0
  159. data/skeletons/GPU-CUDA/common/timer_1_stop.c +10 -0
  160. data/skeletons/GPU-CUDA/common/timer_2_start.c +6 -0
  161. data/skeletons/GPU-CUDA/common/timer_2_stop.c +10 -0
  162. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.host.c +3 -0
  163. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +105 -0
  164. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.host.c +3 -0
  165. data/skeletons/GPU-CUDA/kernel/D-element-to-1-shared.kernel.cu +119 -0
  166. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.host.c +3 -0
  167. data/skeletons/GPU-CUDA/kernel/D-element-to-N-shared.kernel.cu +166 -0
  168. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.host.c +3 -0
  169. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +69 -0
  170. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.host.c +3 -0
  171. data/skeletons/GPU-CUDA/kernel/N-neighbourhood-N-to-N-element.kernel.cu +42 -0
  172. data/skeletons/GPU-CUDA/kernel/default.host.c +3 -0
  173. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +28 -0
  174. data/skeletons/GPU-CUDA/skeletons.txt +30 -0
  175. data/skeletons/GPU-OPENCL-AMD/common/epilogue.c +3 -0
  176. data/skeletons/GPU-OPENCL-AMD/common/globals.c +155 -0
  177. data/skeletons/GPU-OPENCL-AMD/common/globals_kernel.c +4 -0
  178. data/skeletons/GPU-OPENCL-AMD/common/header.c +0 -0
  179. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_D2H.c +4 -0
  180. data/skeletons/GPU-OPENCL-AMD/common/mem_copy_H2D.c +4 -0
  181. data/skeletons/GPU-OPENCL-AMD/common/mem_epilogue.c +3 -0
  182. data/skeletons/GPU-OPENCL-AMD/common/mem_prologue.c +3 -0
  183. data/skeletons/GPU-OPENCL-AMD/common/prologue.c +24 -0
  184. data/skeletons/GPU-OPENCL-AMD/common/timer_1_start.c +5 -0
  185. data/skeletons/GPU-OPENCL-AMD/common/timer_1_stop.c +9 -0
  186. data/skeletons/GPU-OPENCL-AMD/common/timer_2_start.c +4 -0
  187. data/skeletons/GPU-OPENCL-AMD/common/timer_2_stop.c +11 -0
  188. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.host.c +67 -0
  189. data/skeletons/GPU-OPENCL-AMD/kernel/D-element-to-1-shared.kernel.cl +72 -0
  190. data/skeletons/GPU-OPENCL-AMD/kernel/default.host.c +14 -0
  191. data/skeletons/GPU-OPENCL-AMD/kernel/default.kernel.cl +13 -0
  192. data/skeletons/GPU-OPENCL-AMD/skeletons.txt +26 -0
  193. data/skeletons/verification/header.c +2 -0
  194. data/skeletons/verification/timer_start.c +4 -0
  195. data/skeletons/verification/timer_stop.c +6 -0
  196. data/skeletons/verification/verify_results.c +23 -0
  197. data/test/bones/test_algorithm.rb +40 -0
  198. data/test/bones/test_common.rb +54 -0
  199. data/test/bones/test_preprocessor.rb +46 -0
  200. data/test/bones/test_species.rb +21 -0
  201. data/test/bones/test_variable.rb +84 -0
  202. data/test/test_helper.rb +106 -0
  203. metadata +303 -0
@@ -0,0 +1,67 @@
1
+
2
+ // Store the initial value
3
+ cl_mem bones_initial_value = clCreateBuffer(bones_context,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(<out0_type>),<out0_name>,&bones_errors); error_check(bones_errors);
4
+
5
+ // Create the kernels
6
+ cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
7
+ cl_kernel bones_kernel_<algorithm_name>_1 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_1", &bones_errors); error_check(bones_errors);
8
+ cl_kernel bones_kernel_<algorithm_name>_2 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_2", &bones_errors); error_check(bones_errors);
9
+
10
+ // Run either one kernel or multiple kernels
11
+ if (<in0_dimensions> <= 512) {
12
+
13
+ // Set all the arguments to the kernel function
14
+ int bones_num_args = 3;
15
+ int bones_dimensions = <in0_dimensions>;
16
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
17
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
18
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
19
+ <kernel_argument_list_constants>
20
+ // Start only one kernel
21
+ const int bones_num_threads = DIV_CEIL(<in0_dimensions>,2);
22
+ size_t bones_local_worksize1[] = {bones_num_threads};
23
+ size_t bones_global_worksize1[] = {bones_num_threads};
24
+ bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
25
+
26
+ }
27
+ else {
28
+
29
+ // Allocate space for an intermediate array
30
+ cl_mem bones_device_temp = clCreateBuffer(bones_context,CL_MEM_READ_WRITE,128*sizeof(<out0_type>),NULL,&bones_errors); error_check(bones_errors);
31
+
32
+ // Set all the arguments to the kernel function
33
+ int bones_num_args = 3;
34
+ int bones_dimensions = <in0_dimensions>;
35
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,0,sizeof(bones_dimensions),(void*)&bones_dimensions);
36
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,1,sizeof(<in0_devicename>),(void*)&<in0_devicename>);
37
+ clSetKernelArg(bones_kernel_<algorithm_name>_0,2,sizeof(bones_device_temp),(void*)&bones_device_temp);
38
+ <kernel_argument_list_constants>
39
+ // Start the first kernel
40
+ size_t bones_local_worksize1[] = {256};
41
+ size_t bones_global_worksize1[] = {256*128};
42
+ bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize1,bones_local_worksize1,0,NULL,&bones_event); error_check(bones_errors);
43
+
44
+ // Set all the arguments to the kernel function
45
+ clSetKernelArg(bones_kernel_<algorithm_name>_1,0,sizeof(bones_device_temp),(void*)&bones_device_temp);
46
+ clSetKernelArg(bones_kernel_<algorithm_name>_1,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
47
+ // Start the second kernel
48
+ size_t bones_local_worksize2[] = {128};
49
+ size_t bones_global_worksize2[] = {128};
50
+ bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_1,1,NULL,bones_global_worksize2,bones_local_worksize2,0,NULL,&bones_event); error_check(bones_errors);
51
+ clReleaseMemObject(bones_device_temp);
52
+ }
53
+
54
+ // Set all the arguments to the kernel function
55
+ clSetKernelArg(bones_kernel_<algorithm_name>_2,0,sizeof(bones_initial_value),(void*)&bones_initial_value);
56
+ clSetKernelArg(bones_kernel_<algorithm_name>_2,1,sizeof(<out0_devicename>),(void*)&<out0_devicename>);
57
+ // Perform the last computation (only needed if there is an initial value)
58
+ size_t bones_local_worksize3[] = {1};
59
+ size_t bones_global_worksize3[] = {1};
60
+ bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_2,1,NULL,bones_global_worksize3,bones_local_worksize3,0,NULL,&bones_event); error_check(bones_errors);
61
+ clReleaseMemObject(bones_initial_value);
62
+
63
+ // Synchronize and clean-up the kernels
64
+ clFinish(bones_queue);
65
+ clReleaseKernel(bones_kernel_<algorithm_name>_0);
66
+ clReleaseKernel(bones_kernel_<algorithm_name>_1);
67
+ clReleaseKernel(bones_kernel_<algorithm_name>_2);
@@ -0,0 +1,72 @@
1
+
2
+ // Start of the <algorithm_name> kernel (main, not unrolled kernel)
3
+ __kernel void bones_kernel_<algorithm_name>_0(int bones_input_size, __global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>, <argument_definition>) {
4
+ const int bones_threadblock_work = DIV_CEIL(bones_input_size,get_num_groups(0));
5
+ const int bones_parallel_work = BONES_MIN(get_local_size(0),bones_threadblock_work);
6
+ const int bones_sequential_work = DIV_CEIL(bones_threadblock_work,bones_parallel_work);
7
+ const int bones_local_id = get_local_id(0);
8
+ const int bones_global_id = get_global_id(0);
9
+ <ids>
10
+ int bones_iter_id = <in0_flatindex>;
11
+
12
+ // Load data into thread private memory and perform the first computation(s) sequentially
13
+ <in0_type> bones_temporary = <in0_name>[bones_iter_id];
14
+ <in0_type> bones_private_memory = <algorithm_code3>;
15
+ for(int c=1; c<bones_sequential_work; c++) {
16
+ bones_iter_id = bones_iter_id + bones_parallel_work*get_num_groups(0)<factors>;
17
+ if (bones_iter_id <= <in0_to>) {
18
+ bones_temporary = <in0_name>[bones_iter_id];
19
+ bones_private_memory = <algorithm_code1>;
20
+ }
21
+ }
22
+ // Initialize the local memory
23
+ volatile __local <in0_type> bones_local_memory[256];
24
+ bones_local_memory[bones_local_id] = bones_private_memory;
25
+ barrier(CLK_LOCAL_MEM_FENCE);
26
+
27
+ // Perform the remainder of the computations in parallel using a parallel reduction tree
28
+ int bones_offset_id;
29
+ for (int c=256; c>=2; c=c>>1) {
30
+ if ((2*bones_parallel_work > c) && (get_local_id(0) < c/2)) {
31
+ bones_offset_id = get_local_id(0)+c/2;
32
+ if (bones_offset_id < bones_parallel_work) {
33
+ bones_local_memory[bones_local_id] = <algorithm_code2>;
34
+ }
35
+ }
36
+ barrier(CLK_LOCAL_MEM_FENCE);
37
+ }
38
+
39
+ // Write the final result back to the global memory
40
+ if (get_local_id(0) == 0) { <out0_name>[get_group_id(0)] = bones_local_memory[0]; }
41
+ }
42
+
43
+ // Start of the <algorithm_name> kernel (secondary, not unrolled kernel)
44
+ __kernel void bones_kernel_<algorithm_name>_1(__global <in0_type><in0_devicepointer> <in0_name>, __global <out0_type><out0_devicepointer> <out0_name>) {
45
+ const int bones_local_id = get_local_id(0);
46
+ const int bones_global_id = get_local_id(0);
47
+
48
+ // Initialize the local memory
49
+ volatile __local <in0_type> bones_local_memory[128];
50
+ bones_local_memory[bones_local_id] = <in0_name>[bones_global_id];
51
+ barrier(CLK_LOCAL_MEM_FENCE);
52
+
53
+ // Perform reduction using a parallel reduction tree
54
+ int bones_offset_id;
55
+ for (int c=128; c>=2; c=c>>1) {
56
+ if (get_local_id(0) < c/2) {
57
+ bones_offset_id = get_local_id(0)+c/2;
58
+ bones_local_memory[bones_local_id] = <algorithm_code2>;
59
+ }
60
+ barrier(CLK_LOCAL_MEM_FENCE);
61
+ }
62
+
63
+ // Write the final result back to the global memory
64
+ if (get_local_id(0) == 0) { <out0_name>[0] = bones_local_memory[0]; }
65
+ }
66
+
67
+ // Start of the <algorithm_name> kernel (final, initial value kernel)
68
+ __kernel void bones_kernel_<algorithm_name>_2(__global <out0_type><out0_devicepointer> bones_initial_value, __global <out0_type><out0_devicepointer> <out0_name>) {
69
+ <out0_type> bones_private_memory = <out0_name>[0];
70
+ <out0_type> bones_temporary = bones_initial_value[0];
71
+ <out0_name>[0] = <algorithm_code4>;
72
+ }
@@ -0,0 +1,14 @@
1
+
2
+ // Create the kernel
3
+ cl_kernel bones_kernel_<algorithm_name>_0 = clCreateKernel(bones_program, "bones_kernel_<algorithm_name>_0", &bones_errors); error_check(bones_errors);
4
+
5
+ // Set all the arguments to the kernel function
6
+ int bones_num_args = 0;
7
+ <kernel_argument_list>
8
+ // Start the kernel
9
+ size_t bones_global_worksize[] = {<parallelism>};
10
+ bones_errors = clEnqueueNDRangeKernel(bones_queue,bones_kernel_<algorithm_name>_0,1,NULL,bones_global_worksize,NULL,0,NULL,&bones_event); error_check(bones_errors);
11
+
12
+ // Synchronize and clean-up the kernel
13
+ clFinish(bones_queue);
14
+ clReleaseKernel(bones_kernel_<algorithm_name>_0);
@@ -0,0 +1,13 @@
1
+
2
+ // Start of the <algorithm_name> kernel
3
+ __kernel void bones_kernel_<algorithm_name>_0(<devicedefinitionsopencl>, <argument_definition>) {
4
+ const int bones_global_id = get_global_id(0);
5
+ if (bones_global_id < (<parallelism>)) {
6
+
7
+ // Calculate the global ID(s) based on the thread id
8
+ <ids>
9
+
10
+ // Start the computation
11
+ <algorithm_code1>
12
+ }
13
+ }
@@ -0,0 +1,26 @@
1
+ ###################################################################
2
+ # Each line holds one mapping from species to skeleton
3
+ # The ordering is always ['chunk','neighbourhood','element','shared','void']
4
+ # The pattern 'full' is omitted from matching (will thus always match)
5
+ # 'D' denotes any number of ranges (e.g. D|element can be any dimension)
6
+ # 'N' denotes exactly one range (e.g. N,N|element must be 2D)
7
+ # '+' denotes one or more of these patterns
8
+ ###################################################################
9
+ D|chunk(D)+ -> D|chunk(D)+ :default :00
10
+ D|chunk(D)+ -> D|chunk(D)+ ^ D|element+ :default :00
11
+ D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ :default :00
12
+ D|chunk(D)+ ^ D|element+ -> D|chunk(D)+ ^ D|element+ :default :00
13
+ D|chunk(D)+ -> D|element+ :default :00
14
+ D|chunk(D)+ ^ D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
15
+ D|chunk(D)+ ^ D|element+ -> D|element+ :default :00
16
+ N|neighbourhood(N)+ -> N|element+ :default :00
17
+ D|neighbourhood(D)+ -> D|element+ :default :00
18
+ D|neighbourhood(D)+ ^ D|element+ -> D|element+ :default :00
19
+ D|element+ -> D|chunk(D)+ :default :00
20
+ D|element+ -> D|element+ :default :00
21
+ D|element -> 1|shared :D-element-to-1-shared :02 03 04 05
22
+ D|void -> D|element+ :default :00
23
+
24
+ #D|element+ -> D|shared+ :default :09
25
+ #D|element+ -> D|element+ ^ D|shared+ :default :09
26
+
@@ -0,0 +1,2 @@
1
+ #include <string.h>
2
+ #include <math.h>
@@ -0,0 +1,4 @@
1
+
2
+ // Start the timer for the measurement of the original code's execution time
3
+ struct timeval bones_start_time;
4
+ gettimeofday(&bones_start_time, NULL);
@@ -0,0 +1,6 @@
1
+
2
+ // Stop the timer for the measurement of the original code's execution time
3
+ struct timeval bones_end_time;
4
+ gettimeofday(&bones_end_time, NULL);
5
+ float bones_timer = 0.001 * (1000000*(bones_end_time.tv_sec-bones_start_time.tv_sec)+bones_end_time.tv_usec-bones_start_time.tv_usec);
6
+ printf(">>>\t\t\t Execution time [original ]: %.3lf ms.\n", bones_timer);
@@ -0,0 +1,23 @@
1
+ /* STARTDEF
2
+ void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>);
3
+ ENDDEF */
4
+ void bones_verify_results_<name>(<type> *bones_a, <type> *bones_b, <argument_definition>) {
5
+ long bones_m=0;
6
+ long bones_e=0;
7
+ for (int bones_global_id=0; bones_global_id<<dimensions>; bones_global_id++) {
8
+ <verifyids>
9
+ int bones_id = <flatindex>;
10
+ if (fabs(bones_a[bones_id]) > 0.000000001 ) {
11
+ if ((fabs((bones_b[bones_id]/bones_a[bones_id])-1) < 0.001)) { bones_m++; } else { bones_e++; }
12
+ } else {
13
+ if (fabs(bones_a[bones_id]-bones_b[bones_id]) < 0.001) { bones_m++; } else { bones_e++; }
14
+ }
15
+ //printf("%.3lf versus %.3lf\n",bones_a[bones_id],bones_b[bones_id]);
16
+ //printf("%d versus %d\n",bones_a[bones_id],bones_b[bones_id]);
17
+ }
18
+ printf("*** Verification ");
19
+ if (bones_e == 0) { printf("complete: no errors found.\n"); }
20
+ else { printf("warning: found %li (%.1lf%%) error(s).\n", bones_e, (bones_e*100.0)/(bones_e+bones_m)); }
21
+
22
+ }
23
+
@@ -0,0 +1,40 @@
1
+ # Include the test helper
2
+ require File.dirname(__FILE__) + '/../test_helper'
3
+
4
+ # Test class for the algorithm class.
5
+ class TestAlgorithm < Test::Unit::TestCase
6
+
7
+ # Create a list of known examples and the results.
8
+ def setup
9
+
10
+ # Create a comprehensive list of known species
11
+ list = setup_species
12
+ @examples = list[:examples]
13
+ @defines = []
14
+
15
+ # Create a list of corresponding preprocessors and code
16
+ @primitives_list, original_code_list, @arrays_list = setup_algorithms(@examples)
17
+
18
+ # Use the preprocessor and the 'CAST' gem to create an AST of the original code
19
+ original_ast_list = []
20
+ original_code_list.each do |original_code|
21
+ preprocessor = Bones::Preprocessor.new(original_code,'','')
22
+ preprocessor.process
23
+ @defines.push(preprocessor.defines)
24
+ original_ast_list.push(C.parse(preprocessor.target_code))
25
+ end
26
+
27
+ # Populate the contents of the primitives
28
+ @primitives_list.each_with_index do |primitives,index|
29
+ primitives.each do |algorithm|
30
+ algorithm.populate_lists()
31
+ #algorithm.populate_hash()
32
+ end
33
+ end
34
+ end
35
+
36
+ def test_nothing
37
+ end
38
+
39
+ end
40
+
@@ -0,0 +1,54 @@
1
+ # Include the test helper
2
+ require File.dirname(__FILE__) + '/../test_helper'
3
+
4
+ # Test class for the common class.
5
+ class TestCommon < Test::Unit::TestCase
6
+
7
+ # Set the test up.
8
+ def setup
9
+ @common = Bones::Common.new
10
+ end
11
+
12
+ def test_brackets
13
+ tests = ['(4)','(var_16)','a+(5)','b1+(var*16)','a-(-4)']
14
+ results = ['4' ,'var_16' ,'a+5' ,'b1+(var*16)','a+4' ]
15
+ tests.each_with_index do |test,index|
16
+ assert_equal(results[index], @common.simplify(test))
17
+ end
18
+ end
19
+
20
+ def test_alu_constants
21
+ tests = ['4+1','4*(4+3)','a+5','b1+(3*11)','(6-12)-2','(12-6)*3','-2-2-2','a-a','a-b']
22
+ results = ['5' ,'28' ,'a+5','b1+33' ,'-8' ,'18' ,'-6' ,'0' ,'a-b']
23
+ tests.each_with_index do |test,index|
24
+ assert_equal(results[index], @common.simplify(test))
25
+ end
26
+ end
27
+
28
+ def test_division_removal
29
+ tests = ['2/10','4*(2/1)','2/(1*4)']
30
+ results = ['2/10','8' ,'2/4' ]
31
+ tests.each_with_index do |test,index|
32
+ assert_equal(results[index], @common.simplify(test))
33
+ end
34
+ end
35
+
36
+ def test_division
37
+ tests = ['(2048/2)-1','4*(2/1)','2/2','2/(1*4)','var+(13/3)+(12/3)']
38
+ results = ['1023' ,'8' ,'1' ,'2/4' ,'var+(13/3)+4' ]
39
+ tests.each_with_index do |test,index|
40
+ assert_equal(results[index], @common.simplify(test))
41
+ end
42
+ end
43
+
44
+
45
+ def test_general
46
+ tests = ['((3)-(2)+1)+0','((2+0)-(1)+1)','(((id/(1))%(2/1)))+2','(0+id/(2))+1']
47
+ results = ['2' ,'2' ,'(id%2)+2' ,'(id/2)+1']
48
+ tests.each_with_index do |test,index|
49
+ assert_equal(results[index], @common.simplify(test))
50
+ end
51
+ end
52
+
53
+ end
54
+
@@ -0,0 +1,46 @@
1
+ # Include the test helper
2
+ require File.dirname(__FILE__) + '/../test_helper'
3
+
4
+ # Test class for the preprocessor class.
5
+ class TestPreprocessor < Test::Unit::TestCase
6
+
7
+ # Create a list of known examples and reference results.
8
+ def setup
9
+
10
+ # Create a comprehensive list of known species
11
+ list = setup_species
12
+ @examples = list[:examples]
13
+
14
+ # Create a list of corresponding algorithms and code
15
+ @algorithms_list, code_list = setup_algorithms(@examples)
16
+
17
+ # Create and execute the preprocessors
18
+ @preprocessors = []
19
+ code_list.each_index do |index|
20
+ preprocessor = Bones::Preprocessor.new(code_list[index],'','')
21
+ preprocessor.process
22
+ @preprocessors.push(preprocessor)
23
+ end
24
+ end
25
+
26
+ # Method to test the found algorithms (species part).
27
+ def test_algorithms_species
28
+ @preprocessors.each_with_index do |preprocessor,index1|
29
+ reference_algorithms = @algorithms_list[index1]
30
+ preprocessor.algorithms.each_with_index do |algorithm,index2|
31
+ assert_equal(reference_algorithms[index2].species.prefix,algorithm.species.prefix)
32
+ end
33
+ end
34
+ end
35
+
36
+ # Method to test the found algorithms (code part).
37
+ def test_algorithms_code
38
+ @preprocessors.each_with_index do |preprocessor,index1|
39
+ reference_algorithms = @algorithms_list[index1]
40
+ preprocessor.algorithms.each_with_index do |algorithm,index2|
41
+ assert_equal(reference_algorithms[index2].code,algorithm.code)
42
+ end
43
+ end
44
+ end
45
+ end
46
+
@@ -0,0 +1,21 @@
1
+ # Include the test helper
2
+ require File.dirname(__FILE__) + '/../test_helper'
3
+
4
+ # Test class for the species class
5
+ class TestSpecies < Test::Unit::TestCase
6
+
7
+ # Create a comprehensive list of known species.
8
+ def setup
9
+ list = setup_species
10
+ @dimensions = list[:dimensions]
11
+ @inputs = list[:inputs]
12
+ @outputs = list[:outputs]
13
+ @patterns = list[:patterns]
14
+ @prefixes = list[:prefixes]
15
+ @species = list[:species]
16
+ end
17
+
18
+ def test_nothing
19
+ end
20
+
21
+ end
@@ -0,0 +1,84 @@
1
+ # Include the test helper
2
+ require File.dirname(__FILE__) + '/../test_helper'
3
+
4
+ # Test class for the variable class
5
+ class TestVariable < Test::Unit::TestCase
6
+
7
+ # Some constants to test against.
8
+ NAME = 'example'
9
+
10
+ # Method to create variable examples from code examples.
11
+ def setup
12
+ @variables = []
13
+ @dimensions = []
14
+ parser = C::Parser.new
15
+ prefix = 'void main() {'
16
+ suffix = '}'
17
+
18
+ # Create code examples
19
+ code_examples = []
20
+ @types = []
21
+ typeprefixes = ['int','float','int *','int **','int ***','unsigned char *']
22
+ typesuffixes = ['','[10]','[N]','[10][10]']
23
+ typeprefixes.each do |typeprefix|
24
+ typesuffixes.each do |typesuffix|
25
+ @types.push([typeprefix,typesuffix])
26
+ @types.push([typeprefix,typesuffix])
27
+ end
28
+ end
29
+ @types.each_with_index do |type,index|
30
+ if index.odd?
31
+ definition = type[0]+' '+NAME+type[1]
32
+ code_examples.push(parser.parse([prefix,definition+' = 3;',suffix].join("\n")))
33
+ code_examples.push(parser.parse([prefix,definition+';','int a = '+NAME+';',suffix].join("\n")))
34
+ end
35
+ @dimensions.push(type[0].scan('*').length + type[1].scan('[').length)
36
+ end
37
+
38
+ # Create variables
39
+ code_examples.each do |code|
40
+ @variables.push(Bones::Variable.new(NAME,code.variable_type(NAME),code.size(NAME),Bones::INPUT,'0',false))
41
+ end
42
+ end
43
+
44
+ # Test whether the typename of the variable is recognized correctly.
45
+ def test_typename
46
+ @variables.each_index do |index|
47
+ assert_equal(@types[index][0].gsub('*','').strip,@variables[index].type_name)
48
+ end
49
+ end
50
+
51
+ # Test whether the device pointer is obtained correctly.
52
+ def test_device_pointer
53
+ @variables.each_index do |index|
54
+ expected_result = (@dimensions[index] == 0) ? '' : '*'
55
+ assert_equal(expected_result,@variables[index].device_pointer)
56
+ end
57
+ end
58
+
59
+ # Test whether the dimension of a variable is obtained correctly.
60
+ def test_dimension
61
+ @variables.each_index do |index|
62
+ assert_equal(@dimensions[index],@variables[index].dimensions)
63
+ end
64
+ end
65
+
66
+ # Test to see if the flattened array is obtained correctly.
67
+ def test_flatten
68
+ @variables.each_index do |index|
69
+ if @variables[index].dimensions > 1
70
+ expected_result = ''+'[0]'*(@dimensions[index]-1)
71
+ assert_equal(expected_result,@variables[index].flatten)
72
+ end
73
+ end
74
+ end
75
+
76
+ # Test whether the variable definition is obtained correctly.
77
+ def test_definition
78
+ @variables.each_index do |index|
79
+ expected_result = @types[index][0]+' '+NAME+@types[index][1]
80
+ assert_equal(expected_result,@variables[index].definition)
81
+ end
82
+ end
83
+
84
+ end