bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,44 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........neighbourhood/example05.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...07-May-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example05', an unrolled one-sided neighbourhood
19
+ int main(void) {
20
+ int i;
21
+ int N = 256;
22
+
23
+ // Declare input/output arrays
24
+ int A[N];
25
+ int B[N];
26
+
27
+ // Set the input data
28
+ for(i=0;i<N;i++) {
29
+ A[i] = i;
30
+ B[i] = i+5;
31
+ }
32
+
33
+ // Perform the computation
34
+ #pragma species kernel A[2:N]|neighbourhood(0:1) -> B[2:N-1]|element
35
+ for (i=2; i<N; i++) {
36
+ B[i] = A[i] + A[i+1];
37
+ }
38
+ #pragma species endkernel example05
39
+
40
+ // Clean-up and exit the function
41
+ fflush(stdout);
42
+ return 0;
43
+ }
44
+
File without changes
File without changes
File without changes
File without changes
File without changes
data/lib/adarwin.rb ADDED
@@ -0,0 +1,62 @@
1
+
2
+ # Include the common part between Bones and A-Darwin
3
+ require 'common.rb'
4
+
5
+ # We define a custom error class for code generation related
6
+ # errors (any error raised).
7
+ class CodeGenError < StandardError #:nodoc:
8
+ end
9
+ def raise_error(message) #:nodoc:
10
+ puts Adarwin::ERROR+message
11
+ raise CodeGenError, 'Error encountered, stopping execution of A-Darwin'
12
+ end
13
+
14
+ # The module keeps all the classes and constants
15
+ # together. It contains the classes:
16
+ # * Engine: The main component of the tool, providing the high-level flow.
17
+ # * Preprocessor: C-preprocessor, extracting defines/includes from source code.
18
+ # * Nest:
19
+ # * Interval:
20
+ # * Dependence:
21
+ # * Reference:
22
+ #
23
+ # The module also contains a list of inter-class constants.
24
+ module Adarwin
25
+
26
+ # A string given as a start of an informative message.
27
+ MESSAGE = '[A-Darwin] ### Info : '
28
+ # A string given as a start of a warning message.
29
+ WARNING = '[A-Darwin] ### Warning: '
30
+ # A string given as a start of an error message.
31
+ ERROR = '[A-Darwin] ### Error : '
32
+
33
+ # Start of the scop
34
+ SCOP_START = '#pragma scop'
35
+ # End of the scop
36
+ SCOP_END = '#pragma endscop'
37
+
38
+ # Species pragma
39
+ PRAGMA_SPECIES = '#pragma species'
40
+
41
+ # Array reference characterisation (ARC) pragma
42
+ PRAGMA_ARC = '#pragma ARC'
43
+
44
+ # Create a string from a pragma because pragmas are unsupported by CAST.
45
+ PRAGMA_DELIMITER_START = '"PRAGMA '
46
+ PRAGMA_DELIMITER_END = ' PRAGMA"'
47
+
48
+ # This class is created to be a parent class of all classes.
49
+ class Common
50
+ end
51
+
52
+ end
53
+
54
+ # This list of require's makes sure all classes are included.
55
+ require 'adarwin/interval.rb'
56
+ require 'adarwin/dependences.rb'
57
+ require 'adarwin/preprocessor.rb'
58
+ require 'adarwin/memorycopies.rb'
59
+ require 'adarwin/fusion.rb'
60
+ require 'adarwin/engine.rb'
61
+ require 'adarwin/reference.rb'
62
+ require 'adarwin/nest.rb'
@@ -0,0 +1,268 @@
1
+ module Adarwin
2
+
3
+ # This class represents the dependence tests. The dependence tests are not
4
+ # objects as such, the use of a class might therefore be a bit out of place.
5
+ # Instead, the class rather holds all methods related to dependence tests.
6
+ #
7
+ # For an M-dimensional access, the problem of dependence testing is reduced to
8
+ # that of determining whether a system of M linear equations of the form
9
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
10
+ # has a simultaneous integer solution satisfying the loop/if bounds given as
11
+ # >>> min_k <= I_k <= max_k
12
+ #
13
+ # Currently, the following conservative tests are implemented:
14
+ # * The GCD (greatest common divisor) test
15
+ # * The Banerjee test
16
+ #
17
+ # In case the accesses are multi-dimensional, we perform a subscript-by-
18
+ # subscript checking. In other words, we test each dimension separately
19
+ # using the two tests. If we find a possible dependence in one dimension, we
20
+ # conclude that there is a dependence.
21
+ class Dependence
22
+ attr_accessor :result
23
+
24
+ # Method to initialise the dependence tests. This method actually already
25
+ # computes all the dependence tests and stores the result in an instance
26
+ # variable. It takes as input the pair of accesses it needs to check for
27
+ # dependences.
28
+ def initialize(access1,access2,verbose)
29
+ @verbose = verbose
30
+ bounds = [access1.bounds,access2.bounds]
31
+
32
+ # Iterate over the dimensions of the array reference
33
+ results = []
34
+ dimensions = [access1.indices.size,access2.indices.size].min
35
+ for dim in 1..dimensions
36
+ ref1 = access1.indices[dim-1]
37
+ ref2 = access2.indices[dim-1]
38
+ loop_vars = [access1.all_loops.map{ |l| l[:var] },access2.all_loops.map{ |l| l[:var] }]
39
+
40
+ # Conclude directly that there is no dependence if the references are
41
+ # exactly the same.
42
+ if ref1 == ref2
43
+ results << false
44
+ next
45
+ end
46
+
47
+ # TODO: Include the step in the dependence tests
48
+ #p access1.tS[dim-1]
49
+
50
+ # Get all variables, a linear equation, and the corresponding conditions
51
+ all_vars, equation, conditions = get_linear_equation(ref1,ref2,bounds,loop_vars)
52
+
53
+ # Conclude directly that there is no dependence if the variables are not
54
+ # dependent on the loops.
55
+ if equation[:ak].empty?
56
+ results << false
57
+ next
58
+ end
59
+
60
+ # Perform the GCD test
61
+ gcd_result = gcd_test(all_vars,equation)
62
+
63
+ # End if the GCD test concludes that there are no dependences
64
+ if gcd_result == false
65
+ results << false
66
+
67
+ # Continue with Banerjee if GCD concludes there might be dependences
68
+ else
69
+ ban_result = ban_test(all_vars,equation,conditions)
70
+ results << ban_result
71
+ end
72
+ end
73
+
74
+ # Combine the results for all dimensions
75
+ if results.include?(true)
76
+ @result = true
77
+ else
78
+ @result = false
79
+ end
80
+ end
81
+
82
+ # This method implements the GCD test. The test is based on the computation
83
+ # of the greatest common divisor, giving it its name. The GCD test is based
84
+ # on the fact that a linear equation in the form of
85
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
86
+ # has an integer solution if and only if the greatest common divisor of a_1,
87
+ # a_2,...,a_n is a divisor of a_0. The GCD test checks for this
88
+ # divisibility by performing the division and checking if the result is
89
+ # integer.
90
+ #
91
+ # This method returns true if there is an integer solution, not necessarily
92
+ # within the loop bounds. Thus, if the method returns true, there might be a
93
+ # dependence. If the method returns false, there is definitely no dependence.
94
+ #
95
+ # TODO: If the result (+division+) is symbolic, can we conclude anything?
96
+ def gcd_test(all_vars,equation)
97
+
98
+ # Gather all the data to perform the test. Here, base represents a_0 and
99
+ # data represents a_1,a_2,...,a_n.
100
+ base = equation[:a0]
101
+ data = equation[:ak]
102
+
103
+ # Perform the greatest common divisor calculation and perform the division
104
+ result = gcd(data)
105
+ division = base/result.to_f
106
+
107
+ # See if the division is integer under the condition that we can test that
108
+ if result == 0
109
+ gcd_result = false
110
+ elsif division.class != Float
111
+ gcd_result = true
112
+ else
113
+ gcd_result = (division.to_i.to_f == division)
114
+ end
115
+
116
+ # Display and return the result
117
+ puts MESSAGE+"GCD-test '#{gcd_result}' ---> (#{base})/(#{result}) = #{division}, gcd(#{data})" if @verbose
118
+ return gcd_result
119
+ end
120
+
121
+ # This method implements the Banerjee test. This test takes loop bounds into
122
+ # consideration. The test is based on a linear equation in the form of
123
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
124
+ # and loop bounds in the form of
125
+ # >>> min_k <= I_k <= max_k
126
+ #
127
+ # The test proceeds as follows. First, the values a_k+ and a_k- are
128
+ # computed. Also, the bounds min_k and max_k are calculated from the loop
129
+ # conditions. Following, the test computes the extreme values 'low' and
130
+ # 'high'. Finally, the test computes whether the following holds:
131
+ # >>> low <= a_0 <= high
132
+ # If this holds, there might be a dependence (method returns true). If this
133
+ # does not hold, there is definitely no dependence (method returns false).
134
+ def ban_test(all_vars,equation,conditions)
135
+
136
+ # Pre-process the data to obtain the a_k+, a_k-, and lower-bounds and
137
+ # upper-bounds for I_k (min_k and max_k).
138
+ values = []
139
+ equation[:ak].each_with_index do |a,index|
140
+ values << {
141
+ :ak_plus => (a >= 0) ? a : 0,
142
+ :ak_min => (a <= 0) ? -a : 0,
143
+ :min_k => conditions[index][:min],
144
+ :max_k => conditions[index][:max]
145
+ }
146
+ end
147
+
148
+ # Compute the extreme values 'low' and 'high'. This is done symbolically.
149
+ low, high = "0", "0"
150
+ values.each do |v|
151
+ partial_low = simplify("
152
+ (#{v[:ak_plus]}) * (#{v[:min_k]}) -
153
+ (#{v[:ak_min]}) * (#{v[:max_k]})
154
+ ")
155
+ low = simplify("(#{low}) + (#{partial_low})")
156
+ partial_high = simplify("
157
+ (#{v[:ak_plus]}) * (#{v[:max_k]}) -
158
+ (#{v[:ak_min]}) * (#{v[:min_k]})
159
+ ")
160
+ high = simplify("(#{high}) + (#{partial_high})")
161
+ end
162
+
163
+ # Perform the actual test: checking if low <= a_0 <= high holds. This is
164
+ # implemented as two parts: check the lower-bound and check the upper-
165
+ # bound.
166
+ # FIXME: This method uses the +max+ which might make a guess.
167
+ base = equation[:a0]
168
+ test1 = (base.to_s == max(low,base.to_s))
169
+ test2 = (high == max(base.to_s,high))
170
+ ban_result = (test1 && test2)
171
+
172
+ # Display and return the results
173
+ puts MESSAGE+"Banerjee '#{ban_result}' ---> (#{test1},#{test2}), '(#{low} <= #{base} <= #{high})'" if @verbose
174
+ return ban_result
175
+ end
176
+
177
+ # This method retrieves a linear equation from a pair of accesses. Accesses
178
+ # are transformed into a linear equation of the form
179
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
180
+ # Additionally, this method returns a list of all variables and a list of
181
+ # loop bounds corresponding to the linear equation's variables.
182
+ def get_linear_equation(access1,access2,bounds,all_loop_vars)
183
+ equation = { :a0 => 0, :ak => [] }
184
+ all_vars = []
185
+ conditions = []
186
+ hash = {}
187
+
188
+ # Loop over the two accesses
189
+ [access1,access2].each_with_index do |access,index|
190
+ access = simplify(access.to_s)
191
+
192
+ # Get the variables (I_1 ... I_n) and modify the access expression
193
+ vars = get_vars(access).uniq
194
+ loop_vars = get_loop_vars(vars,all_loop_vars[index])
195
+ all_vars = (all_vars + vars).uniq
196
+ vars.each do |var_name|
197
+ access = access.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
198
+ end
199
+
200
+ # Create a hash of all the variables. For now, this is just the name of
201
+ # the variable. The values will be set later. This uses the 'symbolic'
202
+ # library.
203
+ vars.each do |var_name|
204
+ if !hash[var_name.to_sym]
205
+ hash[var_name.to_sym] = var :name => var_name
206
+ end
207
+ hash[var_name.to_sym].value = hash[var_name.to_sym]
208
+ end
209
+
210
+ # Find the constant term (a_0). This uses the +eval+ method together
211
+ # with the 'symbolic' gem to compute the term.
212
+ loop_vars.each do |var_name|
213
+ hash[var_name.to_sym].value = 0
214
+ end
215
+ base = eval(access).value
216
+ val = (index == 0) ? base : -base
217
+ equation[:a0] = equation[:a0] + val
218
+
219
+ # Find the other terms (a_1, a_2, ... a_n). This uses the +eval+ method
220
+ # together with the 'symbolic' gem to compute the terms.
221
+ loop_vars.each do |var_name|
222
+ hash[var_name.to_sym].value = 1
223
+ val = eval(access).value - base
224
+ val = (index == 0) ? val : -val
225
+ equation[:ak] << val
226
+ hash[var_name.to_sym].value = 0
227
+ end
228
+
229
+ # Get the loop bound conditions corresponding to the linear equation's
230
+ # variables.
231
+ loop_vars.each do |var_name|
232
+ conditions << bounds[index].select{ |c| c[:var] == var_name }.first
233
+ end
234
+ end
235
+ return all_vars, equation, conditions
236
+ end
237
+
238
+ # Implementation of a GCD method with any number of arguments. Relies on
239
+ # Ruby's default GCD method. In contrast to the normal gcd method, this
240
+ # method does not act on a number, but instead takes an array of numbers as
241
+ # an input.
242
+ def gcd(args)
243
+ val = args.first
244
+ args.drop(1).each do |argument|
245
+ val = val.gcd(argument)
246
+ end
247
+ return val
248
+ end
249
+
250
+ # Method to obtain all variables in an array reference that are also loop
251
+ # variables.
252
+ def get_loop_vars(vars,all_loop_vars)
253
+ return vars & all_loop_vars
254
+ end
255
+
256
+ # Method to combine an array of integers in the form of a subtraction. For
257
+ # example, given the input [a,b,c,d], the output will be (a-b-c-d).
258
+ # TODO: Remove this method
259
+ #def merge_subtract(args)
260
+ # val = args.first
261
+ # args.drop(1).each do |argument|
262
+ # val = val - argument
263
+ # end
264
+ # return val
265
+ #end
266
+
267
+ end
268
+ end
@@ -0,0 +1,277 @@
1
+
2
+ module Adarwin
3
+
4
+ # This is the main 'engine' for the A-darwin algorithmic species extraction
5
+ # tool. It contains methods to parse the command-line arguments, to run the
6
+ # pre-processor, to insert the annotations, and to pretty print the final
7
+ # output.
8
+ # TODO: Add a syntax check by a normal compiler first (e.g. gcc)
9
+ class Engine < Common
10
+
11
# Initializes the engine and processes the command line arguments. This
# method uses the 'trollop' gem to parse the arguments and to create a
# nicely formatted help menu. This method additionally initializes a result-
# hash and reads the contents of the source file from disk. Execution is
# aborted through <tt>Trollop::die</tt> when no input file is given or when
# the given file does not exist.
#
# ==== Command-line usage:
#   adarwin --application <input> [OPTIONS]
#
# ==== Options:
#   --application, -a <s>:        Input application file
#   --no-memory-annotations, -m:  Disable the printing of memory annotations
#   --mem-remove-spurious, -r:    Memcopy optimisation: remove spurious copies
#   --mem-copyin-to-front, -f:    Memcopy optimisation: move copyins to front
#   --mem-copyout-to-back, -b:    Memcopy optimisation: move copyouts to back
#   --mem-to-outer-loop, -l:      Memcopy optimisation: move copies to outer loops
#   --fusion, -k <i>:             Type of kernel fusion to perform (0 -> disable) (default: 0)
#   --print-arc, -c:              Print array reference characterisations (ARC) instead of species
#   --silent, -s:                 Become silent (no message printing)
#   --only-alg-number, -o <i>:    Only generate code for the x-th species (99 -> all) (default: 99)
#   --version, -v:                Print version and exit
#   --help, -h:                   Show this message
#
def initialize
  @result = {:original_code => [],
             :species_code => []}

  # Parse the command line options using the 'trollop' gem.
  @options = Trollop::options do
    version 'A-darwin, part of Bones version '+File.read(ADARWIN_DIR+'/VERSION').strip+' (c) 2013 Cedric Nugteren, Eindhoven University of Technology'
    banner  NL+'A-darwin is an algorithmic species extraction tool. ' +
            'For more information, see the README.rdoc file or visit the Bones/A-darwin website at http://parse.ele.tue.nl/bones/.' + NL + NL +
            'Usage:' + NL +
            ' adarwin --application <input> [OPTIONS]' + NL +
            'using the following flags:'
    opt :application, 'Input application file', :short => 'a', :type => String
    opt :no_memory_annotations, 'Disable the printing of memory annotations', :short => 'm', :default => false
    opt :mem_remove_spurious, 'Memcopy optimisation: remove spurious copies', :short => 'r', :default => false
    opt :mem_copyin_to_front, 'Memcopy optimisation: move copyins to front', :short => 'f', :default => false
    opt :mem_copyout_to_back, 'Memcopy optimisation: move copyouts to back', :short => 'b', :default => false
    opt :mem_to_outer_loop, 'Memcopy optimisation: move copies to outer loops', :short => 'l', :default => false
    opt :fusion, 'Type of kernel fusion to perform (0 -> disable)', :short => 'k', :type => Integer, :default => 0
    opt :print_arc, 'Print array reference characterisations (ARC) instead of species', :short => 'c', :default => false
    opt :silent, 'Become silent (no message printing)', :short => 's', :default => false
    opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
  end

  # Validate the input file. File.exist? is used because the File.exists?
  # alias is deprecated and no longer available in recent Ruby versions.
  Trollop::die 'no input file supplied (use: --application)' unless @options[:application_given]
  Trollop::die 'input file "'+@options[:application]+'" does not exist' unless File.exist?(@options[:application])

  # The name is the file name up to its first dot, without any directories
  @options[:name] = @options[:application].split('/').last.split('.').first

  # Printing the ARC implies that no memory annotations are printed
  @options[:no_memory_annotations] = true if @options[:print_arc]

  # Obtain the source code from file
  @source = File.open(@options[:application],'r'){|f| f.read}
  @basename = File.basename(@options[:application],'.c')
end
62
+
63
# Method to process a file and to output target code. This method calls all
# the other methods, it is the main engine. The resulting code is printed to
# the standard output (unless the 'silent' option is set) and is stored in
# the result-hash under the +:species_code+ key.
#
# ==== Tasks:
# * Run the preprocessor to obtain algorithm information.
# * Use the 'CAST' gem to parse the source into an AST.
# * Find the loop nests in the SCoP and drop the species nested in others.
# * Optimise the memory copies and optionally perform kernel fusion.
# * Insert the species, synchronisation and memory copy annotations.
def process

  # Run the preprocessor
  preprocessor = Adarwin::Preprocessor.new(@source)
  preprocessor.process
  @result[:header_code] = preprocessor.header_code

  # Set-up the CAST gem to include certain types
  # FIXME: What about other (user-defined?) types?
  parser = C::Parser.new
  parser.type_names << 'FILE'
  parser.type_names << 'size_t'

  # Parse the original source code into AST form (using CAST). The resulting
  # AST is not used any further in this method.
  # NOTE(review): presumably kept as a check that the source can be parsed;
  # confirm before removing the call.
  parser.parse(preprocessor.parsed_code)

  # Create an AST of the SCoP (using CAST)
  scop_ast = C::Block.parse('{'+preprocessor.scop_code+'}')

  # Process the scop to identify the loop nests of interest and to find the
  # corresponding species. This is the method performing most of the work.
  @nests = []
  @id = 0
  populate_nests(scop_ast)

  # Remove inner-loop (nested) species. This removes all species that are
  # found within another species. For completeness, this might be desired in
  # some cases.
  # TODO: Make this an option
  @nests.each do |nest|
    remove_inner_species(get_children(nest)) if nest.has_species?
  end
  @nests.delete_if{ |n| n.removed }

  # Iterate over the nests/statements to optimize the copies. Currently,
  # this will only look at loop nests with a depth of 1. Re-call the memory
  # copy optimisations method every time a change is made.
  # TODO: Investigate what the depth should be.
  basenests = @nests.select{ |n| n.depth == 1 }
  recursive_copy_optimisations(basenests,@options)

  # Kernel fusion is enabled (1,2,3,4) or disabled (0). When enabled, test if
  # fusion is legal and perform the actual transformation.
  kernel_fusion(@nests, @options[:fusion]) if @options[:fusion] > 0

  # Delete the to-be-removed code (because of fusion)
  @nests.each do |nest|
    scop_ast.remove_once(nest.code) if nest.removed
  end
  @nests.delete_if{ |n| n.removed }

  # Insert the species and memory copy annotations into the original code.
  # Don't do this if the user specified that he is not interested in the
  # memory copy annotations.
  insert_copies(scop_ast) unless @options[:no_memory_annotations]
  insert_species(scop_ast)

  # Create the modified SCoP and remove the quotes from the pragma's
  # FIXME: This is a hack for now, this has conflicts with strings in code
  modified_scop = INDENT+SCOP_START+NL+scop_ast.to_s+NL+INDENT+SCOP_END+NL
  modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_START,'')
  modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_END,'')

  # Print the result SCoP
  puts modified_scop if !@options[:silent]

  # Store the result. The block form of gsub is used on purpose: a
  # replacement passed as a string argument has its backslash sequences
  # (such as \0, \1 or \\) interpreted, which would corrupt C code that
  # contains them. The value returned by a block is inserted literally.
  @result[:species_code] = preprocessor.target_code.gsub(preprocessor.scop_code) { modified_scop }
end
146
+
147
# This method writes the output code to a file. The target file is placed in
# the directory of the input file and is named after the input file, with
# the extension replaced by '_species.c'. For example, the input
# 'examples/v1.2/gemm.c' results in 'examples/v1.2/gemm_species.c'.
#
# The name is derived using the File path helpers rather than by splitting
# the full path on dots, such that dots in directory names (including a
# leading './' or '../') do not truncate the path.
# TODO: The filename is fixed, make this an optional argument
def write_output
  source_path = @options[:application]
  target_name = File.basename(source_path,'.*')+'_species'+'.c'
  target_path = File.join(File.dirname(source_path),target_name)

  # Populate the species file
  File.open(target_path,'w') do |target|
    target.puts @result[:species_code]
  end
end
156
+
157
# This method fills the list of loop nests (recursively). It is the main
# method to walk over the code and to find the species information: every
# for-statement that is encountered is stored as a Nest. Each statement that
# is visited increments the running identifier, whether it is stored or not.
#
# ==== Arguments:
# * +ast+ - The AST node to inspect, nothing is done if it is not a block.
# * +level+ - The statement indices leading to +ast+ (default: empty).
def populate_nests(ast,level=[])

  # Only blocks contain statements to investigate
  return unless ast.block?

  # First handle all statements at the current depth level. A for-statement
  # is stored as a loop nest, but only if it is not disabled by the options:
  # either all species are requested (99) or this is the requested one.
  ast.stmts.each_with_index do |statement,position|
    requested = @options[:only_alg_number].to_i
    if (requested == 99 || requested == (@id+1)) && statement.for_statement?
      location = level.clone.push(position)
      @nests.push(Nest.new(location,statement,@id,@basename,!@options[:silent]))
    end
    @id += 1
  end

  # Then descend into the body of every statement that has one.
  # TODO: Make it an option to only investigate the outer most level(s).
  ast.stmts.each_with_index do |statement,position|
    body = statement.stmt
    populate_nests(body,level.clone.push(position)) if body
  end
end
190
+
191
# This method clears the species information of the given loop nests and of
# all the nests found within them (called recursively). Every visited nest
# loses its copyins, its copyouts and its species, and is marked as removed.
# The caller decides which nests qualify: everything passed in is cleared.
#
# ==== Arguments:
# * +nests+ - The loop nests to clear.
def remove_inner_species(nests)
  nests.each do |inner|

    # Reset the annotations and flag the nest for removal
    inner.copyins = []
    inner.copyouts = []
    inner.species = ''
    inner.removed = true

    # Continue with the nests one level deeper
    descendants = get_children(inner)
    remove_inner_species(descendants) if descendants
  end
end
203
+
204
# Method to obtain the children of a nest. A nest is a child of +parent+ if
# it is exactly one depth level deeper and if its level, without the last
# entry, is equal to the level of the parent.
#
# ==== Arguments:
# * +parent+ - The nest to find the children of.
#
# ==== Returns:
# An array with the children, in the order in which they are stored. The
# array is empty if there are none.
def get_children(parent)
  @nests.select do |nest|
    parent.depth+1 == nest.depth && parent.level == nest.level[0...-1]
  end
end
216
+
217
# This method iterates over the loop nests and inserts the species
# annotations before and after the code of each nest. It also inserts the
# synchronisation barriers when needed, and only if the user is interested
# in the memory copy annotations. With the 'print_arc' option the array
# reference characterisations are inserted instead of the species.
#
# Each nest uses two synchronisation identifiers: twice its own identifier
# for the barrier before the nest, and that number plus one for the barrier
# after the nest.
#
# ==== Arguments:
# * +scop_ast+ - The AST of the SCoP (not used by this method itself).
def insert_species(scop_ast)

  # Gather the synchronisation identifiers used by any of the memory copies
  sync_needed = @nests.map do |nest|
    [nest.copyins.map{ |c| c.get_sync_id }, nest.copyouts.map{ |c| c.get_sync_id }]
  end.flatten.uniq

  # Insert the annotations into the code
  @nests.each do |nest|
    pre_sync = 2*nest.id
    post_sync = pre_sync+1

    # Insert the pre-kernel synchronisation barrier
    if sync_needed.include?(pre_sync) && !@options[:no_memory_annotations]
      barrier = PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' sync '+pre_sync.to_s+PRAGMA_DELIMITER_END
      nest.code.insert_prev(C::StringLiteral.parse(barrier))
    end

    # Insert the pre-kernel species (start of species)
    if nest.has_species?
      opening = @options[:print_arc] ? nest.print_arc_start : nest.print_species_start
      nest.code.insert_prev(C::StringLiteral.parse(opening))
    end

    # Insert the post-kernel synchronisation barrier. It is placed after the
    # copyout annotation if one directly follows the code of the nest.
    if sync_needed.include?(post_sync) && !@options[:no_memory_annotations]
      follower = nest.code.next
      after_copyout = follower && follower.string? && follower.val =~ /pragma species copyout/
      anchor = after_copyout ? follower : nest.code
      barrier = PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' sync '+post_sync.to_s+PRAGMA_DELIMITER_END
      anchor.insert_next(C::StringLiteral.parse(barrier))
    end

    # Insert the post-kernel species (end of species)
    if nest.has_species?
      closing = @options[:print_arc] ? nest.print_arc_end : nest.print_species_end
      nest.code.insert_next(C::StringLiteral.parse(closing))
    end
  end
end
260
+
261
# Inserts the memory copy annotations around the code of every loop nest:
# the copyins are placed before the code of the nest, the copyouts after it.
# Nests without copyins or without copyouts do not get the corresponding
# annotation.
#
# ==== Arguments:
# * +scop_ast+ - The AST of the SCoP (not used by this method itself).
def insert_copies(scop_ast)
  @nests.each do |nest|
    nest.code.insert_prev(C::StringLiteral.parse(nest.print_copyins)) if nest.has_copyins?
    nest.code.insert_next(C::StringLiteral.parse(nest.print_copyouts)) if nest.has_copyouts?
  end
end
273
+
274
+ end
275
+
276
+ end
277
+