bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,44 @@
1
+ //
2
+ // This file is part of the Bones source-to-source compiler examples. This C-code
3
+ // example is meant to illustrate the use of Bones. For more information on Bones
4
+ // use the contact information below.
5
+ //
6
+ // == More information on Bones
7
+ // Contact............Cedric Nugteren <c.nugteren@tue.nl>
8
+ // Web address........http://parse.ele.tue.nl/bones/
9
+ //
10
+ // == File information
11
+ // Filename...........neighbourhood/example05.c
12
+ // Author.............Cedric Nugteren
13
+ // Last modified on...07-May-2013
14
+ //
15
+
16
+ #include <stdio.h>
17
+
18
+ // This is 'example05', an unrolled one-sided neighbourhood
19
+ int main(void) {
20
+ int i;
21
+ int N = 256;
22
+
23
+ // Declare input/output arrays
24
+ int A[N];
25
+ int B[N];
26
+
27
+ // Set the input data
28
+ for(i=0;i<N;i++) {
29
+ A[i] = i;
30
+ B[i] = i+5;
31
+ }
32
+
33
+ // Perform the computation
34
+ #pragma species kernel A[2:N]|neighbourhood(0:1) -> B[2:N-1]|element
35
+ for (i=2; i<N; i++) {
36
+ B[i] = A[i] + A[i+1];
37
+ }
38
+ #pragma species endkernel example05
39
+
40
+ // Clean-up and exit the function
41
+ fflush(stdout);
42
+ return 0;
43
+ }
44
+
File without changes
File without changes
File without changes
File without changes
File without changes
data/lib/adarwin.rb ADDED
@@ -0,0 +1,62 @@
1
+
2
+ # Include the common part between Bones and A-Darwin
3
+ require 'common.rb'
4
+
5
+ # We define a custom error class for code generation related
6
+ # errors (any error raised).
7
+ class CodeGenError < StandardError #:nodoc:
8
+ end
9
+ def raise_error(message) #:nodoc:
10
+ puts Adarwin::ERROR+message
11
+ raise CodeGenError, 'Error encountered, stopping execution of A-Darwin'
12
+ end
13
+
14
+ # The module keeps all the classes and constants
15
+ # together. It contains the classes:
16
+ # * Engine: The main component of the tool, providing the high-level flow.
17
+ # * Preprocessor: C-preprocessor, extracting defines/includes from source code.
18
+ # * Nest:
19
+ # * Interval:
20
+ # * Dependence:
21
+ # * Reference:
22
+ #
23
+ # The module also contains a list of inter-class constants.
24
+ module Adarwin
25
+
26
+ # A string given as a start of an informative message.
27
+ MESSAGE = '[A-Darwin] ### Info : '
28
+ # A string given as a start of an warning message.
29
+ WARNING = '[A-Darwin] ### Warning: '
30
+ # A string given as a start of an error message.
31
+ ERROR = '[A-Darwin] ### Error : '
32
+
33
+ # Start of the scop
34
+ SCOP_START = '#pragma scop'
35
+ # Enf of the scop
36
+ SCOP_END = '#pragma endscop'
37
+
38
+ # Species pragma
39
+ PRAGMA_SPECIES = '#pragma species'
40
+
41
+ # Array reference characterisation (ARC) pragma
42
+ PRAGMA_ARC = '#pragma ARC'
43
+
44
+ # Create a string from a pragma because pragma's are unsupported by CAST.
45
+ PRAGMA_DELIMITER_START = '"PRAGMA '
46
+ PRAGMA_DELIMITER_END = ' PRAGMA"'
47
+
48
+ # This class is created to be a parent class of all classes.
49
+ class Common
50
+ end
51
+
52
+ end
53
+
54
+ # This list of require's makes sure all classes are included.
55
+ require 'adarwin/interval.rb'
56
+ require 'adarwin/dependences.rb'
57
+ require 'adarwin/preprocessor.rb'
58
+ require 'adarwin/memorycopies.rb'
59
+ require 'adarwin/fusion.rb'
60
+ require 'adarwin/engine.rb'
61
+ require 'adarwin/reference.rb'
62
+ require 'adarwin/nest.rb'
@@ -0,0 +1,268 @@
1
+ module Adarwin
2
+
3
+ # This class represents the dependence tests. The dependence tests are not
4
+ # objects as such, the use of a class might therefore be a bit out of place.
5
+ # Instead, the class rather holds all methods related to dependence tests.
6
+ #
7
+ # For an M-dimensional access, the problem of dependence testing is reduced to
8
+ # that of determining whether a system of M linear equations of the form
9
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
10
+ # has a simultaneous integer solution satisfying the loop/if bounds given as
11
+ # >>> min_k <= I_k <= max_k
12
+ #
13
+ # Currently, the following conservative tests are implemented:
14
+ # * The GCD (greatest common divisor) test
15
+ # * The Banerjee test
16
+ #
17
+ # In case the accesses are multi-dimensional, we perform a subscript-by-
18
+ # subscript checking. In other words, we test each dimension separately
19
+ # using the two tests. If we find a possible dependence in one dimension, we
20
+ # conclude that there is a dependence.
21
+ class Dependence
22
+ attr_accessor :result
23
+
24
+ # Method to initialise the dependence tests. This method actually already
25
+ # computes all the dependence tests and stores the result in an instance
26
+ # variable. It takes as input the pair of accesses it needs to check for
27
+ # dependences.
28
+ def initialize(access1,access2,verbose)
29
+ @verbose = verbose
30
+ bounds = [access1.bounds,access2.bounds]
31
+
32
+ # Iterate over the dimensions of the array reference
33
+ results = []
34
+ dimensions = [access1.indices.size,access2.indices.size].min
35
+ for dim in 1..dimensions
36
+ ref1 = access1.indices[dim-1]
37
+ ref2 = access2.indices[dim-1]
38
+ loop_vars = [access1.all_loops.map{ |l| l[:var] },access2.all_loops.map{ |l| l[:var] }]
39
+
40
+ # Conclude directly that there is no dependence if the references are
41
+ # exactly the same.
42
+ if ref1 == ref2
43
+ results << false
44
+ next
45
+ end
46
+
47
+ # TODO: Include the step in the dependence tests
48
+ #p access1.tS[dim-1]
49
+
50
+ # Get all variables, a linear equation, and the corresponding conditions
51
+ all_vars, equation, conditions = get_linear_equation(ref1,ref2,bounds,loop_vars)
52
+
53
+ # Conclude directly that there is no dependence if the variables are not
54
+ # dependent on the loops.
55
+ if equation[:ak].empty?
56
+ results << false
57
+ next
58
+ end
59
+
60
+ # Perform the GCD test
61
+ gcd_result = gcd_test(all_vars,equation)
62
+
63
+ # End if the GCD test concludes that there are no dependences
64
+ if gcd_result == false
65
+ results << false
66
+
67
+ # Continue with Banerjee if GCD concludes there might be dependences
68
+ else
69
+ ban_result = ban_test(all_vars,equation,conditions)
70
+ results << ban_result
71
+ end
72
+ end
73
+
74
+ # Combine the results for all dimensions
75
+ if results.include?(true)
76
+ @result = true
77
+ else
78
+ @result = false
79
+ end
80
+ end
81
+
82
+ # This method implements the GCD test. The test is based on the computation
83
+ # of the greatest common divisor, giving it its name. The GCD test is based
84
+ # on the fact that a linear equation in the form of
85
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
86
+ # has an integer solution if and only if the greatest common divisor of a_1,
87
+ # a_2,...,a_n is a divisor of a_0. The GCD test checks for this
88
+ # divisibility by performing the division and checking if the result is
89
+ # integer.
90
+ #
91
+ # This method returns true if there is an integer solution, not necessarily
92
+ # within the loop bounds. Thus, if the method returns true, there might be a
93
+ # dependence. If the method returns false, there is definitely no dependence.
94
+ #
95
+ # TODO: If the result (+division+) is symbolic, can we conclude anything?
96
+ def gcd_test(all_vars,equation)
97
+
98
+ # Gather all the data to perform the test. Here, base represents a_0 and
99
+ # data represents a_1,a_2,...,a_n.
100
+ base = equation[:a0]
101
+ data = equation[:ak]
102
+
103
+ # Perform the greatest common divisor calculation and perform the division
104
+ result = gcd(data)
105
+ division = base/result.to_f
106
+
107
+ # See if the division is integer under the condition that we can test that
108
+ if result == 0
109
+ gcd_result = false
110
+ elsif division.class != Float
111
+ gcd_result = true
112
+ else
113
+ gcd_result = (division.to_i.to_f == division)
114
+ end
115
+
116
+ # Display and return the result
117
+ puts MESSAGE+"GCD-test '#{gcd_result}' ---> (#{base})/(#{result}) = #{division}, gcd(#{data})" if @verbose
118
+ return gcd_result
119
+ end
120
+
121
+ # This method implements the Banerjee test. This test takes loop bounds into
122
+ # consideration. The test is based on a linear equation in the form of
123
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
124
+ # and loop bounds in the form of
125
+ # >>> min_k <= I_k <= max_k
126
+ #
127
+ # The test proceeds as follows. First, the values a_k+ and a_k- are
128
+ # computed. Also, the bounds min_k and max_k are calculated from the loop
129
+ # conditions. Following, the test computes the extreme values 'low' and
130
+ # 'high'. Finally, the test computes whether the following holds:
131
+ # >>> low <= a_0 <= high
132
+ # If this holds, there might be a dependence (method returns true). If this
133
+ # does not hold, there is definitely no dependence (method returns false).
134
+ def ban_test(all_vars,equation,conditions)
135
+
136
+ # Pre-process the data to obtain the a_k+, a_k-, and lower-bounds and
137
+ # upper-bounds for a_k (min_k and max_k).
138
+ values = []
139
+ equation[:ak].each_with_index do |a,index|
140
+ values << {
141
+ :ak_plus => (a >= 0) ? a : 0,
142
+ :ak_min => (a <= 0) ? -a : 0,
143
+ :min_k => conditions[index][:min],
144
+ :max_k => conditions[index][:max]
145
+ }
146
+ end
147
+
148
+ # Compute the extreme values 'low' and 'high'. This is done symbolically.
149
+ low, high = "0", "0"
150
+ values.each do |v|
151
+ partial_low = simplify("
152
+ (#{v[:ak_plus]}) * (#{v[:min_k]}) -
153
+ (#{v[:ak_min]}) * (#{v[:max_k]})
154
+ ")
155
+ low = simplify("(#{low}) + (#{partial_low})")
156
+ partial_high = simplify("
157
+ (#{v[:ak_plus]}) * (#{v[:max_k]}) -
158
+ (#{v[:ak_min]}) * (#{v[:min_k]})
159
+ ")
160
+ high = simplify("(#{high}) + (#{partial_high})")
161
+ end
162
+
163
+ # Perform the actual test: checking if low <= a_0 <= high holds. This is
164
+ # implemented as two parts: check the lower-bound and check the upper-
165
+ # bound.
166
+ # FIXME: This method uses the +max+ which might make a guess.
167
+ base = equation[:a0]
168
+ test1 = (base.to_s == max(low,base.to_s))
169
+ test2 = (high == max(base.to_s,high))
170
+ ban_result = (test1 && test2)
171
+
172
+ # Display and return the results
173
+ puts MESSAGE+"Banerjee '#{ban_result}' ---> (#{test1},#{test2}), '(#{low} <= #{base} <= #{high})'" if @verbose
174
+ return ban_result
175
+ end
176
+
177
+ # This method retrieves a linear equation from a pair of accesses. Accesses
178
+ # are transformed into a linear equation of the form
179
+ # >>> a_1*I_1 + a_2*I_2 + ... + a_n*I_n = a_0
180
+ # Additionally, this method returns a list of all variables and a list of
181
+ # loop bounds corresponding to the linear equation's variables.
182
+ def get_linear_equation(access1,access2,bounds,all_loop_vars)
183
+ equation = { :a0 => 0, :ak => [] }
184
+ all_vars = []
185
+ conditions = []
186
+ hash = {}
187
+
188
+ # Loop over the two accesses
189
+ [access1,access2].each_with_index do |access,index|
190
+ access = simplify(access.to_s)
191
+
192
+ # Get the variables (I_1 ... I_n) and modify the access expression
193
+ vars = get_vars(access).uniq
194
+ loop_vars = get_loop_vars(vars,all_loop_vars[index])
195
+ all_vars = (all_vars + vars).uniq
196
+ vars.each do |var_name|
197
+ access = access.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
198
+ end
199
+
200
+ # Create a hash of all the variables. For now, this is just the name of
201
+ # the variable. The values will be set later. This uses the 'symbolic'
202
+ # library.
203
+ vars.each do |var_name|
204
+ if !hash[var_name.to_sym]
205
+ hash[var_name.to_sym] = var :name => var_name
206
+ end
207
+ hash[var_name.to_sym].value = hash[var_name.to_sym]
208
+ end
209
+
210
+ # Find the constant term (a_0). This uses the +eval+ method together
211
+ # with the 'symbolic' gem to compute the term.
212
+ loop_vars.each do |var_name|
213
+ hash[var_name.to_sym].value = 0
214
+ end
215
+ base = eval(access).value
216
+ val = (index == 0) ? base : -base
217
+ equation[:a0] = equation[:a0] + val
218
+
219
+ # Find the other terms (a_1, a_2, ... a_n). This uses the +eval+ method
220
+ # together with the 'symbolic' gem to compute the terms.
221
+ loop_vars.each do |var_name|
222
+ hash[var_name.to_sym].value = 1
223
+ val = eval(access).value - base
224
+ val = (index == 0) ? val : -val
225
+ equation[:ak] << val
226
+ hash[var_name.to_sym].value = 0
227
+ end
228
+
229
+ # Get the loop bound conditions corresponding to the linear equation's
230
+ # variables.
231
+ loop_vars.each do |var_name|
232
+ conditions << bounds[index].select{ |c| c[:var] == var_name }.first
233
+ end
234
+ end
235
+ return all_vars, equation, conditions
236
+ end
237
+
238
+ # Implementation of a GCD method with any number of arguments. Relies on
239
+ # Ruby's default GCD method. In contrast to the normal gcd method, this
240
+ # method does not act on a number, but instead takes an array of numbers as
241
+ # an input.
242
+ def gcd(args)
243
+ val = args.first
244
+ args.drop(1).each do |argument|
245
+ val = val.gcd(argument)
246
+ end
247
+ return val
248
+ end
249
+
250
+ # Method to obtain all variables in an array reference that are also loop
251
+ # variables.
252
+ def get_loop_vars(vars,all_loop_vars)
253
+ return vars & all_loop_vars
254
+ end
255
+
256
+ # Method to combine an array of integers in the form of a subtraction. For
257
+ # example, given the input [a,b,c,d], the output will be (a-b-c-d).
258
+ # TODO: Remove this method
259
+ #def merge_subtract(args)
260
+ # val = args.first
261
+ # args.drop(1).each do |argument|
262
+ # val = val - argument
263
+ # end
264
+ # return val
265
+ #end
266
+
267
+ end
268
+ end
@@ -0,0 +1,277 @@
1
+
2
+ module Adarwin
3
+
4
+ # This is the main 'engine' for the A-darwin algorithmic species extraction
5
+ # tool. It contains methods to parse the command-line arguments, to run the
6
+ # pre-processor, to insert the annotations, and to pretty print the final
7
+ # output.
8
+ # TODO: Add a syntax check by a normal compiler first (e.g. gcc)
9
+ class Engine < Common
10
+
11
+ # Initializes the engine and processes the command line arguments. This
12
+ # method uses the 'trollop' gem to parse the arguments and to create a
13
+ # nicely formatted help menu. This method additionally initializes a result-
14
+ # hash and reads the contents of the source file from disk.
15
+ #
16
+ # ==== Command-line usage:
17
+ # adarwin --application <input> [OPTIONS]
18
+ #
19
+ # ==== Options:
20
+ # --application, -a <s>: Input application file
21
+ # --no-memory-annotations, -m: Disable the printing of memory annotations
22
+ # --mem-remove-spurious, -s: Memcopy optimisation: remove spurious copies
23
+ # --mem-copyin-to-front, -f: Memcopy optimisation: move copyins to front
24
+ # --mem-copyout-to-back, -b: Memcopy optimisation: move copyouts to back
25
+ # --mem-to-outer-loop, -l: Memcopy optimisation: move copies to outer loops
26
+ # --only-alg-number, -o <i>: Only generate code for the x-th species (99 -> all) (default: 99)
27
+ # --version, -v: Print version and exit
28
+ # --help, -h: Show this message
29
+ #
30
+ def initialize
31
+ @result = {:original_code => [],
32
+ :species_code => []}
33
+
34
+ # Parse the command line options using the 'trollop' gem.
35
+ @options = Trollop::options do
36
+ version 'A-darwin, part of Bones version '+File.read(ADARWIN_DIR+'/VERSION').strip+' (c) 2013 Cedric Nugteren, Eindhoven University of Technology'
37
+ banner NL+'A-darwin is an algorithmic species extraction tool. ' +
38
+ 'For more information, see the README.rdoc file or visit the Bones/A-darwin website at http://parse.ele.tue.nl/bones/.' + NL + NL +
39
+ 'Usage:' + NL +
40
+ ' adarwin --application <input> [OPTIONS]' + NL +
41
+ 'using the following flags:'
42
+ opt :application, 'Input application file', :short => 'a', :type => String
43
+ opt :no_memory_annotations, 'Disable the printing of memory annotations', :short => 'm', :default => false
44
+ opt :mem_remove_spurious, 'Memcopy optimisation: remove spurious copies', :short => 'r', :default => false
45
+ opt :mem_copyin_to_front, 'Memcopy optimisation: move copyins to front', :short => 'f', :default => false
46
+ opt :mem_copyout_to_back, 'Memcopy optimisation: move copyouts to back', :short => 'b', :default => false
47
+ opt :mem_to_outer_loop, 'Memcopy optimisation: move copies to outer loops', :short => 'l', :default => false
48
+ opt :fusion, 'Type of kernel fusion to perform (0 -> disable)', :short => 'k', :type => Integer, :default => 0
49
+ opt :print_arc, 'Print array reference characterisations (ARC) instead of species', :short => 'c', :default => false
50
+ opt :silent, 'Become silent (no message printing)', :short => 's', :default => false
51
+ opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
52
+ end
53
+ Trollop::die 'no input file supplied (use: --application)' if !@options[:application_given]
54
+ Trollop::die 'input file "'+@options[:application]+'" does not exist' if !File.exists?(@options[:application])
55
+ @options[:name] = @options[:application].split('/').last.split('.').first
56
+ @options[:no_memory_annotations] = true if @options[:print_arc]
57
+
58
+ # Obtain the source code from file
59
+ @source = File.open(@options[:application],'r'){|f| f.read}
60
+ @basename = File.basename(@options[:application],'.c')
61
+ end
62
+
63
+ # Method to process a file and to output target code. This method calls all
64
+ # the other methods, it is the main engine.
65
+ #
66
+ # ==== Tasks:
67
+ # * Run the preprocessor to obtain algorithm information.
68
+ # * Use the 'CAST' gem to parse the source into an AST.
69
+ # * Call the code generator to perform the real work and produce output.
70
+ def process
71
+
72
+ # Run the preprocessor
73
+ preprocessor = Adarwin::Preprocessor.new(@source)
74
+ preprocessor.process
75
+ @result[:header_code] = preprocessor.header_code
76
+
77
+ # Set-up the CAST gem to include certain types
78
+ # FIXME: What about other (user-defined?) types?
79
+ parser = C::Parser.new
80
+ parser.type_names << 'FILE'
81
+ parser.type_names << 'size_t'
82
+
83
+ # Parse the original source code into AST form (using CAST)
84
+ original_ast = parser.parse(preprocessor.parsed_code)
85
+
86
+ # Create an AST of the SCoP (using CAST) and save a backup
87
+ scop_ast = C::Block.parse('{'+preprocessor.scop_code+'}')
88
+ original_scop_ast = scop_ast.clone
89
+
90
+ # Process the scop to identify the loop nests of interest and to find the
91
+ # corresponding species. This is the method performing most of the work.
92
+ @nests = []
93
+ @id = 0
94
+ populate_nests(scop_ast)
95
+
96
+ # Remove inner-loop (nested) species. This removes all species that are
97
+ # found within another species. For completeness, this might be desired in
98
+ # some cases.
99
+ # TODO: Make this an option
100
+ @nests.each do |nest|
101
+ if nest.has_species?
102
+ remove_inner_species(get_children(nest))
103
+ end
104
+ end
105
+ @nests.delete_if{ |n| n.removed }
106
+
107
+ # Iterate over the nests/statements to optimize the copies. Currently,
108
+ # this will only look at loop nests with a depth of 1. Re-call the memory
109
+ # copy optimisations method every time a change is made.
110
+ # TODO: Investigate what the depth should be.
111
+ basenests = @nests.select{ |n| n.depth == 1 }
112
+ recursive_copy_optimisations(basenests,@options)
113
+
114
+ # Kernel fusion is enabled (1,2,3,4) or disabled (0)
115
+ if @options[:fusion] > 0
116
+ # Test if fusion is legal and perform the actual transformation
117
+ kernel_fusion(@nests, @options[:fusion])
118
+ end
119
+
120
+ # Delete the to-be-removed code (because of fusion)
121
+ @nests.each do |nest|
122
+ if nest.removed
123
+ scop_ast.remove_once(nest.code)
124
+ end
125
+ end
126
+ @nests.delete_if{ |n| n.removed }
127
+
128
+ # Insert the species and memory copy annotations into the original code.
129
+ # Don't do this if the user specified that he is not interested in the
130
+ # memory copy annotations.
131
+ insert_copies(scop_ast) unless @options[:no_memory_annotations]
132
+ insert_species(scop_ast)
133
+
134
+ # Create the modified SCoP and remove the quotes from the pragma's
135
+ # FIXME: This is a hack for now, this has conflicts with strings in code
136
+ modified_scop = INDENT+SCOP_START+NL+scop_ast.to_s+NL+INDENT+SCOP_END+NL
137
+ modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_START,'')
138
+ modified_scop = modified_scop.gsub(PRAGMA_DELIMITER_END,'')
139
+
140
+ # Print the result SCoP
141
+ puts modified_scop if !@options[:silent]
142
+
143
+ # Store the result
144
+ @result[:species_code] = preprocessor.target_code.gsub(preprocessor.scop_code,modified_scop)
145
+ end
146
+
147
+ # This method writes the output code to a file.
148
+ def write_output
149
+
150
+ # Populate the species file
151
+ # TODO: The filename is fixed, make this an optional argument
152
+ File.open(File.join(@options[:application].split('.').first+'_species'+'.c'),'w') do |target|
153
+ target.puts @result[:species_code]
154
+ end
155
+ end
156
+
157
+ # This method populates the Nest datastructure (recursively). It is the main
158
+ # method to process the loop nests and fine the species information. It is
159
+ # called recursively.
160
+ def populate_nests(ast,level=[])
161
+
162
+ # Only proceed if it is a loop
163
+ if ast.block?
164
+
165
+ # Create the new loop nests for the current depth level
166
+ ast.stmts.each_with_index do |nest,index|
167
+ new_level = level.clone.push(index)
168
+
169
+ # Push the loop nest, but only if it is not disabled by options
170
+ if @options[:only_alg_number].to_i == 99 || @options[:only_alg_number].to_i == (@id+1)
171
+
172
+ # Only continue if the nest is an actual loop nest
173
+ if nest.for_statement?
174
+ @nests.push(Nest.new(new_level,nest,@id,@basename,!@options[:silent]))
175
+ end
176
+ end
177
+ @id += 1
178
+ end
179
+
180
+ # Proceed to the next depth level.
181
+ # TODO: Make it an option to only investigate the outer most level(s).
182
+ ast.stmts.each_with_index do |nest,index|
183
+ new_level = level.clone.push(index)
184
+ if nest.stmt # && new_level == 0
185
+ populate_nests(nest.stmt,new_level)
186
+ end
187
+ end
188
+ end
189
+ end
190
+
191
+ # This method removes all species in the current loop nest (called
192
+ # recursively). It assumes these species should be removed.
193
+ def remove_inner_species(nests)
194
+ nests.each do |nest|
195
+ nest.copyins = []
196
+ nest.copyouts = []
197
+ nest.species = ''
198
+ nest.removed = true
199
+ children = get_children(nest)
200
+ remove_inner_species(children) if children
201
+ end
202
+ end
203
+
204
+ # Method to obtain the children of a nest
205
+ def get_children(parent)
206
+ children = []
207
+ @nests.map do |nest|
208
+ if parent.depth+1 == nest.depth
209
+ if parent.level == nest.level.reverse.drop(1).reverse
210
+ children << nest
211
+ end
212
+ end
213
+ end
214
+ return children
215
+ end
216
+
217
+ # This method iterates over the loop nests and inserts the species into the
218
+ # original AST. It also inserts the synchronisation barries when needed, and
219
+ # only if the user is interested in the memory copy annotations.
220
+ def insert_species(scop_ast)
221
+
222
+ # Find out where the synchronisation statements are needed
223
+ sync_needed = []
224
+ @nests.each do |nest|
225
+ sync_needed << nest.copyins.map{ |c| c.get_sync_id }
226
+ sync_needed << nest.copyouts.map{ |c| c.get_sync_id }
227
+ end
228
+ sync_needed = sync_needed.flatten.uniq
229
+
230
+ # Insert the annotations into the code
231
+ sync = 0
232
+ @nests.each do |nest|
233
+ sync = 2*nest.id
234
+
235
+ # Insert the pre-kernel synchronisation barrier
236
+ if sync_needed.include?(sync) && !@options[:no_memory_annotations]
237
+ nest.code.insert_prev(C::StringLiteral.parse(PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' sync '+(sync).to_s+PRAGMA_DELIMITER_END))
238
+ end
239
+
240
+ # Insert the pre-kernel species (start of species)
241
+ if nest.has_species?
242
+ to_print = (@options[:print_arc]) ? nest.print_arc_start : nest.print_species_start
243
+ nest.code.insert_prev(C::StringLiteral.parse(to_print))
244
+ end
245
+
246
+ # Insert the post-kernel synchronisation barrier
247
+ if sync_needed.include?(sync+1) && !@options[:no_memory_annotations]
248
+ node = (nest.code.next && nest.code.next.string? && nest.code.next.val =~ /pragma species copyout/) ? nest.code.next : nest.code
249
+ node.insert_next(C::StringLiteral.parse(PRAGMA_DELIMITER_START+PRAGMA_SPECIES+' sync '+(sync+1).to_s+PRAGMA_DELIMITER_END))
250
+ end
251
+
252
+ # Insert the post-kernel species (end of species)
253
+ if nest.has_species?
254
+ to_print = (@options[:print_arc]) ? nest.print_arc_end : nest.print_species_end
255
+ location = nest.code
256
+ location.insert_next(C::StringLiteral.parse(to_print))
257
+ end
258
+ end
259
+ end
260
+
261
+ # Iterate over the loop nests and insert the memory copy annotations into
262
+ # the original AST.
263
+ def insert_copies(scop_ast)
264
+ @nests.each do |nest|
265
+ if nest.has_copyins?
266
+ nest.code.insert_prev(C::StringLiteral.parse(nest.print_copyins))
267
+ end
268
+ if nest.has_copyouts?
269
+ nest.code.insert_next(C::StringLiteral.parse(nest.print_copyouts))
270
+ end
271
+ end
272
+ end
273
+
274
+ end
275
+
276
+ end
277
+