bones-compiler 1.1.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
data/lib/common.rb ADDED
@@ -0,0 +1,216 @@
1
+
2
+ # Bones/Aset require 'fileutils' from the Ruby standard library.
3
+ require 'fileutils'
4
+
5
+ # Bones/Aset use the 'trollop' gem to parse command line options.
6
+ require 'rubygems'
7
+ require 'trollop'
8
+ require 'symbolic'
9
+
10
+ # Extending the Ruby standard string class to support some
11
+ # additional methods. This includes a hack of the gsub! command.
12
+ class String #:nodoc:
13
+
14
+ # Extend the Ruby string class to be able to chain 'gsub!'
15
+ #-commands. This code is taken from the web.
16
+ meth = 'gsub!'
17
+ orig_meth = "orig_#{meth}"
18
+ alias_method orig_meth, meth
19
+ define_method(meth) do |*args|
20
+ self.send(orig_meth, *args)
21
+ self
22
+ end
23
+
24
+ end
25
+
26
+ # Set the newline character
27
+ NL = "\n"
28
+ # Set the indentation string (currently: a single tab character)
29
+ INDENT = "\t"
30
+
31
+ # A string representing the combination character ('^') of a species.
32
+ WEDGE = '^'
33
+ # A string representing the production character ('->') of a species.
34
+ ARROW = '->'
35
+ # A string representing the pipe character ('|') of a species.
36
+ PIPE = '|'
37
+ # A string representing the colon character (':') to separate ranges in dimensions.
38
+ RANGE_SEP = ':'
39
+ # A string representing the comma character (',') to separate different ranges.
40
+ DIM_SEP = ','
41
+
42
+ # Fallback value substituted for a variable when two expressions cannot be compared symbolically
43
+ ASSUME_VAL = '1000'
44
+
45
+
46
+ # Helper method to evaluate mathematical expressions, possibly containing
47
+ # symbols. This method is only used for readability; without it the code
48
+ # is functionally correct, but expressions might be larger than needed.
49
+ def simplify(expr)
50
+ raise_error('Invalid expression to simplify') if !expr
51
+ expr = expr.gsub(' ','')
52
+
53
+ # Immediately return if there is an array index in the expression
54
+ return expr if expr =~ /\[/
55
+
56
+ # Handle min/max functions
57
+ if expr =~ /max/ || expr =~ /min/
58
+ return expr
59
+ end
60
+
61
+ # Get all the variables
62
+ vars = get_vars(expr)
63
+
64
+ # Set all the variables
65
+ hash = {}
66
+ vars.uniq.each do |var_name|
67
+ hash[var_name.to_sym] = var :name => var_name
68
+ expr = expr.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
69
+ end
70
+
71
+ # Simplify the string using the 'symbolic' gem.
72
+ symbolic_expr = eval(expr)
73
+
74
+ # Return the result as a string
75
+ return symbolic_expr.to_s
76
+ end
77
+
78
+ # Get the variables in an expression
79
+ def get_vars(expr)
80
+ expr.split(/\W+/).reject{ |s| (s.to_i.to_s == s || s.to_f.to_s == s || s == "") }
81
+ end
82
+
83
+ # Solve a linear equality (work in progress)
84
+ def solve(equality,variable,forbidden_vars)
85
+ return "" if equality == ""
86
+
87
+ # Perform the subtitution of the current variable
88
+ expr = '-('+equality.gsub('=','-(').gsub(/\b#{variable}\b/,"0")+'))'
89
+
90
+ # Simplify the result
91
+ result = simplify(expr)
92
+
93
+ # Return the result or nothing (if it still contains forbidden variables)
94
+ vars = get_vars(result)
95
+ if vars & forbidden_vars == []
96
+ return result
97
+ else
98
+ return ""
99
+ end
100
+ end
101
+
102
+ # Find the maximum value of 2 expressions
103
+ def max(expr1,expr2,assumptions=[])
104
+ return expr1 if expr2 == ""
105
+ comparison = simplify("(#{expr1})-(#{expr2})")
106
+
107
+ # Process the assumptions
108
+ assumptions.each do |assumption|
109
+ comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
110
+ end
111
+
112
+ # Test to find the maximum
113
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
114
+ return expr1 if (comparison.to_i == 0)
115
+ return expr1 if (comparison.to_i > 0)
116
+ return expr2 if (comparison.to_i < 0)
117
+ else
118
+
119
+ # Handle min/max functions
120
+ if comparison =~ /max/ || comparison =~ /min/
121
+ return "max(#{expr1},#{expr2})"
122
+ end
123
+
124
+ # Find the maximum based on a guess
125
+ var = get_vars(comparison).first
126
+ assumptions << [var,ASSUME_VAL]
127
+ #puts "WARNING: Don't know how to find the max/min of '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
128
+ return max(expr1,expr2,assumptions)
129
+ end
130
+ end
131
+
132
+ # Find the minimum value of 2 expressions (based on the max method)
133
+ def min(expr1,expr2)
134
+ return expr1 if expr2 == ""
135
+ s1 = simplify(expr1)
136
+ s2 = simplify(expr2)
137
+ comparison = simplify("(#{s1})-(#{s2})")
138
+
139
+ # Handle min/max functions
140
+ if comparison =~ /max/ || comparison =~ /min/
141
+ return s1 if s2 =~ /^max\(#{s1},.*\)$/ || s2 =~ /^max\(.*,#{s1}\)$/
142
+ return s2 if s1 =~ /^max\(#{s2},.*\)$/ || s1 =~ /^max\(.*,#{s2}\)$/
143
+ return "min(#{expr1},#{expr2})"
144
+ end
145
+
146
+ # Run the 'max' method
147
+ maximum = max(expr1,expr2)
148
+ return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
149
+ end
150
+
151
+ # Find the exact maximum value of 2 expressions
152
+ def exact_max(expr1,expr2)
153
+ return expr1 if expr1 == expr2
154
+ comparison = simplify("(#{expr1})-(#{expr2})")
155
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
156
+ return expr1 if (comparison.to_i == 0)
157
+ return expr1 if (comparison.to_i > 0)
158
+ return expr2 if (comparison.to_i < 0)
159
+ else
160
+ return "max(#{expr1},#{expr2})"
161
+ end
162
+ end
163
+
164
+ # Find the exact minimum value of 2 expressions (based on the exact_max method)
165
+ def exact_min(expr1,expr2)
166
+ return expr1 if expr1 == expr2
167
+ maximum = exact_max(expr1,expr2)
168
+ return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
169
+ end
170
+
171
+
172
+ # Return the absolute value (if possible)
173
+ def abs(expr)
174
+ return expr.to_i.abs.to_s if expr.to_i.to_s == expr
175
+ return expr
176
+ end
177
+
178
+ # Compare two expressions
179
+ def compare(expr1,expr2,loop_data,assumptions=[])
180
+ comparison = simplify("(#{expr1})-(#{expr2})")
181
+
182
+ # Handle min/max functions
183
+ if comparison =~ /max/ || comparison =~ /min/
184
+ return comparison
185
+ end
186
+
187
+ # Process the assumptions
188
+ assumptions.each do |assumption|
189
+ comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
190
+ end
191
+
192
+ # Known comparison
193
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
194
+ return 'eq' if (comparison.to_i == 0)
195
+ return 'gt' if (comparison.to_i > 0)
196
+ return 'lt' if (comparison.to_i < 0)
197
+ else
198
+
199
+ # Comparison based on loop data
200
+ get_vars(comparison).each do |var|
201
+ loop_data.each do |loop_datum|
202
+ if loop_datum[:var] == var
203
+ assumptions << [var,loop_datum[:min]]
204
+ #puts "WARNING: Modifying expression '(#{expr1}) vs (#{expr2})', assuming: #{var}=#{loop_datum[:min]}"
205
+ return compare(expr1,expr2,loop_data,assumptions)
206
+ end
207
+ end
208
+ end
209
+
210
+ # Comparison based on a guess
211
+ var = get_vars(comparison).first
212
+ assumptions << [var,ASSUME_VAL]
213
+ #puts "WARNING: Don't know how to compare '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
214
+ return compare(expr1,expr2,loop_data,assumptions)
215
+ end
216
+ end
@@ -0,0 +1,3 @@
1
+
2
+ void bones_timer_start();
3
+ void bones_timer_stop();
File without changes
@@ -3,18 +3,16 @@
3
3
  struct timeval bones_start_time2;
4
4
  struct timeval bones_end_time2;
5
5
  for (int bones_iter=0; bones_iter<ITERS; bones_iter++) {
6
-
7
- // Flush the CPU cache (for measurement purposes only)
8
- const int bones_flush_size = 4*1024*1024; // (16MB)
9
- int bones_flush_i;
10
- int bones_flush_j;
11
- char *bones_flush_c = (char *)malloc(bones_flush_size);
12
- for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
13
- for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
14
- bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
6
+
7
+ // Flush the CPU cache (for measurement purposes only)
8
+ const int bones_flush_size = 4*1024*1024; // (4MB)
9
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
10
+ for (int i=0; i<10; i++) {
11
+ for (int j=0; j<bones_flush_size; j++) {
12
+ bones_flush_c[j] = i*j;
13
+ }
15
14
  }
16
- }
17
- free(bones_flush_c);
15
+ free(bones_flush_c);
18
16
 
19
- // Start the timer for the measurement of the kernel execution time
20
- gettimeofday(&bones_start_time2, NULL);
17
+ // Start the timer for the measurement of the kernel execution time
18
+ gettimeofday(&bones_start_time2, NULL);
@@ -5,4 +5,4 @@
5
5
  }
6
6
 
7
7
  // Print the measurement data
8
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2/((float)ITERS));
8
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2/((float)ITERS));
@@ -0,0 +1,29 @@
1
+ ////////////////////////////////////////
2
+ //////////// Timers ////////////////////
3
+ ////////////////////////////////////////
4
+
5
+ // Timer
6
+ struct timeval bones_start_time1;
7
+
8
+ // Start the timer for the measurement of the whole scop
9
+ void bones_timer_start() {
10
+ const int bones_flush_size = 4*1024*1024; // (4MB)
11
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
12
+ for (int i=0; i<10; i++) {
13
+ for (int j=0; j<bones_flush_size; j++) {
14
+ bones_flush_c[j] = i*j;
15
+ }
16
+ }
17
+ free(bones_flush_c);
18
+ gettimeofday(&bones_start_time1, NULL);
19
+ }
20
+
21
+ // End the timer for the measurement of the whole scop
22
+ void bones_timer_stop() {
23
+ #if (ITERS == 1)
24
+ struct timeval bones_end_time1;
25
+ gettimeofday(&bones_end_time1, NULL);
26
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
27
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
28
+ #endif
29
+ }
@@ -103,7 +103,7 @@ void bones_initialize_target(void) {
103
103
  cl_platform_id bones_platform_ids[10];
104
104
  bones_errors = clGetPlatformIDs(bones_num_platforms,bones_platform_ids,NULL); error_check(bones_errors);
105
105
 
106
- // Select the AMD APP platform
106
+ // Select the Intel SDK platform
107
107
  char bones_buffer[1024];
108
108
  cl_uint bones_platform;
109
109
  for(cl_uint bones_platform_id=0; bones_platform_id<bones_num_platforms; bones_platform_id++) {
@@ -1,5 +1,8 @@
1
1
  #include <stdlib.h>
2
2
 
3
+ void bones_timer_start();
4
+ void bones_timer_stop();
5
+
3
6
  // Allocate a 128-byte aligned pointer
4
7
  void *bones_malloc_128(size_t bones_size) {
5
8
  char *bones_pointer;
@@ -1,5 +1,10 @@
1
1
 
2
2
  // Copy <array> from device to host (zero-copy map/unmap if ZEROCOPY == 1, explicit buffer read if ZEROCOPY == 0)
3
- void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,<offset>,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
4
- clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
3
+ #if ZEROCOPY == 1
4
+ printf("Copying back from device_<array> to <array>\n");
5
+ void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,0,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
6
+ clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
7
+ #elif ZEROCOPY == 0
8
+ bones_errors = clEnqueueReadBuffer(bones_queue,device_<array>,CL_TRUE,(0)*sizeof(<type>),<variable_dimensions>*sizeof(<type>),<array><flatten>+0,0,NULL,NULL); error_check(bones_errors);
9
+ #endif
5
10
  clFinish(bones_queue);
@@ -1,3 +1,5 @@
1
1
 
2
- //bones_errors = clEnqueueWriteBuffer(bones_queue, device_<array>, CL_TRUE, 0, <variable_dimensions>*sizeof(<type>), <array><flatten>, 0, NULL, NULL); error_check(bones_errors);
3
- //clFinish(bones_queue);
2
+ #if ZEROCOPY == 0
3
+ device_<array> = clCreateBuffer(bones_context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,<variable_dimensions>*sizeof(<type>),<array><flatten>, &bones_errors); error_check(bones_errors);
4
+ clFinish(bones_queue);
5
+ #endif
File without changes
@@ -1,4 +1,7 @@
1
1
 
2
- // Create a device pointer for <array> (zero-copy)
3
- cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
4
- //cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
2
+ // Create a device pointer for <array>
3
+ #if ZEROCOPY == 1
4
+ cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
5
+ #elif ZEROCOPY == 0
6
+ cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
7
+ #endif
@@ -8,4 +8,4 @@
8
8
  bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end2, 0); error_check(bones_errors);
9
9
  bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start2, 0); error_check(bones_errors);
10
10
  float bones_timer2 = 0.000001 * (end2-start2);
11
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2);
11
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2);
@@ -0,0 +1,24 @@
1
+
2
+ ////////////////////////////////////////
3
+ //////////// Timers ////////////////////
4
+ ////////////////////////////////////////
5
+
6
+ // Timer
7
+ struct timeval bones_start_time1;
8
+
9
+ // Start the timer for the measurement of the whole scop
10
+ void bones_timer_start() {
11
+ clFinish(bones_queue);
12
+ gettimeofday(&bones_start_time1, NULL);
13
+ }
14
+
15
+ // End the timer for the measurement of the whole scop
16
+ void bones_timer_stop() {
17
+ #if (ITERS == 1)
18
+ clFinish(bones_queue);
19
+ struct timeval bones_end_time1;
20
+ gettimeofday(&bones_end_time1, NULL);
21
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
22
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
23
+ #endif
24
+ }
@@ -1,5 +1,6 @@
1
1
  #include <omp.h>
2
2
  #include <stdlib.h>
3
+ #include <stdio.h>
3
4
 
4
5
  #define BONES_MIN(a,b) ((a<b) ? a : b)
5
6
  #define BONES_MAX(a,b) ((a>b) ? a : b)
@@ -0,0 +1,3 @@
1
+
2
+ void bones_timer_start();
3
+ void bones_timer_stop();
File without changes
@@ -1,12 +0,0 @@
1
-
2
- // Flush the CPU cache (for measurement purposes only)
3
- const int bones_flush_size = 4*1024*1024; // (16MB)
4
- int bones_flush_i;
5
- int bones_flush_j;
6
- char *bones_flush_c = (char *)malloc(bones_flush_size);
7
- for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
8
- for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
9
- bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
10
- }
11
- }
12
- free(bones_flush_c);
@@ -5,4 +5,4 @@
5
5
  }
6
6
 
7
7
  // Print the measurement data
8
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2/((float)ITERS));
8
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2/((float)ITERS));
@@ -0,0 +1,33 @@
1
+ ////////////////////////////////////////
2
+ //////////// Timers ////////////////////
3
+ ////////////////////////////////////////
4
+
5
+ // Includes
6
+ #include <stdio.h>
7
+
8
+ // Timer
9
+ struct timeval bones_start_time1;
10
+
11
+ // Start the timer for the measurement of the whole scop
12
+ void bones_timer_start() {
13
+ /*
14
+ const int bones_flush_size = 4*1024*1024; // (4MB)
15
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
16
+ for (int i=0; i<10; i++) {
17
+ for (int j=0; j<bones_flush_size; j++) {
18
+ bones_flush_c[j] = i*j;
19
+ }
20
+ }
21
+ free(bones_flush_c);*/
22
+ gettimeofday(&bones_start_time1, NULL);
23
+ }
24
+
25
+ // End the timer for the measurement of the whole scop
26
+ void bones_timer_stop() {
27
+ #if (ITERS == 1)
28
+ struct timeval bones_end_time1;
29
+ gettimeofday(&bones_end_time1, NULL);
30
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
31
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
32
+ #endif
33
+ }