bones-compiler 1.1.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (211) hide show
  1. checksums.yaml +15 -0
  2. data/CHANGELOG +37 -0
  3. data/LICENSE +1 -1
  4. data/README.rdoc +95 -70
  5. data/Rakefile +78 -3
  6. data/VERSION +1 -1
  7. data/bin/adarwin +17 -0
  8. data/examples/benchmarks/PolyBench/2mm.c +104 -0
  9. data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
  10. data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
  11. data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
  12. data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
  13. data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
  14. data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
  15. data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
  16. data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
  17. data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
  18. data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
  19. data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
  20. data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
  21. data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
  22. data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
  23. data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
  24. data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
  25. data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
  26. data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
  27. data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
  28. data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
  29. data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
  30. data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
  31. data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
  32. data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
  33. data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
  34. data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
  35. data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
  36. data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
  37. data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
  38. data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
  39. data/examples/benchmarks/Rodinia/cfd.c +180 -0
  40. data/examples/benchmarks/Rodinia/hotspot.c +228 -0
  41. data/examples/benchmarks/Rodinia/kmeans.c +164 -0
  42. data/examples/benchmarks/Rodinia/srad.c +188 -0
  43. data/examples/benchmarks/other/common.h +0 -0
  44. data/examples/benchmarks/other/dct.c +58 -0
  45. data/examples/benchmarks/other/mm.c +50 -0
  46. data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
  47. data/examples/chunk/{example1.c → example01.c} +0 -0
  48. data/examples/chunk/{example2.c → example02.c} +0 -0
  49. data/examples/chunk/{example3.c → example03.c} +0 -0
  50. data/examples/chunk/{example4.c → example04.c} +0 -0
  51. data/examples/chunk/{example5.c → example05.c} +0 -0
  52. data/examples/chunk/example06.c +45 -0
  53. data/examples/chunk/example07.c +49 -0
  54. data/examples/dependences/example01.c +42 -0
  55. data/examples/dependences/example02.c +40 -0
  56. data/examples/dependences/example03.c +43 -0
  57. data/examples/dependences/example04.c +44 -0
  58. data/examples/dependences/example05.c +42 -0
  59. data/examples/element/{example1.c → example01.c} +0 -0
  60. data/examples/element/{example2.c → example02.c} +2 -2
  61. data/examples/element/{example3.c → example03.c} +0 -0
  62. data/examples/element/{example4.c → example04.c} +0 -0
  63. data/examples/element/{example5.c → example05.c} +0 -0
  64. data/examples/element/{example6.c → example06.c} +0 -0
  65. data/examples/element/{example7.c → example07.c} +0 -0
  66. data/examples/element/{example8.c → example08.c} +0 -0
  67. data/examples/element/{example9.c → example09.c} +0 -0
  68. data/examples/element/example13.c +73 -0
  69. data/examples/fusion/example01.c +68 -0
  70. data/examples/fusion/example02.c +73 -0
  71. data/examples/fusion/example03.c +72 -0
  72. data/examples/fusion/example04.c +61 -0
  73. data/examples/fusion/example05.c +55 -0
  74. data/examples/neighbourhood/{example1.c → example01.c} +0 -0
  75. data/examples/neighbourhood/{example2.c → example02.c} +0 -0
  76. data/examples/neighbourhood/{example3.c → example03.c} +0 -0
  77. data/examples/neighbourhood/{example4.c → example04.c} +0 -0
  78. data/examples/neighbourhood/example05.c +44 -0
  79. data/examples/shared/{example1.c → example01.c} +0 -0
  80. data/examples/shared/{example2.c → example02.c} +0 -0
  81. data/examples/shared/{example3.c → example03.c} +0 -0
  82. data/examples/shared/{example4.c → example04.c} +0 -0
  83. data/examples/shared/{example5.c → example05.c} +0 -0
  84. data/lib/adarwin.rb +62 -0
  85. data/lib/adarwin/dependences.rb +268 -0
  86. data/lib/adarwin/engine.rb +277 -0
  87. data/lib/adarwin/fusion.rb +174 -0
  88. data/lib/adarwin/interval.rb +57 -0
  89. data/lib/adarwin/memorycopies.rb +153 -0
  90. data/lib/adarwin/nest.rb +225 -0
  91. data/lib/adarwin/preprocessor.rb +76 -0
  92. data/lib/adarwin/reference.rb +261 -0
  93. data/lib/bones.rb +4 -55
  94. data/lib/bones/algorithm.rb +77 -40
  95. data/lib/bones/copy.rb +26 -0
  96. data/lib/bones/engine.rb +147 -31
  97. data/lib/bones/preprocessor.rb +92 -12
  98. data/lib/bones/species.rb +4 -3
  99. data/lib/bones/structure.rb +14 -4
  100. data/lib/castaddon.rb +11 -6
  101. data/lib/castaddon/node_adarwin.rb +245 -0
  102. data/lib/castaddon/node_bones.rb +316 -0
  103. data/lib/castaddon/node_common.rb +289 -0
  104. data/lib/castaddon/transformations.rb +236 -0
  105. data/lib/common.rb +216 -0
  106. data/skeletons/CPU-C/common/header.c +3 -0
  107. data/skeletons/CPU-C/common/mem_global.c +0 -0
  108. data/skeletons/CPU-C/common/timer_2_start.c +11 -13
  109. data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
  110. data/skeletons/CPU-C/common/timer_globals.c +29 -0
  111. data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
  112. data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
  113. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
  114. data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
  115. data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
  116. data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
  117. data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
  118. data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
  119. data/skeletons/CPU-OPENMP/common/globals.c +1 -0
  120. data/skeletons/CPU-OPENMP/common/header.c +3 -0
  121. data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
  122. data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
  123. data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
  124. data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
  125. data/skeletons/GPU-CUDA/common/globals.c +27 -3
  126. data/skeletons/GPU-CUDA/common/header.c +2 -0
  127. data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
  128. data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
  129. data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
  130. data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
  131. data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
  132. data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
  133. data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
  134. data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
  135. data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
  136. data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
  137. data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
  138. data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
  139. data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
  140. data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
  141. data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
  142. data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
  143. data/skeletons/GPU-CUDA/skeletons.txt +6 -5
  144. data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
  145. data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
  146. data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
  147. data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
  148. data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
  149. data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
  150. data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
  151. data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
  152. data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
  153. data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
  154. data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
  155. data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
  156. data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
  157. data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
  158. data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
  159. data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
  160. data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
  161. data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
  162. data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
  163. data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
  164. data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
  165. data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
  166. data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
  167. data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
  168. data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
  169. data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
  170. data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
  171. data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
  172. data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
  173. data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
  174. data/test/examples/chunk/example01_species.c +58 -0
  175. data/test/examples/chunk/example02_species.c +48 -0
  176. data/test/examples/chunk/example03_species.c +63 -0
  177. data/test/examples/chunk/example04_species.c +58 -0
  178. data/test/examples/chunk/example05_species.c +56 -0
  179. data/test/examples/chunk/example06_species.c +49 -0
  180. data/test/examples/chunk/example07_species.c +53 -0
  181. data/test/examples/dependences/example01_species.c +46 -0
  182. data/test/examples/dependences/example02_species.c +44 -0
  183. data/test/examples/dependences/example03_species.c +47 -0
  184. data/test/examples/dependences/example04_species.c +48 -0
  185. data/test/examples/dependences/example05_species.c +46 -0
  186. data/test/examples/element/example01_species.c +50 -0
  187. data/test/examples/element/example02_species.c +50 -0
  188. data/test/examples/element/example03_species.c +62 -0
  189. data/test/examples/element/example04_species.c +53 -0
  190. data/test/examples/element/example05_species.c +59 -0
  191. data/test/examples/element/example06_species.c +50 -0
  192. data/test/examples/element/example07_species.c +58 -0
  193. data/test/examples/element/example08_species.c +49 -0
  194. data/test/examples/element/example09_species.c +52 -0
  195. data/test/examples/element/example10_species.c +54 -0
  196. data/test/examples/element/example11_species.c +51 -0
  197. data/test/examples/element/example12_species.c +60 -0
  198. data/test/examples/element/example13_species.c +77 -0
  199. data/test/examples/neighbourhood/example01_species.c +57 -0
  200. data/test/examples/neighbourhood/example02_species.c +56 -0
  201. data/test/examples/neighbourhood/example03_species.c +83 -0
  202. data/test/examples/neighbourhood/example04_species.c +55 -0
  203. data/test/examples/neighbourhood/example05_species.c +48 -0
  204. data/test/examples/shared/example01_species.c +49 -0
  205. data/test/examples/shared/example02_species.c +55 -0
  206. data/test/examples/shared/example03_species.c +59 -0
  207. data/test/examples/shared/example04_species.c +56 -0
  208. data/test/examples/shared/example05_species.c +52 -0
  209. metadata +193 -73
  210. data/examples/benchmarks/overview.txt +0 -38
  211. data/lib/castaddon/node.rb +0 -753
data/lib/common.rb ADDED
@@ -0,0 +1,216 @@
1
+
2
+ # Bones/Aset require 'fileutils' from the Ruby standard library.
3
+ require 'fileutils'
4
+
5
+ # Bones/Aset use the 'trollop' gem to parse command line options.
6
+ require 'rubygems'
7
+ require 'trollop'
8
+ require 'symbolic'
9
+
10
+ # Extending the Ruby standard string class to support some
11
+ # additional methods. This includes a hack of the gsub! command.
12
+ class String #:nodoc:
13
+
14
+ # Extend the Ruby string class to be able to chain 'gsub!'
15
+ #-commands. This code is taken from the web.
16
+ meth = 'gsub!'
17
+ orig_meth = "orig_#{meth}"
18
+ alias_method orig_meth, meth
19
+ define_method(meth) do |*args|
20
+ self.send(orig_meth, *args)
21
+ self
22
+ end
23
+
24
+ end
25
+
26
+ # Set the newline character
27
+ NL = "\n"
28
+ # Set the indentation string (currently: a single tab character)
29
+ INDENT = "\t"
30
+
31
+ # A string representing the combination character ('^') of a species.
32
+ WEDGE = '^'
33
+ # A string representing the production character ('->') of a species.
34
+ ARROW = '->'
35
+ # A string representing the pipe character ('|') of a species.
36
+ PIPE = '|'
37
+ # A string representing the colon character (':') to separate ranges in dimensions.
38
+ RANGE_SEP = ':'
39
+ # A string representing the comma character (',') to separate different ranges.
40
+ DIM_SEP = ','
41
+
42
+ # Value to assume a variable to be
43
+ ASSUME_VAL = '1000'
44
+
45
+
46
+ # Helper method to evaluate mathematical expressions, possibly containing
47
+ # symbols. This method is only used for readability, without it the code
48
+ # is functionally correct, but expressions might be larger than needed.
49
+ def simplify(expr)
50
+ raise_error('Invalid expression to simplify') if !expr
51
+ expr = expr.gsub(' ','')
52
+
53
+ # Immediately return if there is an array index in the expression
54
+ return expr if expr =~ /\[/
55
+
56
+ # Handle min/max functions
57
+ if expr =~ /max/ || expr =~ /min/
58
+ return expr
59
+ end
60
+
61
+ # Get all the variables
62
+ vars = get_vars(expr)
63
+
64
+ # Set all the variables
65
+ hash = {}
66
+ vars.uniq.each do |var_name|
67
+ hash[var_name.to_sym] = var :name => var_name
68
+ expr = expr.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
69
+ end
70
+
71
+ # Simplify the string using the 'symbolic' gem.
72
+ symbolic_expr = eval(expr)
73
+
74
+ # Return the result as a string
75
+ return symbolic_expr.to_s
76
+ end
77
+
78
+ # Get the variables in an expression
79
+ def get_vars(expr)
80
+ expr.split(/\W+/).reject{ |s| (s.to_i.to_s == s || s.to_f.to_s == s || s == "") }
81
+ end
82
+
83
+ # Solve a linear equality (work in progress)
84
+ def solve(equality,variable,forbidden_vars)
85
+ return "" if equality == ""
86
+
87
+ # Perform the subtitution of the current variable
88
+ expr = '-('+equality.gsub('=','-(').gsub(/\b#{variable}\b/,"0")+'))'
89
+
90
+ # Simplify the result
91
+ result = simplify(expr)
92
+
93
+ # Return the result or nothing (if it still contains forbidden variables)
94
+ vars = get_vars(result)
95
+ if vars & forbidden_vars == []
96
+ return result
97
+ else
98
+ return ""
99
+ end
100
+ end
101
+
102
+ # Find the maximum value of 2 expressions
103
+ def max(expr1,expr2,assumptions=[])
104
+ return expr1 if expr2 == ""
105
+ comparison = simplify("(#{expr1})-(#{expr2})")
106
+
107
+ # Process the assumptions
108
+ assumptions.each do |assumption|
109
+ comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
110
+ end
111
+
112
+ # Test to find the maximum
113
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
114
+ return expr1 if (comparison.to_i == 0)
115
+ return expr1 if (comparison.to_i > 0)
116
+ return expr2 if (comparison.to_i < 0)
117
+ else
118
+
119
+ # Handle min/max functions
120
+ if comparison =~ /max/ || comparison =~ /min/
121
+ return "max(#{expr1},#{expr2})"
122
+ end
123
+
124
+ # Find the maximum based on a guess
125
+ var = get_vars(comparison).first
126
+ assumptions << [var,ASSUME_VAL]
127
+ #puts "WARNING: Don't know how to find the max/min of '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
128
+ return max(expr1,expr2,assumptions)
129
+ end
130
+ end
131
+
132
+ # Find the minimum value of 2 expressions (based on the max method)
133
+ def min(expr1,expr2)
134
+ return expr1 if expr2 == ""
135
+ s1 = simplify(expr1)
136
+ s2 = simplify(expr2)
137
+ comparison = simplify("(#{s1})-(#{s2})")
138
+
139
+ # Handle min/max functions
140
+ if comparison =~ /max/ || comparison =~ /min/
141
+ return s1 if s2 =~ /^max\(#{s1},.*\)$/ || s2 =~ /^max\(.*,#{s1}\)$/
142
+ return s2 if s1 =~ /^max\(#{s2},.*\)$/ || s1 =~ /^max\(.*,#{s2}\)$/
143
+ return "min(#{expr1},#{expr2})"
144
+ end
145
+
146
+ # Run the 'max' method
147
+ maximum = max(expr1,expr2)
148
+ return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
149
+ end
150
+
151
+ # Find the exact maximum value of 2 expressions
152
+ def exact_max(expr1,expr2)
153
+ return expr1 if expr1 == expr2
154
+ comparison = simplify("(#{expr1})-(#{expr2})")
155
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
156
+ return expr1 if (comparison.to_i == 0)
157
+ return expr1 if (comparison.to_i > 0)
158
+ return expr2 if (comparison.to_i < 0)
159
+ else
160
+ return "max(#{expr1},#{expr2})"
161
+ end
162
+ end
163
+
164
+ # Find the exact minimum value of 2 expressions (based on the exact_max method)
165
+ def exact_min(expr1,expr2)
166
+ return expr1 if expr1 == expr2
167
+ maximum = exact_max(expr1,expr2)
168
+ return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
169
+ end
170
+
171
+
172
+ # Return the absolute value (if possible)
173
+ def abs(expr)
174
+ return expr.to_i.abs.to_s if expr.to_i.to_s == expr
175
+ return expr
176
+ end
177
+
178
+ # Compare two expressions
179
+ def compare(expr1,expr2,loop_data,assumptions=[])
180
+ comparison = simplify("(#{expr1})-(#{expr2})")
181
+
182
+ # Handle min/max functions
183
+ if comparison =~ /max/ || comparison =~ /min/
184
+ return comparison
185
+ end
186
+
187
+ # Process the assumptions
188
+ assumptions.each do |assumption|
189
+ comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
190
+ end
191
+
192
+ # Known comparison
193
+ if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
194
+ return 'eq' if (comparison.to_i == 0)
195
+ return 'gt' if (comparison.to_i > 0)
196
+ return 'lt' if (comparison.to_i < 0)
197
+ else
198
+
199
+ # Comparison based on loop data
200
+ get_vars(comparison).each do |var|
201
+ loop_data.each do |loop_datum|
202
+ if loop_datum[:var] == var
203
+ assumptions << [var,loop_datum[:min]]
204
+ #puts "WARNING: Modifying expression '(#{expr1}) vs (#{expr2})', assuming: #{var}=#{loop_datum[:min]}"
205
+ return compare(expr1,expr2,loop_data,assumptions)
206
+ end
207
+ end
208
+ end
209
+
210
+ # Comparison based on a guess
211
+ var = get_vars(comparison).first
212
+ assumptions << [var,ASSUME_VAL]
213
+ #puts "WARNING: Don't know how to compare '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
214
+ return compare(expr1,expr2,loop_data,assumptions)
215
+ end
216
+ end
@@ -0,0 +1,3 @@
1
+
2
+ void bones_timer_start();
3
+ void bones_timer_stop();
File without changes
@@ -3,18 +3,16 @@
3
3
  struct timeval bones_start_time2;
4
4
  struct timeval bones_end_time2;
5
5
  for (int bones_iter=0; bones_iter<ITERS; bones_iter++) {
6
-
7
- // Flush the CPU cache (for measurement purposes only)
8
- const int bones_flush_size = 4*1024*1024; // (16MB)
9
- int bones_flush_i;
10
- int bones_flush_j;
11
- char *bones_flush_c = (char *)malloc(bones_flush_size);
12
- for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
13
- for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
14
- bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
6
+
7
+ // Flush the CPU cache (for measurement purposes only)
8
+ const int bones_flush_size = 4*1024*1024; // (4MB)
9
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
10
+ for (int i=0; i<10; i++) {
11
+ for (int j=0; j<bones_flush_size; j++) {
12
+ bones_flush_c[j] = i*j;
13
+ }
15
14
  }
16
- }
17
- free(bones_flush_c);
15
+ free(bones_flush_c);
18
16
 
19
- // Start the timer for the measurement of the kernel execution time
20
- gettimeofday(&bones_start_time2, NULL);
17
+ // Start the timer for the measurement of the kernel execution time
18
+ gettimeofday(&bones_start_time2, NULL);
@@ -5,4 +5,4 @@
5
5
  }
6
6
 
7
7
  // Print the measurement data
8
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2/((float)ITERS));
8
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2/((float)ITERS));
@@ -0,0 +1,29 @@
1
+ ////////////////////////////////////////
2
+ //////////// Timers ////////////////////
3
+ ////////////////////////////////////////
4
+
5
+ // Timer
6
+ struct timeval bones_start_time1;
7
+
8
+ // Start the timer for the measurement of the whole scop
9
+ void bones_timer_start() {
10
+ const int bones_flush_size = 4*1024*1024; // (4MB)
11
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
12
+ for (int i=0; i<10; i++) {
13
+ for (int j=0; j<bones_flush_size; j++) {
14
+ bones_flush_c[j] = i*j;
15
+ }
16
+ }
17
+ free(bones_flush_c);
18
+ gettimeofday(&bones_start_time1, NULL);
19
+ }
20
+
21
+ // End the timer for the measurement of the whole scop
22
+ void bones_timer_stop() {
23
+ #if (ITERS == 1)
24
+ struct timeval bones_end_time1;
25
+ gettimeofday(&bones_end_time1, NULL);
26
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
27
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
28
+ #endif
29
+ }
@@ -103,7 +103,7 @@ void bones_initialize_target(void) {
103
103
  cl_platform_id bones_platform_ids[10];
104
104
  bones_errors = clGetPlatformIDs(bones_num_platforms,bones_platform_ids,NULL); error_check(bones_errors);
105
105
 
106
- // Select the AMD APP platform
106
+ // Select the Intel SDK platform
107
107
  char bones_buffer[1024];
108
108
  cl_uint bones_platform;
109
109
  for(cl_uint bones_platform_id=0; bones_platform_id<bones_num_platforms; bones_platform_id++) {
@@ -1,5 +1,8 @@
1
1
  #include <stdlib.h>
2
2
 
3
+ void bones_timer_start();
4
+ void bones_timer_stop();
5
+
3
6
  // Allocate a 128-byte aligned pointer
4
7
  void *bones_malloc_128(size_t bones_size) {
5
8
  char *bones_pointer;
@@ -1,5 +1,10 @@
1
1
 
2
2
  // Perform a zero-copy of <array> from device to host
3
- void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,<offset>,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
4
- clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
3
+ #if ZEROCOPY == 1
4
+ printf("Copying back from device_<array> to <array>\n");
5
+ void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,0,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
6
+ clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
7
+ #elif ZEROCOPY == 0
8
+ bones_errors = clEnqueueReadBuffer(bones_queue,device_<array>,CL_TRUE,(0)*sizeof(<type>),<variable_dimensions>*sizeof(<type>),<array><flatten>+0,0,NULL,NULL); error_check(bones_errors);
9
+ #endif
5
10
  clFinish(bones_queue);
@@ -1,3 +1,5 @@
1
1
 
2
- //bones_errors = clEnqueueWriteBuffer(bones_queue, device_<array>, CL_TRUE, 0, <variable_dimensions>*sizeof(<type>), <array><flatten>, 0, NULL, NULL); error_check(bones_errors);
3
- //clFinish(bones_queue);
2
+ #if ZEROCOPY == 0
3
+ device_<array> = clCreateBuffer(bones_context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,<variable_dimensions>*sizeof(<type>),<array><flatten>, &bones_errors); error_check(bones_errors);
4
+ clFinish(bones_queue);
5
+ #endif
File without changes
@@ -1,4 +1,7 @@
1
1
 
2
- // Create a device pointer for <array> (zero-copy)
3
- cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
4
- //cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
2
+ // Create a device pointer for <array>
3
+ #if ZEROCOPY == 1
4
+ cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
5
+ #elif ZEROCOPY == 0
6
+ cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
7
+ #endif
@@ -8,4 +8,4 @@
8
8
  bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end2, 0); error_check(bones_errors);
9
9
  bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start2, 0); error_check(bones_errors);
10
10
  float bones_timer2 = 0.000001 * (end2-start2);
11
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2);
11
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2);
@@ -0,0 +1,24 @@
1
+
2
+ ////////////////////////////////////////
3
+ //////////// Timers ////////////////////
4
+ ////////////////////////////////////////
5
+
6
+ // Timer
7
+ struct timeval bones_start_time1;
8
+
9
+ // Start the timer for the measurement of the whole scop
10
+ void bones_timer_start() {
11
+ clFinish(bones_queue);
12
+ gettimeofday(&bones_start_time1, NULL);
13
+ }
14
+
15
+ // End the timer for the measurement of the whole scop
16
+ void bones_timer_stop() {
17
+ #if (ITERS == 1)
18
+ clFinish(bones_queue);
19
+ struct timeval bones_end_time1;
20
+ gettimeofday(&bones_end_time1, NULL);
21
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
22
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
23
+ #endif
24
+ }
@@ -1,5 +1,6 @@
1
1
  #include <omp.h>
2
2
  #include <stdlib.h>
3
+ #include <stdio.h>
3
4
 
4
5
  #define BONES_MIN(a,b) ((a<b) ? a : b)
5
6
  #define BONES_MAX(a,b) ((a>b) ? a : b)
@@ -0,0 +1,3 @@
1
+
2
+ void bones_timer_start();
3
+ void bones_timer_stop();
File without changes
@@ -1,12 +0,0 @@
1
-
2
- // Flush the CPU cache (for measurement purposes only)
3
- const int bones_flush_size = 4*1024*1024; // (16MB)
4
- int bones_flush_i;
5
- int bones_flush_j;
6
- char *bones_flush_c = (char *)malloc(bones_flush_size);
7
- for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
8
- for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
9
- bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
10
- }
11
- }
12
- free(bones_flush_c);
@@ -5,4 +5,4 @@
5
5
  }
6
6
 
7
7
  // Print the measurement data
8
- printf(">>>\t\t (<algorithm_basename>): Execution time [kernel ]: %.3lf ms \n", bones_timer2/((float)ITERS));
8
+ printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2/((float)ITERS));
@@ -0,0 +1,33 @@
1
+ ////////////////////////////////////////
2
+ //////////// Timers ////////////////////
3
+ ////////////////////////////////////////
4
+
5
+ // Includes
6
+ #include <stdio.h>
7
+
8
+ // Timer
9
+ struct timeval bones_start_time1;
10
+
11
+ // Start the timer for the measurement of the whole scop
12
+ void bones_timer_start() {
13
+ /*
14
+ const int bones_flush_size = 4*1024*1024; // (16MB)
15
+ char *bones_flush_c = (char *)malloc(bones_flush_size);
16
+ for (int i=0; i<10; i++) {
17
+ for (int j=0; j<bones_flush_size; j++) {
18
+ bones_flush_c[j] = i*j;
19
+ }
20
+ }
21
+ free(bones_flush_c);*/
22
+ gettimeofday(&bones_start_time1, NULL);
23
+ }
24
+
25
+ // End the timer for the measurement of the whole scop
26
+ void bones_timer_stop() {
27
+ #if (ITERS == 1)
28
+ struct timeval bones_end_time1;
29
+ gettimeofday(&bones_end_time1, NULL);
30
+ float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
31
+ printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
32
+ #endif
33
+ }