bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
data/lib/common.rb
ADDED
@@ -0,0 +1,216 @@
|
|
1
|
+
|
2
|
+
# Bones/Aset require 'fileutils' from the Ruby standard library.
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
# Bones/Aset use the 'trollop' gem to parse command line options.
|
6
|
+
require 'rubygems'
|
7
|
+
require 'trollop'
|
8
|
+
require 'symbolic'
|
9
|
+
|
10
|
+
# Extending the Ruby standard string class to support some
|
11
|
+
# additional methods. This includes a hack of the gsub! command.
|
12
|
+
class String #:nodoc:
|
13
|
+
|
14
|
+
# Extend the Ruby string class to be able to chain 'gsub!'
|
15
|
+
#-commands. This code is taken from the web.
|
16
|
+
meth = 'gsub!'
|
17
|
+
orig_meth = "orig_#{meth}"
|
18
|
+
alias_method orig_meth, meth
|
19
|
+
define_method(meth) do |*args|
|
20
|
+
self.send(orig_meth, *args)
|
21
|
+
self
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
# Set the newline character
|
27
|
+
NL = "\n"
|
28
|
+
# Set the indentation string (currently: a single tab character)
|
29
|
+
INDENT = "\t"
|
30
|
+
|
31
|
+
# A string representing the combination character ('^') of a species.
|
32
|
+
WEDGE = '^'
|
33
|
+
# A string representing the production character ('->') of a species.
|
34
|
+
ARROW = '->'
|
35
|
+
# A string representing the pipe character ('|') of a species.
|
36
|
+
PIPE = '|'
|
37
|
+
# A string representing the colon character (':') to separate ranges in dimensions.
|
38
|
+
RANGE_SEP = ':'
|
39
|
+
# A string representing the comma character (',') to separate different ranges.
|
40
|
+
DIM_SEP = ','
|
41
|
+
|
42
|
+
# Value to assume a variable to be
|
43
|
+
ASSUME_VAL = '1000'
|
44
|
+
|
45
|
+
|
46
|
+
# Helper method to evaluate mathematical expressions, possibly containing
|
47
|
+
# symbols. This method is only used for readability, without it the code
|
48
|
+
# is functionally correct, but expressions might be larger than needed.
|
49
|
+
def simplify(expr)
|
50
|
+
raise_error('Invalid expression to simplify') if !expr
|
51
|
+
expr = expr.gsub(' ','')
|
52
|
+
|
53
|
+
# Immediately return if there is an array index in the expression
|
54
|
+
return expr if expr =~ /\[/
|
55
|
+
|
56
|
+
# Handle min/max functions
|
57
|
+
if expr =~ /max/ || expr =~ /min/
|
58
|
+
return expr
|
59
|
+
end
|
60
|
+
|
61
|
+
# Get all the variables
|
62
|
+
vars = get_vars(expr)
|
63
|
+
|
64
|
+
# Set all the variables
|
65
|
+
hash = {}
|
66
|
+
vars.uniq.each do |var_name|
|
67
|
+
hash[var_name.to_sym] = var :name => var_name
|
68
|
+
expr = expr.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
|
69
|
+
end
|
70
|
+
|
71
|
+
# Simplify the string using the 'symbolic' gem.
|
72
|
+
symbolic_expr = eval(expr)
|
73
|
+
|
74
|
+
# Return the result as a string
|
75
|
+
return symbolic_expr.to_s
|
76
|
+
end
|
77
|
+
|
78
|
+
# Get the variables in an expression
|
79
|
+
def get_vars(expr)
|
80
|
+
expr.split(/\W+/).reject{ |s| (s.to_i.to_s == s || s.to_f.to_s == s || s == "") }
|
81
|
+
end
|
82
|
+
|
83
|
+
# Solve a linear equality (work in progress)
|
84
|
+
def solve(equality,variable,forbidden_vars)
|
85
|
+
return "" if equality == ""
|
86
|
+
|
87
|
+
# Perform the substitution of the current variable
|
88
|
+
expr = '-('+equality.gsub('=','-(').gsub(/\b#{variable}\b/,"0")+'))'
|
89
|
+
|
90
|
+
# Simplify the result
|
91
|
+
result = simplify(expr)
|
92
|
+
|
93
|
+
# Return the result or nothing (if it still contains forbidden variables)
|
94
|
+
vars = get_vars(result)
|
95
|
+
if vars & forbidden_vars == []
|
96
|
+
return result
|
97
|
+
else
|
98
|
+
return ""
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
# Find the maximum value of 2 expressions
|
103
|
+
def max(expr1,expr2,assumptions=[])
|
104
|
+
return expr1 if expr2 == ""
|
105
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
106
|
+
|
107
|
+
# Process the assumptions
|
108
|
+
assumptions.each do |assumption|
|
109
|
+
comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
|
110
|
+
end
|
111
|
+
|
112
|
+
# Test to find the maximum
|
113
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
114
|
+
return expr1 if (comparison.to_i == 0)
|
115
|
+
return expr1 if (comparison.to_i > 0)
|
116
|
+
return expr2 if (comparison.to_i < 0)
|
117
|
+
else
|
118
|
+
|
119
|
+
# Handle min/max functions
|
120
|
+
if comparison =~ /max/ || comparison =~ /min/
|
121
|
+
return "max(#{expr1},#{expr2})"
|
122
|
+
end
|
123
|
+
|
124
|
+
# Find the maximum based on a guess
|
125
|
+
var = get_vars(comparison).first
|
126
|
+
assumptions << [var,ASSUME_VAL]
|
127
|
+
#puts "WARNING: Don't know how to find the max/min of '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
|
128
|
+
return max(expr1,expr2,assumptions)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# Find the minimum value of 2 expressions (based on the max method)
|
133
|
+
def min(expr1,expr2)
|
134
|
+
return expr1 if expr2 == ""
|
135
|
+
s1 = simplify(expr1)
|
136
|
+
s2 = simplify(expr2)
|
137
|
+
comparison = simplify("(#{s1})-(#{s2})")
|
138
|
+
|
139
|
+
# Handle min/max functions
|
140
|
+
if comparison =~ /max/ || comparison =~ /min/
|
141
|
+
return s1 if s2 =~ /^max\(#{s1},.*\)$/ || s2 =~ /^max\(.*,#{s1}\)$/
|
142
|
+
return s2 if s1 =~ /^max\(#{s2},.*\)$/ || s1 =~ /^max\(.*,#{s2}\)$/
|
143
|
+
return "min(#{expr1},#{expr2})"
|
144
|
+
end
|
145
|
+
|
146
|
+
# Run the 'max' method
|
147
|
+
maximum = max(expr1,expr2)
|
148
|
+
return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
|
149
|
+
end
|
150
|
+
|
151
|
+
# Find the exact maximum value of 2 expressions
|
152
|
+
def exact_max(expr1,expr2)
|
153
|
+
return expr1 if expr1 == expr2
|
154
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
155
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
156
|
+
return expr1 if (comparison.to_i == 0)
|
157
|
+
return expr1 if (comparison.to_i > 0)
|
158
|
+
return expr2 if (comparison.to_i < 0)
|
159
|
+
else
|
160
|
+
return "max(#{expr1},#{expr2})"
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
# Find the exact minimum value of 2 expressions (based on the exact_max method)
|
165
|
+
def exact_min(expr1,expr2)
|
166
|
+
return expr1 if expr1 == expr2
|
167
|
+
maximum = exact_max(expr1,expr2)
|
168
|
+
return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
|
169
|
+
end
|
170
|
+
|
171
|
+
|
172
|
+
# Return the absolute value (if possible)
|
173
|
+
def abs(expr)
|
174
|
+
return expr.to_i.abs.to_s if expr.to_i.to_s == expr
|
175
|
+
return expr
|
176
|
+
end
|
177
|
+
|
178
|
+
# Compare two expressions
|
179
|
+
def compare(expr1,expr2,loop_data,assumptions=[])
|
180
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
181
|
+
|
182
|
+
# Handle min/max functions
|
183
|
+
if comparison =~ /max/ || comparison =~ /min/
|
184
|
+
return comparison
|
185
|
+
end
|
186
|
+
|
187
|
+
# Process the assumptions
|
188
|
+
assumptions.each do |assumption|
|
189
|
+
comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
|
190
|
+
end
|
191
|
+
|
192
|
+
# Known comparison
|
193
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
194
|
+
return 'eq' if (comparison.to_i == 0)
|
195
|
+
return 'gt' if (comparison.to_i > 0)
|
196
|
+
return 'lt' if (comparison.to_i < 0)
|
197
|
+
else
|
198
|
+
|
199
|
+
# Comparison based on loop data
|
200
|
+
get_vars(comparison).each do |var|
|
201
|
+
loop_data.each do |loop_datum|
|
202
|
+
if loop_datum[:var] == var
|
203
|
+
assumptions << [var,loop_datum[:min]]
|
204
|
+
#puts "WARNING: Modifying expression '(#{expr1}) vs (#{expr2})', assuming: #{var}=#{loop_datum[:min]}"
|
205
|
+
return compare(expr1,expr2,loop_data,assumptions)
|
206
|
+
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
# Comparison based on a guess
|
211
|
+
var = get_vars(comparison).first
|
212
|
+
assumptions << [var,ASSUME_VAL]
|
213
|
+
#puts "WARNING: Don't know how to compare '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
|
214
|
+
return compare(expr1,expr2,loop_data,assumptions)
|
215
|
+
end
|
216
|
+
end
|
File without changes
|
@@ -3,18 +3,16 @@
|
|
3
3
|
struct timeval bones_start_time2;
|
4
4
|
struct timeval bones_end_time2;
|
5
5
|
for (int bones_iter=0; bones_iter<ITERS; bones_iter++) {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
|
6
|
+
|
7
|
+
// Flush the CPU cache (for measurement purposes only)
|
8
|
+
const int bones_flush_size = 4*1024*1024; // (4MB: the flush buffer is allocated as chars)
|
9
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
10
|
+
for (int i=0; i<10; i++) {
|
11
|
+
for (int j=0; j<bones_flush_size; j++) {
|
12
|
+
bones_flush_c[j] = i*j;
|
13
|
+
}
|
15
14
|
}
|
16
|
-
|
17
|
-
free(bones_flush_c);
|
15
|
+
free(bones_flush_c);
|
18
16
|
|
19
|
-
|
20
|
-
|
17
|
+
// Start the timer for the measurement of the kernel execution time
|
18
|
+
gettimeofday(&bones_start_time2, NULL);
|
@@ -0,0 +1,29 @@
|
|
1
|
+
////////////////////////////////////////
|
2
|
+
//////////// Timers ////////////////////
|
3
|
+
////////////////////////////////////////
|
4
|
+
|
5
|
+
// Timer
|
6
|
+
struct timeval bones_start_time1;
|
7
|
+
|
8
|
+
// Start the timer for the measurement of the whole scop
|
9
|
+
void bones_timer_start() {
|
10
|
+
const int bones_flush_size = 4*1024*1024; // (4MB: the flush buffer is allocated as chars)
|
11
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
12
|
+
for (int i=0; i<10; i++) {
|
13
|
+
for (int j=0; j<bones_flush_size; j++) {
|
14
|
+
bones_flush_c[j] = i*j;
|
15
|
+
}
|
16
|
+
}
|
17
|
+
free(bones_flush_c);
|
18
|
+
gettimeofday(&bones_start_time1, NULL);
|
19
|
+
}
|
20
|
+
|
21
|
+
// End the timer for the measurement of the whole scop
|
22
|
+
void bones_timer_stop() {
|
23
|
+
#if (ITERS == 1)
|
24
|
+
struct timeval bones_end_time1;
|
25
|
+
gettimeofday(&bones_end_time1, NULL);
|
26
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
27
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
28
|
+
#endif
|
29
|
+
}
|
@@ -103,7 +103,7 @@ void bones_initialize_target(void) {
|
|
103
103
|
cl_platform_id bones_platform_ids[10];
|
104
104
|
bones_errors = clGetPlatformIDs(bones_num_platforms,bones_platform_ids,NULL); error_check(bones_errors);
|
105
105
|
|
106
|
-
// Select the
|
106
|
+
// Select the Intel SDK platform
|
107
107
|
char bones_buffer[1024];
|
108
108
|
cl_uint bones_platform;
|
109
109
|
for(cl_uint bones_platform_id=0; bones_platform_id<bones_num_platforms; bones_platform_id++) {
|
@@ -1,5 +1,10 @@
|
|
1
1
|
|
2
2
|
// Perform a zero-copy of <array> from device to host
|
3
|
-
|
4
|
-
|
3
|
+
#if ZEROCOPY == 1
|
4
|
+
printf("Copying back from device_<array> to <array>\n");
|
5
|
+
void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,0,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
|
6
|
+
clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
|
7
|
+
#elif ZEROCOPY == 0
|
8
|
+
bones_errors = clEnqueueReadBuffer(bones_queue,device_<array>,CL_TRUE,(0)*sizeof(<type>),<variable_dimensions>*sizeof(<type>),<array><flatten>+0,0,NULL,NULL); error_check(bones_errors);
|
9
|
+
#endif
|
5
10
|
clFinish(bones_queue);
|
@@ -1,3 +1,5 @@
|
|
1
1
|
|
2
|
-
|
3
|
-
|
2
|
+
#if ZEROCOPY == 0
|
3
|
+
device_<array> = clCreateBuffer(bones_context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,<variable_dimensions>*sizeof(<type>),<array><flatten>, &bones_errors); error_check(bones_errors);
|
4
|
+
clFinish(bones_queue);
|
5
|
+
#endif
|
File without changes
|
@@ -1,4 +1,7 @@
|
|
1
1
|
|
2
|
-
// Create a device pointer for <array>
|
3
|
-
|
4
|
-
|
2
|
+
// Create a device pointer for <array>
|
3
|
+
#if ZEROCOPY == 1
|
4
|
+
cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
|
5
|
+
#elif ZEROCOPY == 0
|
6
|
+
cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
|
7
|
+
#endif
|
@@ -8,4 +8,4 @@
|
|
8
8
|
bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end2, 0); error_check(bones_errors);
|
9
9
|
bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start2, 0); error_check(bones_errors);
|
10
10
|
float bones_timer2 = 0.000001 * (end2-start2);
|
11
|
-
printf(">>>\t\t
|
11
|
+
printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2);
|
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
////////////////////////////////////////
|
3
|
+
//////////// Timers ////////////////////
|
4
|
+
////////////////////////////////////////
|
5
|
+
|
6
|
+
// Timer
|
7
|
+
struct timeval bones_start_time1;
|
8
|
+
|
9
|
+
// Start the timer for the measurement of the whole scop
|
10
|
+
void bones_timer_start() {
|
11
|
+
clFinish(bones_queue);
|
12
|
+
gettimeofday(&bones_start_time1, NULL);
|
13
|
+
}
|
14
|
+
|
15
|
+
// End the timer for the measurement of the whole scop
|
16
|
+
void bones_timer_stop() {
|
17
|
+
#if (ITERS == 1)
|
18
|
+
clFinish(bones_queue);
|
19
|
+
struct timeval bones_end_time1;
|
20
|
+
gettimeofday(&bones_end_time1, NULL);
|
21
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
22
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
23
|
+
#endif
|
24
|
+
}
|
File without changes
|
@@ -1,12 +0,0 @@
|
|
1
|
-
|
2
|
-
// Flush the CPU cache (for measurement purposes only)
|
3
|
-
const int bones_flush_size = 4*1024*1024; // (16MB)
|
4
|
-
int bones_flush_i;
|
5
|
-
int bones_flush_j;
|
6
|
-
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
7
|
-
for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
|
8
|
-
for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
|
9
|
-
bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
|
10
|
-
}
|
11
|
-
}
|
12
|
-
free(bones_flush_c);
|
@@ -0,0 +1,33 @@
|
|
1
|
+
////////////////////////////////////////
|
2
|
+
//////////// Timers ////////////////////
|
3
|
+
////////////////////////////////////////
|
4
|
+
|
5
|
+
// Includes
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
// Timer
|
9
|
+
struct timeval bones_start_time1;
|
10
|
+
|
11
|
+
// Start the timer for the measurement of the whole scop
|
12
|
+
void bones_timer_start() {
|
13
|
+
/*
|
14
|
+
const int bones_flush_size = 4*1024*1024; // (16MB)
|
15
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
16
|
+
for (int i=0; i<10; i++) {
|
17
|
+
for (int j=0; j<bones_flush_size; j++) {
|
18
|
+
bones_flush_c[j] = i*j;
|
19
|
+
}
|
20
|
+
}
|
21
|
+
free(bones_flush_c);*/
|
22
|
+
gettimeofday(&bones_start_time1, NULL);
|
23
|
+
}
|
24
|
+
|
25
|
+
// End the timer for the measurement of the whole scop
|
26
|
+
void bones_timer_stop() {
|
27
|
+
#if (ITERS == 1)
|
28
|
+
struct timeval bones_end_time1;
|
29
|
+
gettimeofday(&bones_end_time1, NULL);
|
30
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
31
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
32
|
+
#endif
|
33
|
+
}
|