bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
data/lib/common.rb
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
|
|
2
|
+
# Bones/Aset require 'fileutils' from the Ruby standard library.
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
|
|
5
|
+
# Bones/Aset use the 'trollop' gem to parse command line options.
|
|
6
|
+
require 'rubygems'
|
|
7
|
+
require 'trollop'
|
|
8
|
+
require 'symbolic'
|
|
9
|
+
|
|
10
|
+
# Extending the Ruby standard string class to support some
|
|
11
|
+
# additional methods. This includes a hack of the gsub! command.
|
|
12
|
+
class String #:nodoc:
|
|
13
|
+
|
|
14
|
+
# Extend the Ruby string class to be able to chain 'gsub!'
|
|
15
|
+
#-commands. This code is taken from the web.
|
|
16
|
+
meth = 'gsub!'
|
|
17
|
+
orig_meth = "orig_#{meth}"
|
|
18
|
+
alias_method orig_meth, meth
|
|
19
|
+
define_method(meth) do |*args|
|
|
20
|
+
self.send(orig_meth, *args)
|
|
21
|
+
self
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Set the newline character
|
|
27
|
+
NL = "\n"
|
|
28
|
+
# Set the indentation string (currently: a single tab character)
|
|
29
|
+
INDENT = "\t"
|
|
30
|
+
|
|
31
|
+
# A string representing the combination character ('^') of a species.
|
|
32
|
+
WEDGE = '^'
|
|
33
|
+
# A string representing the production character ('->') of a species.
|
|
34
|
+
ARROW = '->'
|
|
35
|
+
# A string representing the pipe character ('|') of a species.
|
|
36
|
+
PIPE = '|'
|
|
37
|
+
# A string representing the colon character (':') to separate ranges in dimensions.
|
|
38
|
+
RANGE_SEP = ':'
|
|
39
|
+
# A string representing the comma character (',') to separate different ranges.
|
|
40
|
+
DIM_SEP = ','
|
|
41
|
+
|
|
42
|
+
# Value substituted for a variable as a guess when expressions cannot otherwise be compared
|
|
43
|
+
ASSUME_VAL = '1000'
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Helper method to evaluate mathematical expressions, possibly containing
|
|
47
|
+
# symbols. This method is only used for readability, without it the code
|
|
48
|
+
# is functionally correct, but expressions might be larger than needed.
|
|
49
|
+
def simplify(expr)
|
|
50
|
+
raise_error('Invalid expression to simplify') if !expr
|
|
51
|
+
expr = expr.gsub(' ','')
|
|
52
|
+
|
|
53
|
+
# Immediately return if there is an array index in the expression
|
|
54
|
+
return expr if expr =~ /\[/
|
|
55
|
+
|
|
56
|
+
# Handle min/max functions
|
|
57
|
+
if expr =~ /max/ || expr =~ /min/
|
|
58
|
+
return expr
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Get all the variables
|
|
62
|
+
vars = get_vars(expr)
|
|
63
|
+
|
|
64
|
+
# Create a symbolic variable for each name and substitute it into the expression
|
|
65
|
+
hash = {}
|
|
66
|
+
vars.uniq.each do |var_name|
|
|
67
|
+
hash[var_name.to_sym] = var :name => var_name
|
|
68
|
+
expr = expr.gsub(/\b#{var_name}\b/,"hash[:#{var_name}]")
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Simplify the string using the 'symbolic' gem.
|
|
72
|
+
symbolic_expr = eval(expr)
|
|
73
|
+
|
|
74
|
+
# Return the result as a string
|
|
75
|
+
return symbolic_expr.to_s
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Get the variables in an expression
|
|
79
|
+
def get_vars(expr)
|
|
80
|
+
expr.split(/\W+/).reject{ |s| (s.to_i.to_s == s || s.to_f.to_s == s || s == "") }
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Solve a linear equality (work in progress)
|
|
84
|
+
def solve(equality,variable,forbidden_vars)
|
|
85
|
+
return "" if equality == ""
|
|
86
|
+
|
|
87
|
+
# Perform the substitution of the current variable
|
|
88
|
+
expr = '-('+equality.gsub('=','-(').gsub(/\b#{variable}\b/,"0")+'))'
|
|
89
|
+
|
|
90
|
+
# Simplify the result
|
|
91
|
+
result = simplify(expr)
|
|
92
|
+
|
|
93
|
+
# Return the result or nothing (if it still contains forbidden variables)
|
|
94
|
+
vars = get_vars(result)
|
|
95
|
+
if vars & forbidden_vars == []
|
|
96
|
+
return result
|
|
97
|
+
else
|
|
98
|
+
return ""
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Find the maximum value of 2 expressions
|
|
103
|
+
def max(expr1,expr2,assumptions=[])
|
|
104
|
+
return expr1 if expr2 == ""
|
|
105
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
|
106
|
+
|
|
107
|
+
# Process the assumptions
|
|
108
|
+
assumptions.each do |assumption|
|
|
109
|
+
comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Test to find the maximum
|
|
113
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
|
114
|
+
return expr1 if (comparison.to_i == 0)
|
|
115
|
+
return expr1 if (comparison.to_i > 0)
|
|
116
|
+
return expr2 if (comparison.to_i < 0)
|
|
117
|
+
else
|
|
118
|
+
|
|
119
|
+
# Handle min/max functions
|
|
120
|
+
if comparison =~ /max/ || comparison =~ /min/
|
|
121
|
+
return "max(#{expr1},#{expr2})"
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# Find the maximum based on a guess
|
|
125
|
+
var = get_vars(comparison).first
|
|
126
|
+
assumptions << [var,ASSUME_VAL]
|
|
127
|
+
#puts "WARNING: Don't know how to find the max/min of '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
|
|
128
|
+
return max(expr1,expr2,assumptions)
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Find the minimum value of 2 expressions (based on the max method)
|
|
133
|
+
def min(expr1,expr2)
|
|
134
|
+
return expr1 if expr2 == ""
|
|
135
|
+
s1 = simplify(expr1)
|
|
136
|
+
s2 = simplify(expr2)
|
|
137
|
+
comparison = simplify("(#{s1})-(#{s2})")
|
|
138
|
+
|
|
139
|
+
# Handle min/max functions
|
|
140
|
+
if comparison =~ /max/ || comparison =~ /min/
|
|
141
|
+
return s1 if s2 =~ /^max\(#{s1},.*\)$/ || s2 =~ /^max\(.*,#{s1}\)$/
|
|
142
|
+
return s2 if s1 =~ /^max\(#{s2},.*\)$/ || s1 =~ /^max\(.*,#{s2}\)$/
|
|
143
|
+
return "min(#{expr1},#{expr2})"
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
# Run the 'max' method
|
|
147
|
+
maximum = max(expr1,expr2)
|
|
148
|
+
return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
# Find the exact maximum value of 2 expressions
|
|
152
|
+
def exact_max(expr1,expr2)
|
|
153
|
+
return expr1 if expr1 == expr2
|
|
154
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
|
155
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
|
156
|
+
return expr1 if (comparison.to_i == 0)
|
|
157
|
+
return expr1 if (comparison.to_i > 0)
|
|
158
|
+
return expr2 if (comparison.to_i < 0)
|
|
159
|
+
else
|
|
160
|
+
return "max(#{expr1},#{expr2})"
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
# Find the exact minimum value of 2 expressions (based on the exact_max method)
|
|
165
|
+
def exact_min(expr1,expr2)
|
|
166
|
+
return expr1 if expr1 == expr2
|
|
167
|
+
maximum = exact_max(expr1,expr2)
|
|
168
|
+
return (maximum == expr1) ? expr2 : ( (maximum == expr2) ? expr1 : maximum.gsub('max(','min(') )
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# Return the absolute value (if possible)
|
|
173
|
+
def abs(expr)
|
|
174
|
+
return expr.to_i.abs.to_s if expr.to_i.to_s == expr
|
|
175
|
+
return expr
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
# Compare two expressions
|
|
179
|
+
def compare(expr1,expr2,loop_data,assumptions=[])
|
|
180
|
+
comparison = simplify("(#{expr1})-(#{expr2})")
|
|
181
|
+
|
|
182
|
+
# Handle min/max functions
|
|
183
|
+
if comparison =~ /max/ || comparison =~ /min/
|
|
184
|
+
return comparison
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# Process the assumptions
|
|
188
|
+
assumptions.each do |assumption|
|
|
189
|
+
comparison = simplify(comparison.gsub(assumption[0],assumption[1]))
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
# Known comparison
|
|
193
|
+
if (comparison.to_i.to_s == comparison || comparison.to_f.to_s == comparison)
|
|
194
|
+
return 'eq' if (comparison.to_i == 0)
|
|
195
|
+
return 'gt' if (comparison.to_i > 0)
|
|
196
|
+
return 'lt' if (comparison.to_i < 0)
|
|
197
|
+
else
|
|
198
|
+
|
|
199
|
+
# Comparison based on loop data
|
|
200
|
+
get_vars(comparison).each do |var|
|
|
201
|
+
loop_data.each do |loop_datum|
|
|
202
|
+
if loop_datum[:var] == var
|
|
203
|
+
assumptions << [var,loop_datum[:min]]
|
|
204
|
+
#puts "WARNING: Modifying expression '(#{expr1}) vs (#{expr2})', assuming: #{var}=#{loop_datum[:min]}"
|
|
205
|
+
return compare(expr1,expr2,loop_data,assumptions)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
# Comparison based on a guess
|
|
211
|
+
var = get_vars(comparison).first
|
|
212
|
+
assumptions << [var,ASSUME_VAL]
|
|
213
|
+
#puts "WARNING: Don't know how to compare '(#{expr1})' and '(#{expr2})', assuming: #{var}=#{ASSUME_VAL}"
|
|
214
|
+
return compare(expr1,expr2,loop_data,assumptions)
|
|
215
|
+
end
|
|
216
|
+
end
|
|
File without changes
|
|
@@ -3,18 +3,16 @@
|
|
|
3
3
|
struct timeval bones_start_time2;
|
|
4
4
|
struct timeval bones_end_time2;
|
|
5
5
|
for (int bones_iter=0; bones_iter<ITERS; bones_iter++) {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
|
|
6
|
+
|
|
7
|
+
// Flush the CPU cache (for measurement purposes only)
|
|
8
|
+
const int bones_flush_size = 4*1024*1024; // (4MB)
|
|
9
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
|
10
|
+
for (int i=0; i<10; i++) {
|
|
11
|
+
for (int j=0; j<bones_flush_size; j++) {
|
|
12
|
+
bones_flush_c[j] = i*j;
|
|
13
|
+
}
|
|
15
14
|
}
|
|
16
|
-
|
|
17
|
-
free(bones_flush_c);
|
|
15
|
+
free(bones_flush_c);
|
|
18
16
|
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
// Start the timer for the measurement of the kernel execution time
|
|
18
|
+
gettimeofday(&bones_start_time2, NULL);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
////////////////////////////////////////
|
|
2
|
+
//////////// Timers ////////////////////
|
|
3
|
+
////////////////////////////////////////
|
|
4
|
+
|
|
5
|
+
// Timer
|
|
6
|
+
struct timeval bones_start_time1;
|
|
7
|
+
|
|
8
|
+
// Start the timer for the measurement of the whole scop
|
|
9
|
+
void bones_timer_start() {
|
|
10
|
+
const int bones_flush_size = 4*1024*1024; // (4MB)
|
|
11
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
|
12
|
+
for (int i=0; i<10; i++) {
|
|
13
|
+
for (int j=0; j<bones_flush_size; j++) {
|
|
14
|
+
bones_flush_c[j] = i*j;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
free(bones_flush_c);
|
|
18
|
+
gettimeofday(&bones_start_time1, NULL);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// End the timer for the measurement of the whole scop
|
|
22
|
+
void bones_timer_stop() {
|
|
23
|
+
#if (ITERS == 1)
|
|
24
|
+
struct timeval bones_end_time1;
|
|
25
|
+
gettimeofday(&bones_end_time1, NULL);
|
|
26
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
|
27
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
|
28
|
+
#endif
|
|
29
|
+
}
|
|
@@ -103,7 +103,7 @@ void bones_initialize_target(void) {
|
|
|
103
103
|
cl_platform_id bones_platform_ids[10];
|
|
104
104
|
bones_errors = clGetPlatformIDs(bones_num_platforms,bones_platform_ids,NULL); error_check(bones_errors);
|
|
105
105
|
|
|
106
|
-
// Select the
|
|
106
|
+
// Select the Intel SDK platform
|
|
107
107
|
char bones_buffer[1024];
|
|
108
108
|
cl_uint bones_platform;
|
|
109
109
|
for(cl_uint bones_platform_id=0; bones_platform_id<bones_num_platforms; bones_platform_id++) {
|
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
|
|
2
2
|
// Perform a zero-copy of <array> from device to host
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
#if ZEROCOPY == 1
|
|
4
|
+
printf("Copying back from device_<array> to <array>\n");
|
|
5
|
+
void* bones_pointer_to_<array> = clEnqueueMapBuffer(bones_queue,device_<array>,CL_TRUE,CL_MAP_READ,0,<variable_dimensions>*sizeof(<type>),0,NULL,NULL,&bones_errors); error_check(bones_errors);
|
|
6
|
+
clEnqueueUnmapMemObject(bones_queue,device_<array>,bones_pointer_to_<array>,0,NULL,NULL);
|
|
7
|
+
#elif ZEROCOPY == 0
|
|
8
|
+
bones_errors = clEnqueueReadBuffer(bones_queue,device_<array>,CL_TRUE,(0)*sizeof(<type>),<variable_dimensions>*sizeof(<type>),<array><flatten>+0,0,NULL,NULL); error_check(bones_errors);
|
|
9
|
+
#endif
|
|
5
10
|
clFinish(bones_queue);
|
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
|
|
3
|
-
|
|
2
|
+
#if ZEROCOPY == 0
|
|
3
|
+
device_<array> = clCreateBuffer(bones_context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,<variable_dimensions>*sizeof(<type>),<array><flatten>, &bones_errors); error_check(bones_errors);
|
|
4
|
+
clFinish(bones_queue);
|
|
5
|
+
#endif
|
|
File without changes
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
|
|
2
|
-
// Create a device pointer for <array>
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
// Create a device pointer for <array>
|
|
3
|
+
#if ZEROCOPY == 1
|
|
4
|
+
cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, <variable_dimensions>*sizeof(<type>), <array><flatten>, &bones_errors); error_check(bones_errors);
|
|
5
|
+
#elif ZEROCOPY == 0
|
|
6
|
+
cl_mem device_<array> = clCreateBuffer(bones_context, CL_MEM_READ_WRITE, <variable_dimensions>*sizeof(<type>), NULL, &bones_errors); error_check(bones_errors);
|
|
7
|
+
#endif
|
|
@@ -8,4 +8,4 @@
|
|
|
8
8
|
bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end2, 0); error_check(bones_errors);
|
|
9
9
|
bones_errors = clGetEventProfilingInfo(bones_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start2, 0); error_check(bones_errors);
|
|
10
10
|
float bones_timer2 = 0.000001 * (end2-start2);
|
|
11
|
-
printf(">>>\t\t
|
|
11
|
+
printf(">>>\t\t Execution time [kernel <algorithm_basename>]: %.3lf ms \n", bones_timer2);
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
|
|
2
|
+
////////////////////////////////////////
|
|
3
|
+
//////////// Timers ////////////////////
|
|
4
|
+
////////////////////////////////////////
|
|
5
|
+
|
|
6
|
+
// Timer
|
|
7
|
+
struct timeval bones_start_time1;
|
|
8
|
+
|
|
9
|
+
// Start the timer for the measurement of the whole scop
|
|
10
|
+
void bones_timer_start() {
|
|
11
|
+
clFinish(bones_queue);
|
|
12
|
+
gettimeofday(&bones_start_time1, NULL);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
// End the timer for the measurement of the whole scop
|
|
16
|
+
void bones_timer_stop() {
|
|
17
|
+
#if (ITERS == 1)
|
|
18
|
+
clFinish(bones_queue);
|
|
19
|
+
struct timeval bones_end_time1;
|
|
20
|
+
gettimeofday(&bones_end_time1, NULL);
|
|
21
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
|
22
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
|
23
|
+
#endif
|
|
24
|
+
}
|
|
File without changes
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
// Flush the CPU cache (for measurement purposes only)
|
|
3
|
-
const int bones_flush_size = 4*1024*1024; // (16MB)
|
|
4
|
-
int bones_flush_i;
|
|
5
|
-
int bones_flush_j;
|
|
6
|
-
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
|
7
|
-
for (bones_flush_i=0; bones_flush_i<10; bones_flush_i++) {
|
|
8
|
-
for (bones_flush_j=0; bones_flush_j<bones_flush_size; bones_flush_j++) {
|
|
9
|
-
bones_flush_c[bones_flush_j] = bones_flush_i*bones_flush_j;
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
free(bones_flush_c);
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
////////////////////////////////////////
|
|
2
|
+
//////////// Timers ////////////////////
|
|
3
|
+
////////////////////////////////////////
|
|
4
|
+
|
|
5
|
+
// Includes
|
|
6
|
+
#include <stdio.h>
|
|
7
|
+
|
|
8
|
+
// Timer
|
|
9
|
+
struct timeval bones_start_time1;
|
|
10
|
+
|
|
11
|
+
// Start the timer for the measurement of the whole scop
|
|
12
|
+
void bones_timer_start() {
|
|
13
|
+
/*
|
|
14
|
+
const int bones_flush_size = 4*1024*1024; // (4MB)
|
|
15
|
+
char *bones_flush_c = (char *)malloc(bones_flush_size);
|
|
16
|
+
for (int i=0; i<10; i++) {
|
|
17
|
+
for (int j=0; j<bones_flush_size; j++) {
|
|
18
|
+
bones_flush_c[j] = i*j;
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
free(bones_flush_c);*/
|
|
22
|
+
gettimeofday(&bones_start_time1, NULL);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// End the timer for the measurement of the whole scop
|
|
26
|
+
void bones_timer_stop() {
|
|
27
|
+
#if (ITERS == 1)
|
|
28
|
+
struct timeval bones_end_time1;
|
|
29
|
+
gettimeofday(&bones_end_time1, NULL);
|
|
30
|
+
float bones_timer1 = 0.001 * (1000000*(bones_end_time1.tv_sec-bones_start_time1.tv_sec)+bones_end_time1.tv_usec-bones_start_time1.tv_usec);
|
|
31
|
+
printf(">>>\t\t Execution time [full scop]: %.3lf ms \n", bones_timer1);
|
|
32
|
+
#endif
|
|
33
|
+
}
|