bones-compiler 1.1.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
data/lib/bones/copy.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
module Bones
|
3
|
+
|
4
|
+
# Class copyin/out
|
5
|
+
class Copy
|
6
|
+
attr_accessor :name, :domain, :deadline, :direction, :id
|
7
|
+
|
8
|
+
def initialize(name,domain,deadline,direction,id)
|
9
|
+
@name = name
|
10
|
+
@domain = domain
|
11
|
+
@deadline = deadline
|
12
|
+
@direction = direction
|
13
|
+
@id = id
|
14
|
+
end
|
15
|
+
|
16
|
+
def get_definition(array_definition,type)
|
17
|
+
array_definition = '' if type == 'free' || type == 'alloc'
|
18
|
+
'void bones_'+type+'_'+@id+'_'+@name+'('+array_definition+');'
|
19
|
+
end
|
20
|
+
|
21
|
+
def get_function_call(type)
|
22
|
+
'bones_'+type+'_'+@id+'_'+@name+'();'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
data/lib/bones/engine.rb
CHANGED
@@ -17,13 +17,17 @@ module Bones
|
|
17
17
|
# A list of timer files to be found in the skeleton library.
|
18
18
|
TIMER_FILES = ['timer_1_start','timer_1_stop','timer_2_start','timer_2_stop']
|
19
19
|
# A list of files to be found in the common directory of the skeleton library (excluding timer files).
|
20
|
-
COMMON_FILES = ['prologue','epilogue','mem_prologue','mem_copy_H2D','mem_copy_D2H','mem_epilogue']
|
20
|
+
COMMON_FILES = ['prologue','epilogue','mem_prologue','mem_copy_H2D','mem_copy_D2H','mem_epilogue','mem_global']
|
21
21
|
# The name of the file containing the globals as found in the skeleton library
|
22
22
|
COMMON_GLOBALS = 'globals'
|
23
23
|
# The name of the file containing the header file for the original C code as found in the skeleton library
|
24
24
|
COMMON_HEADER = 'header'
|
25
25
|
# The name of the file containing the globals for the kernel files as found in the skeleton library
|
26
26
|
COMMON_GLOBALS_KERNEL = 'globals_kernel'
|
27
|
+
# The name of the file containing the scheduler code
|
28
|
+
COMMON_SCHEDULER = 'scheduler'
|
29
|
+
# Global timers
|
30
|
+
GLOBAL_TIMERS = 'timer_globals'
|
27
31
|
|
28
32
|
# The extension of a host file in the skeleton library. See also SKELETON_DEVICE.
|
29
33
|
SKELETON_HOST = '.host'
|
@@ -54,13 +58,15 @@ module Bones
|
|
54
58
|
# --help, -h: Show this message
|
55
59
|
#
|
56
60
|
def initialize
|
57
|
-
@result = {:original_code
|
58
|
-
:header_code
|
59
|
-
:host_declarations
|
60
|
-
:host_code_lists
|
61
|
-
:algorithm_declarations
|
62
|
-
:algorithm_code_lists
|
63
|
-
:verify_code
|
61
|
+
@result = {:original_code => [],
|
62
|
+
:header_code => [],
|
63
|
+
:host_declarations => [],
|
64
|
+
:host_code_lists => [],
|
65
|
+
:algorithm_declarations => [],
|
66
|
+
:algorithm_code_lists => [],
|
67
|
+
:verify_code => [],
|
68
|
+
:host_device_mem_globals => []}
|
69
|
+
@state = 0
|
64
70
|
|
65
71
|
# Provides a list of possible targets (e.g. GPU-CUDA, 'CPU-OPENCL-INTEL').
|
66
72
|
targets = []
|
@@ -86,6 +92,9 @@ module Bones
|
|
86
92
|
opt :verify, 'Verify correctness of the generated code', :short => 'c', :default => false
|
87
93
|
opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
|
88
94
|
opt :merge_factor, 'Thread merge factor, default is 1 (==disabled)', :short => 'f', :type => Integer, :default => 1
|
95
|
+
opt :register_caching,'Enable register caching: 1:enabled (default), 0:disabled', :short => 'r', :type => Integer, :default => 1
|
96
|
+
opt :zero_copy ,'Enable OpenCL zero-copy: 1:enabled (default), 0:disabled', :short => 'z', :type => Integer, :default => 1
|
97
|
+
opt :skeletons ,'Enable non-default skeletons: 1:enabled (default), 0:disabled', :short => 's', :type => Integer, :default => 1
|
89
98
|
end
|
90
99
|
Trollop::die 'no input file supplied (use: --application)' if !@options[:application_given]
|
91
100
|
Trollop::die 'no target supplied (use: --target)' if !@options[:target_given]
|
@@ -103,6 +112,12 @@ module Bones
|
|
103
112
|
# Set a prefix for functions called from the original file but defined in a host file
|
104
113
|
@prefix = (@options[:target] == 'GPU-CUDA') ? '' : ''
|
105
114
|
|
115
|
+
# Setting to include the scheduler (CUDA only)
|
116
|
+
@scheduler = (@options[:target] == 'GPU-CUDA') ? true : false
|
117
|
+
|
118
|
+
# Skip analyse passes for certain targets
|
119
|
+
@skiptarget = false #(@options[:target] == 'PAR4ALL') ? true : false
|
120
|
+
|
106
121
|
# Set the location for the skeleton library
|
107
122
|
@dir = {}
|
108
123
|
@dir[:library] = File.join(BONES_DIR_SKELETONS,@options[:target])
|
@@ -125,7 +140,7 @@ module Bones
|
|
125
140
|
def process
|
126
141
|
|
127
142
|
# Run the preprocessor
|
128
|
-
preprocessor = Bones::Preprocessor.new(@source,File.dirname(@options[:application]),@basename)
|
143
|
+
preprocessor = Bones::Preprocessor.new(@source,File.dirname(@options[:application]),@basename,@scheduler)
|
129
144
|
preprocessor.process
|
130
145
|
@result[:header_code] = preprocessor.header_code
|
131
146
|
@result[:device_header] = preprocessor.device_header
|
@@ -137,11 +152,20 @@ module Bones
|
|
137
152
|
parser.type_names << 'size_t'
|
138
153
|
ast = parser.parse(preprocessor.target_code)
|
139
154
|
ast.preprocess
|
155
|
+
|
156
|
+
# Add the scheduler's global code
|
157
|
+
if @scheduler
|
158
|
+
@result[:host_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_SCHEDULER+@extension)))
|
159
|
+
end
|
140
160
|
|
141
161
|
# Set the algorithm's skeleton and generate the global code
|
142
162
|
one_time = true
|
143
163
|
preprocessor.algorithms.each_with_index do |algorithm,algorithm_number|
|
144
164
|
algorithm.species.set_skeleton(File.join(@dir[:library],SKELETON_FILE))
|
165
|
+
if @options[:skeletons] == 0
|
166
|
+
algorithm.species.skeleton_name = 'default'
|
167
|
+
algorithm.species.settings.gsub!('10','00').gsub!('20','00').gsub!('30','00')
|
168
|
+
end
|
145
169
|
if algorithm.species.skeleton_name && one_time
|
146
170
|
@result[:host_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS+@extension)))
|
147
171
|
@result[:algorithm_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS_KERNEL+@extension)))
|
@@ -149,24 +173,60 @@ module Bones
|
|
149
173
|
end
|
150
174
|
end
|
151
175
|
|
152
|
-
# Perform code generation
|
176
|
+
# Perform code generation (per-species code)
|
153
177
|
@result[:original_code] = ast
|
178
|
+
arrays = []
|
154
179
|
preprocessor.algorithms.each_with_index do |algorithm,algorithm_number|
|
155
180
|
if @options[:only_alg_number] == 99 || algorithm_number == [@options[:only_alg_number],preprocessor.algorithms.length-1].min
|
156
181
|
puts MESSAGE+'Starting code generation for algorithm "'+algorithm.name+'"'
|
157
182
|
if algorithm.species.skeleton_name
|
158
183
|
algorithm.merge_factor = @options[:merge_factor] if (@options[:target] == 'GPU-CUDA')
|
184
|
+
algorithm.register_caching_enabled = @options[:register_caching]
|
159
185
|
algorithm.set_function(ast)
|
160
|
-
algorithm.populate_variables(ast,preprocessor.defines)
|
186
|
+
algorithm.populate_variables(ast,preprocessor.defines) if !@skiptarget
|
161
187
|
algorithm.populate_lists()
|
162
|
-
algorithm.populate_hash()
|
188
|
+
algorithm.populate_hash() if !@skiptarget
|
163
189
|
generate(algorithm)
|
164
190
|
puts MESSAGE+'Code generated using the "'+algorithm.species.skeleton_name+'" skeleton'
|
191
|
+
arrays.concat(algorithm.arrays)
|
165
192
|
else
|
166
193
|
puts WARNING+'Skeleton "'+algorithm.species.name+'" not available'
|
167
194
|
end
|
168
195
|
end
|
169
196
|
end
|
197
|
+
|
198
|
+
# Only if the scheduler is included
|
199
|
+
if @scheduler
|
200
|
+
|
201
|
+
# Perform code generation (sync statements)
|
202
|
+
@result[:host_declarations].push('void bones_synchronize(int bones_task_id);')
|
203
|
+
|
204
|
+
# Perform code generation (memory allocs)
|
205
|
+
allocs = []
|
206
|
+
preprocessor.copies.each do |copy|
|
207
|
+
if !allocs.include?(copy.name)
|
208
|
+
generate_memory('alloc',copy,arrays,0)
|
209
|
+
allocs << copy.name
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
# Perform code generation (memory copies)
|
214
|
+
preprocessor.copies.each_with_index do |copy,index|
|
215
|
+
#puts MESSAGE+'Generating copy code for array "'+copy.name+'"'
|
216
|
+
generate_memory('copy',copy,arrays,index)
|
217
|
+
end
|
218
|
+
|
219
|
+
# Perform code generation (memory frees)
|
220
|
+
frees = []
|
221
|
+
preprocessor.copies.each do |copy|
|
222
|
+
if !frees.include?(copy.name)
|
223
|
+
generate_memory('free',copy,arrays,0)
|
224
|
+
frees << copy.name
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
end
|
229
|
+
|
170
230
|
end
|
171
231
|
|
172
232
|
# This method writes the output code to files. It creates
|
@@ -202,7 +262,7 @@ module Bones
|
|
202
262
|
end
|
203
263
|
end
|
204
264
|
|
205
|
-
# Populate the verification
|
265
|
+
# Populate the verification file
|
206
266
|
if @options[:verify]
|
207
267
|
File.open(File.join(directory,@options[:name]+OUTPUT_VERIFICATION+@extension),'w') do |verification|
|
208
268
|
verification.puts @result[:header_code]
|
@@ -212,15 +272,22 @@ module Bones
|
|
212
272
|
end
|
213
273
|
end
|
214
274
|
|
215
|
-
# Populate the target file
|
275
|
+
# Populate the target file (host)
|
216
276
|
File.open(File.join(directory,@options[:name]+OUTPUT_HOST+@extension),'w') do |target|
|
277
|
+
target.puts '#include <cuda_runtime.h>'+NL if @options[:target] == 'GPU-CUDA'
|
278
|
+
target.puts "#define ZEROCOPY 0"+NL if @options[:zero_copy] == 0 && @options[:target] == 'CPU-OPENCL-INTEL'
|
279
|
+
target.puts "#define ZEROCOPY 1"+NL if @options[:zero_copy] == 1 && @options[:target] == 'CPU-OPENCL-INTEL'
|
217
280
|
target.puts @result[:header_code]
|
218
|
-
target.puts @result[:algorithm_declarations]
|
219
281
|
target.puts
|
282
|
+
target.puts @result[:host_device_mem_globals]
|
283
|
+
target.puts
|
284
|
+
target.puts @result[:algorithm_declarations]
|
220
285
|
target.puts @result[:host_code_lists]
|
286
|
+
target.puts
|
287
|
+
target.puts File.read(File.join(@dir[:common_library],GLOBAL_TIMERS+@extension))
|
221
288
|
end
|
222
289
|
|
223
|
-
# Populate the algorithm file
|
290
|
+
# Populate the algorithm file (device)
|
224
291
|
File.open(File.join(directory,@options[:name]+OUTPUT_DEVICE+@algorithm_extension),'w') do |algorithm|
|
225
292
|
algorithm.puts @result[:device_header]
|
226
293
|
algorithm.puts @result[:algorithm_code_lists]
|
@@ -251,7 +318,7 @@ module Bones
|
|
251
318
|
:device => File.read(file_name_device+@algorithm_extension)}
|
252
319
|
|
253
320
|
# Perform the transformations on the algorithm's code
|
254
|
-
algorithm.perform_transformations(algorithm.species.settings)
|
321
|
+
algorithm.perform_transformations(algorithm.species.settings) if !@skiptarget
|
255
322
|
|
256
323
|
# Load the common skeletons from the skeleton library
|
257
324
|
COMMON_FILES.each do |skeleton|
|
@@ -291,13 +358,19 @@ module Bones
|
|
291
358
|
minihash = { :array => array.name,
|
292
359
|
:type => array.type_name,
|
293
360
|
:flatten => array.flatten,
|
294
|
-
:variable_dimensions => array.size.join('*')
|
361
|
+
:variable_dimensions => array.size.join('*'),
|
362
|
+
:state => @state.to_s}
|
363
|
+
@state += 1
|
295
364
|
|
296
365
|
# Apply the mini-search-and-replace hash to create the memory allocations, memory copies (if input only), etc.
|
297
366
|
processed[:mem_prologue] += search_and_replace(minihash,skeletons[:mem_prologue])
|
298
367
|
processed[:mem_copy_H2D] += search_and_replace(minihash,skeletons[:mem_copy_H2D]) if array.input? || array.species.shared?
|
299
368
|
processed[:mem_epilogue] += search_and_replace(minihash,skeletons[:mem_epilogue])
|
369
|
+
|
370
|
+
# Add the device declarations
|
371
|
+
@result[:host_device_mem_globals].push(search_and_replace(minihash,skeletons[:mem_global]))
|
300
372
|
end
|
373
|
+
|
301
374
|
# Iterate over all the array variables and create a mini-search-and-replace hash for each array (output arrays)
|
302
375
|
algorithm.arrays.select(OUTPUT).each_with_index do |array, num_array|
|
303
376
|
hash = algorithm.hash["out#{num_array}".to_sym]
|
@@ -305,7 +378,9 @@ module Bones
|
|
305
378
|
:type => array.type_name,
|
306
379
|
:flatten => array.flatten,
|
307
380
|
:offset => '('+hash[:dimension0][:from]+')',
|
308
|
-
:variable_dimensions => '('+hash[:dimensions]+')'
|
381
|
+
:variable_dimensions => '('+hash[:dimensions]+')',
|
382
|
+
:state => @state.to_s}
|
383
|
+
@state += 1
|
309
384
|
|
310
385
|
# Perform selective copy for arrays with 2 dimensions (uses a for-loop over the memory copies)
|
311
386
|
if array.dimensions == 2 && @options[:target] == 'GPU-CUDA' && false
|
@@ -346,11 +421,17 @@ module Bones
|
|
346
421
|
search_and_replace!(algorithm.hash,skeletons[:epilogue])
|
347
422
|
|
348
423
|
# Construct the final host function, including the timers and memory copies
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
424
|
+
if @scheduler
|
425
|
+
host = skeletons[:prologue ] +
|
426
|
+
skeletons[:timer_2_start] + skeletons[:host ] + skeletons[:timer_2_stop ] +
|
427
|
+
skeletons[:epilogue ]
|
428
|
+
else
|
429
|
+
host = skeletons[:prologue ] +
|
430
|
+
skeletons[:timer_1_start] + processed[:mem_prologue ] + processed[:mem_copy_H2D ] +
|
431
|
+
skeletons[:timer_2_start] + skeletons[:host ] + skeletons[:timer_2_stop ] +
|
432
|
+
processed[:mem_copy_D2H ] + processed[:mem_epilogue ] + skeletons[:timer_1_stop ] +
|
433
|
+
skeletons[:epilogue ]
|
434
|
+
end
|
354
435
|
|
355
436
|
# Generate code to replace the original code, including verification code if specified by the option flag
|
356
437
|
verify_skeleton = File.read(File.join(@dir[:verify_library],'verify_results.c'))
|
@@ -362,24 +443,59 @@ module Bones
|
|
362
443
|
# Add a performance model to the original code
|
363
444
|
#replacement_code.insert(0,algorithm.performance_model_code('model'))
|
364
445
|
|
365
|
-
# Replace mallocs and frees in the original code with aligned memory allocations (only for CPU-OpenCL targets)
|
366
|
-
if @options[:target] == 'CPU-OPENCL-INTEL'
|
367
|
-
@result[:original_code].
|
368
|
-
@result[:original_code].
|
446
|
+
# Replace mallocs and frees in the original code with aligned memory allocations (only for CPU-OpenCL targets with zero-copy)
|
447
|
+
if @options[:zero_copy] == 1 && @options[:target] == 'CPU-OPENCL-INTEL'
|
448
|
+
@result[:original_code].search_and_replace_function_call(C::Variable.parse('malloc'),C::Variable.parse(VARIABLE_PREFIX+'malloc_128'))
|
449
|
+
@result[:original_code].search_and_replace_function_call(C::Variable.parse('free'),C::Variable.parse(VARIABLE_PREFIX+'free_128'))
|
369
450
|
end
|
370
451
|
|
371
452
|
# Give the original main function a new name
|
372
|
-
@result[:original_code].
|
453
|
+
@result[:original_code].search_and_replace_function_definition('main',VARIABLE_PREFIX+'main')
|
373
454
|
|
374
455
|
# Replace the original code with a function call to the newly generated code
|
375
|
-
@result[:original_code].
|
456
|
+
@result[:original_code].search_and_replace_node(algorithm.code,replacement_code)
|
376
457
|
|
377
458
|
# The host code is generated, push the data to the output hashes
|
378
459
|
accelerated_definition = 'void '+algorithm.name+'_accelerated('+algorithm.lists[:host_definition]+')'
|
379
|
-
@result[:host_code_lists].push(@prefix+accelerated_definition+' {'+NL+host+NL+'}')
|
460
|
+
@result[:host_code_lists].push(@prefix+accelerated_definition+' {'+NL+host+NL+'}'+NL+NL)
|
380
461
|
@result[:host_declarations].push(@prefix+accelerated_definition+';'+NL+@prefix+original_definition+';')
|
381
462
|
end
|
382
463
|
|
464
|
+
|
465
|
+
def generate_memory(type,copy,arrays,index)
|
466
|
+
|
467
|
+
# Find the corresponding array
|
468
|
+
arrays.each do |array|
|
469
|
+
if array.name == copy.name && (array.direction == copy.direction || array.direction == INOUT)
|
470
|
+
|
471
|
+
# Load the skeleton from the skeleton library
|
472
|
+
type += copy.direction if type == 'copy'
|
473
|
+
skeleton = File.read(File.join(@dir[:common_library],'mem_async_'+type+@extension))
|
474
|
+
|
475
|
+
# Create the find-and-replace hash
|
476
|
+
minihash = { :array => copy.name,
|
477
|
+
:id => copy.id,
|
478
|
+
:index => index.to_s,
|
479
|
+
:direction => copy.direction,
|
480
|
+
:definition => array.definition,
|
481
|
+
:type => array.type_name,
|
482
|
+
:flatten => array.flatten,
|
483
|
+
:offset => '0',
|
484
|
+
:variable_dimensions => array.size.join('*'),
|
485
|
+
:state => copy.deadline}
|
486
|
+
|
487
|
+
# Instantiate the skeleton and add it to the final result
|
488
|
+
@result[:host_code_lists].push(search_and_replace(minihash,skeleton))
|
489
|
+
|
490
|
+
# Add a forward declaration of this function
|
491
|
+
@result[:host_declarations].push(copy.get_definition(array.definition,type))
|
492
|
+
|
493
|
+
# Done
|
494
|
+
return
|
495
|
+
end
|
496
|
+
end
|
497
|
+
end
|
498
|
+
|
383
499
|
end
|
384
500
|
|
385
501
|
end
|
data/lib/bones/preprocessor.rb
CHANGED
@@ -9,7 +9,7 @@ module Bones
|
|
9
9
|
# * +algorithms+ - An array of identified algorithms, each of class Bones::Algorithm.
|
10
10
|
# * +target_code+ - The processed code containing no Bones directives nor other pre-processor directives (such as includes and defines).
|
11
11
|
class Preprocessor < Common
|
12
|
-
attr_reader :header_code, :algorithms, :target_code, :device_header, :defines
|
12
|
+
attr_reader :header_code, :algorithms, :target_code, :device_header, :defines, :scop, :copies
|
13
13
|
|
14
14
|
# Denotes the start of an algorithmic species.
|
15
15
|
IDENTIFIER = '#pragma species'
|
@@ -18,9 +18,22 @@ module Bones
|
|
18
18
|
WHITESPACE = '\s*'
|
19
19
|
|
20
20
|
# This directive denotes the start of an algorithm. It is based on the IDENTIFIER constant.
|
21
|
-
|
21
|
+
SPECIES_START = IDENTIFIER+' kernel'
|
22
22
|
# This directive denotes the end of an algorithm. It is based on the IDENTIFIER constant.
|
23
|
-
|
23
|
+
SPECIES_END = IDENTIFIER+' endkernel'
|
24
|
+
|
25
|
+
# Start of the scop
|
26
|
+
SCOP_START = '#pragma scop'
|
27
|
+
# End of the scop
|
28
|
+
SCOP_END = '#pragma endscop'
|
29
|
+
|
30
|
+
# Synchronise directive.
|
31
|
+
SYNC = IDENTIFIER+' sync'
|
32
|
+
|
33
|
+
# Copy in directive.
|
34
|
+
COPYIN = IDENTIFIER+ ' copyin'
|
35
|
+
# Copy out directive.
|
36
|
+
COPYOUT = IDENTIFIER+ ' copyout'
|
24
37
|
|
25
38
|
# A regular expression captures a prefix in an algorithm (e.g. unordered/multiple).
|
26
39
|
REGEXP_PREFIX = /^[a-z]+ /
|
@@ -31,16 +44,18 @@ module Bones
|
|
31
44
|
# This is the method which initializes the preprocessor.
|
32
45
|
# Initialization requires the target source code to process,
|
33
46
|
# which is then set as the instance variable +@source_code+.
|
34
|
-
def initialize(source_code,directory,filename)
|
47
|
+
def initialize(source_code,directory,filename,scheduler)
|
35
48
|
@source_code = source_code
|
36
|
-
@target_code =
|
49
|
+
@target_code = []
|
37
50
|
@header_code = ''
|
38
51
|
@device_header = ''
|
39
52
|
@directory = directory
|
40
53
|
@filename = filename
|
41
54
|
@algorithms = Array.new
|
55
|
+
@copies = Array.new
|
42
56
|
@defines = {}
|
43
57
|
@found_algorithms = 0
|
58
|
+
@scheduler = scheduler
|
44
59
|
end
|
45
60
|
|
46
61
|
# This is the method to perform the actual preprocessing.
|
@@ -51,6 +66,7 @@ module Bones
|
|
51
66
|
algorithm_code = ''
|
52
67
|
species = nil
|
53
68
|
found = 0
|
69
|
+
alloc_index, free_index = 0, 0
|
54
70
|
|
55
71
|
# Process the file line by line
|
56
72
|
@source_code.each_line.with_index do |line,index|
|
@@ -71,7 +87,7 @@ module Bones
|
|
71
87
|
@defines[match.first[0].to_sym] = match.first[1]
|
72
88
|
|
73
89
|
# Found the start of algorithm marker
|
74
|
-
elsif line =~ /^#{WHITESPACE}#{
|
90
|
+
elsif line =~ /^#{WHITESPACE}#{SPECIES_START}/
|
75
91
|
if found == 0
|
76
92
|
line = replace_defines(line,@defines)
|
77
93
|
prefix, input, output = marker_to_algorithm(line)
|
@@ -80,28 +96,92 @@ module Bones
|
|
80
96
|
@found_algorithms = @found_algorithms + 1
|
81
97
|
end
|
82
98
|
found = found + 1
|
99
|
+
#@target_code << "int bones_temp_species_start = '#{line.gsub(NL,'')}';"+NL
|
83
100
|
|
84
101
|
# Found the end of algorithm marker
|
85
|
-
elsif line =~ /^#{WHITESPACE}#{
|
102
|
+
elsif line =~ /^#{WHITESPACE}#{SPECIES_END}/
|
86
103
|
if found == 1
|
87
|
-
name = line.strip.scan(/^#{WHITESPACE}#{
|
104
|
+
name = line.strip.scan(/^#{WHITESPACE}#{SPECIES_END} (.+)/).join
|
88
105
|
name = DEFAULT_NAME if name == ''
|
89
106
|
@algorithms.push(Bones::Algorithm.new(name,@filename,index.to_s,species,algorithm_code))
|
90
107
|
algorithm_code = ''
|
91
108
|
end
|
92
109
|
found = found - 1
|
110
|
+
#@target_code << "int bones_temp_species_end = '#{line.gsub(NL,'')}';"+NL
|
111
|
+
|
112
|
+
# Found a sync marker
|
113
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{SYNC}/
|
114
|
+
sync = line.strip.scan(/^#{WHITESPACE}#{SYNC} (.+)/).join
|
115
|
+
@target_code << "bones_synchronize(#{sync});"+NL
|
116
|
+
|
117
|
+
# Found a copyin marker
|
118
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{COPYIN}/
|
119
|
+
copies = line.strip.scan(/^#{WHITESPACE}#{COPYIN} (.+)/).join.split(WEDGE).map{ |c| c.strip }
|
120
|
+
copies.each_with_index do |copy,copynum|
|
121
|
+
name = copy.split('[').first
|
122
|
+
domain = copy.scan(/\[(.+)\]/).join.split(DIM_SEP)
|
123
|
+
deadline = copy.split('|').last
|
124
|
+
@copies.push(Bones::Copy.new(name,domain,deadline,'in',"#{index*100+copynum}"))
|
125
|
+
@target_code << "bones_copyin_#{index*100+copynum}_#{name}(#{name});"+NL
|
126
|
+
end
|
127
|
+
|
128
|
+
# Found a copyout marker
|
129
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{COPYOUT}/
|
130
|
+
copies = line.strip.scan(/^#{WHITESPACE}#{COPYOUT} (.+)/).join.split(WEDGE).map{ |c| c.strip }
|
131
|
+
copies.each_with_index do |copy,copynum|
|
132
|
+
name = copy.split('[').first
|
133
|
+
domain = copy.scan(/\[(.+)\]/).join.split(DIM_SEP)
|
134
|
+
deadline = copy.split('|').last
|
135
|
+
@copies.push(Bones::Copy.new(name,domain,deadline,'out',"#{index*100+copynum}"))
|
136
|
+
@target_code << "bones_copyout_#{index*100+copynum}_#{name}(#{name});"+NL
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
# Check if it was a 'pragma scop' / 'pragma endscop' line
|
141
|
+
if line =~ /^#{WHITESPACE}#{SCOP_START}/
|
142
|
+
alloc_index = index
|
143
|
+
elsif line =~ /^#{WHITESPACE}#{SCOP_END}/
|
144
|
+
free_index = @target_code.length
|
93
145
|
end
|
146
|
+
|
94
147
|
else
|
95
148
|
if found > 0
|
96
149
|
algorithm_line = replace_defines(line,@defines)
|
97
|
-
@target_code
|
150
|
+
@target_code << algorithm_line
|
98
151
|
algorithm_code += algorithm_line if line !~ /^#{WHITESPACE}#/
|
99
152
|
else
|
100
|
-
@target_code
|
153
|
+
@target_code << line
|
101
154
|
end
|
102
155
|
end
|
103
156
|
end
|
104
|
-
puts WARNING+'Begin/end kernel mismatch ('+@found_algorithms.to_s+' versus '+@algorithms.length.to_s+'), probably missing a "'+
|
157
|
+
puts WARNING+'Begin/end kernel mismatch ('+@found_algorithms.to_s+' versus '+@algorithms.length.to_s+'), probably missing a "'+SPECIES_END+'"' unless @algorithms.length == @found_algorithms
|
158
|
+
|
159
|
+
# Add frees and mallocs
|
160
|
+
if @scheduler
|
161
|
+
alloc_code, free_code = '', ''
|
162
|
+
included_copies = []
|
163
|
+
copies.each do |copy|
|
164
|
+
if !included_copies.include?(copy.name)
|
165
|
+
alloc_code += copy.get_function_call('alloc')+NL
|
166
|
+
free_code += copy.get_function_call('free')+NL
|
167
|
+
included_copies << copy.name
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
# Add timers (whole scop timing) and frees/mallocs to the code
|
173
|
+
offset = @header_code.lines.count
|
174
|
+
@target_code.insert(alloc_index-offset, 'bones_timer_start();'+NL)
|
175
|
+
if @scheduler
|
176
|
+
@target_code.insert(alloc_index-offset+1, alloc_code)
|
177
|
+
@target_code.insert(free_index+2, free_code)
|
178
|
+
@target_code.insert(free_index+3, 'bones_timer_stop();'+NL)
|
179
|
+
else
|
180
|
+
@target_code.insert(free_index+2, 'bones_timer_stop();'+NL)
|
181
|
+
end
|
182
|
+
|
183
|
+
# Join the array
|
184
|
+
@target_code = @target_code.join('')
|
105
185
|
end
|
106
186
|
|
107
187
|
# This is the method to preprocess a header file. Currently,
|
@@ -143,7 +223,7 @@ module Bones
|
|
143
223
|
|
144
224
|
# Method to extract the algorithm details from a marker found in code.
|
145
225
|
def marker_to_algorithm(marker)
|
146
|
-
algorithm = marker.strip.scan(/^#{WHITESPACE}#{
|
226
|
+
algorithm = marker.strip.scan(/^#{WHITESPACE}#{SPECIES_START} (.+)/).join
|
147
227
|
prefix = ''
|
148
228
|
if algorithm =~ REGEXP_PREFIX
|
149
229
|
split = algorithm.partition(' ')
|