bones-compiler 1.1.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/CHANGELOG +37 -0
- data/LICENSE +1 -1
- data/README.rdoc +95 -70
- data/Rakefile +78 -3
- data/VERSION +1 -1
- data/bin/adarwin +17 -0
- data/examples/benchmarks/PolyBench/2mm.c +104 -0
- data/examples/benchmarks/{3mm.c → PolyBench/3mm.c} +5 -2
- data/examples/benchmarks/{adi.c → PolyBench/adi.c} +6 -3
- data/examples/benchmarks/{atax.c → PolyBench/atax.c} +5 -2
- data/examples/benchmarks/{bicg.c → PolyBench/bicg.c} +5 -2
- data/examples/benchmarks/{cholesky.c → PolyBench/cholesky.c} +3 -0
- data/examples/benchmarks/{common.h → PolyBench/common.h} +2 -2
- data/examples/benchmarks/{correlation.c → PolyBench/correlation.c} +16 -7
- data/examples/benchmarks/{covariance.c → PolyBench/covariance.c} +7 -2
- data/examples/benchmarks/{doitgen.c → PolyBench/doitgen.c} +5 -2
- data/examples/benchmarks/{durbin.c → PolyBench/durbin.c} +3 -0
- data/examples/benchmarks/{dynprog.c → PolyBench/dynprog.c} +3 -0
- data/examples/benchmarks/{fdtd-2d-apml.c → PolyBench/fdtd-2d-apml.c} +3 -0
- data/examples/benchmarks/{fdtd-2d.c → PolyBench/fdtd-2d.c} +5 -2
- data/examples/benchmarks/{floyd-warshall.c → PolyBench/floyd-warshall.c} +3 -0
- data/examples/benchmarks/{gemm.c → PolyBench/gemm.c} +5 -2
- data/examples/benchmarks/{gemver.c → PolyBench/gemver.c} +5 -2
- data/examples/benchmarks/{gesummv.c → PolyBench/gesummv.c} +5 -2
- data/examples/benchmarks/{gramschmidt.c → PolyBench/gramschmidt.c} +3 -0
- data/examples/benchmarks/{jacobi-1d-imper.c → PolyBench/jacobi-1d-imper.c} +10 -2
- data/examples/benchmarks/{jacobi-2d-imper.c → PolyBench/jacobi-2d-imper.c} +8 -3
- data/examples/benchmarks/{lu.c → PolyBench/lu.c} +3 -0
- data/examples/benchmarks/{ludcmp.c → PolyBench/ludcmp.c} +3 -0
- data/examples/benchmarks/{mvt.c → PolyBench/mvt.c} +6 -2
- data/examples/benchmarks/{reg_detect.c → PolyBench/reg_detect.c} +3 -0
- data/examples/benchmarks/{seidel-2d.c → PolyBench/seidel-2d.c} +3 -0
- data/examples/benchmarks/{symm.c → PolyBench/symm.c} +3 -0
- data/examples/benchmarks/{syr2k.c → PolyBench/syr2k.c} +5 -2
- data/examples/benchmarks/{syrk.c → PolyBench/syrk.c} +7 -4
- data/examples/benchmarks/{trisolv.c → PolyBench/trisolv.c} +3 -0
- data/examples/benchmarks/{trmm.c → PolyBench/trmm.c} +3 -0
- data/examples/benchmarks/Rodinia/cfd.c +180 -0
- data/examples/benchmarks/Rodinia/hotspot.c +228 -0
- data/examples/benchmarks/Rodinia/kmeans.c +164 -0
- data/examples/benchmarks/Rodinia/srad.c +188 -0
- data/examples/benchmarks/other/common.h +0 -0
- data/examples/benchmarks/other/dct.c +58 -0
- data/examples/benchmarks/other/mm.c +50 -0
- data/examples/benchmarks/{saxpy.c → other/saxpy.c} +11 -7
- data/examples/chunk/{example1.c → example01.c} +0 -0
- data/examples/chunk/{example2.c → example02.c} +0 -0
- data/examples/chunk/{example3.c → example03.c} +0 -0
- data/examples/chunk/{example4.c → example04.c} +0 -0
- data/examples/chunk/{example5.c → example05.c} +0 -0
- data/examples/chunk/example06.c +45 -0
- data/examples/chunk/example07.c +49 -0
- data/examples/dependences/example01.c +42 -0
- data/examples/dependences/example02.c +40 -0
- data/examples/dependences/example03.c +43 -0
- data/examples/dependences/example04.c +44 -0
- data/examples/dependences/example05.c +42 -0
- data/examples/element/{example1.c → example01.c} +0 -0
- data/examples/element/{example2.c → example02.c} +2 -2
- data/examples/element/{example3.c → example03.c} +0 -0
- data/examples/element/{example4.c → example04.c} +0 -0
- data/examples/element/{example5.c → example05.c} +0 -0
- data/examples/element/{example6.c → example06.c} +0 -0
- data/examples/element/{example7.c → example07.c} +0 -0
- data/examples/element/{example8.c → example08.c} +0 -0
- data/examples/element/{example9.c → example09.c} +0 -0
- data/examples/element/example13.c +73 -0
- data/examples/fusion/example01.c +68 -0
- data/examples/fusion/example02.c +73 -0
- data/examples/fusion/example03.c +72 -0
- data/examples/fusion/example04.c +61 -0
- data/examples/fusion/example05.c +55 -0
- data/examples/neighbourhood/{example1.c → example01.c} +0 -0
- data/examples/neighbourhood/{example2.c → example02.c} +0 -0
- data/examples/neighbourhood/{example3.c → example03.c} +0 -0
- data/examples/neighbourhood/{example4.c → example04.c} +0 -0
- data/examples/neighbourhood/example05.c +44 -0
- data/examples/shared/{example1.c → example01.c} +0 -0
- data/examples/shared/{example2.c → example02.c} +0 -0
- data/examples/shared/{example3.c → example03.c} +0 -0
- data/examples/shared/{example4.c → example04.c} +0 -0
- data/examples/shared/{example5.c → example05.c} +0 -0
- data/lib/adarwin.rb +62 -0
- data/lib/adarwin/dependences.rb +268 -0
- data/lib/adarwin/engine.rb +277 -0
- data/lib/adarwin/fusion.rb +174 -0
- data/lib/adarwin/interval.rb +57 -0
- data/lib/adarwin/memorycopies.rb +153 -0
- data/lib/adarwin/nest.rb +225 -0
- data/lib/adarwin/preprocessor.rb +76 -0
- data/lib/adarwin/reference.rb +261 -0
- data/lib/bones.rb +4 -55
- data/lib/bones/algorithm.rb +77 -40
- data/lib/bones/copy.rb +26 -0
- data/lib/bones/engine.rb +147 -31
- data/lib/bones/preprocessor.rb +92 -12
- data/lib/bones/species.rb +4 -3
- data/lib/bones/structure.rb +14 -4
- data/lib/castaddon.rb +11 -6
- data/lib/castaddon/node_adarwin.rb +245 -0
- data/lib/castaddon/node_bones.rb +316 -0
- data/lib/castaddon/node_common.rb +289 -0
- data/lib/castaddon/transformations.rb +236 -0
- data/lib/common.rb +216 -0
- data/skeletons/CPU-C/common/header.c +3 -0
- data/skeletons/CPU-C/common/mem_global.c +0 -0
- data/skeletons/CPU-C/common/timer_2_start.c +11 -13
- data/skeletons/CPU-C/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-C/common/timer_globals.c +29 -0
- data/skeletons/CPU-OPENCL-INTEL/common/globals.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/header.c +3 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_D2H.c +7 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_copy_H2D.c +4 -2
- data/skeletons/CPU-OPENCL-INTEL/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENCL-INTEL/common/mem_prologue.c +6 -3
- data/skeletons/CPU-OPENCL-INTEL/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENCL-INTEL/common/timer_globals.c +24 -0
- data/skeletons/CPU-OPENMP/common/globals.c +1 -0
- data/skeletons/CPU-OPENMP/common/header.c +3 -0
- data/skeletons/CPU-OPENMP/common/mem_global.c +0 -0
- data/skeletons/CPU-OPENMP/common/timer_1_start.c +0 -12
- data/skeletons/CPU-OPENMP/common/timer_2_stop.c +1 -1
- data/skeletons/CPU-OPENMP/common/timer_globals.c +33 -0
- data/skeletons/GPU-CUDA/common/globals.c +27 -3
- data/skeletons/GPU-CUDA/common/header.c +2 -0
- data/skeletons/GPU-CUDA/common/mem_async_alloc.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyin.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_copyout.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_async_free.c +6 -0
- data/skeletons/GPU-CUDA/common/mem_copy_D2H.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_copy_H2D.c +2 -1
- data/skeletons/GPU-CUDA/common/mem_global.c +1 -0
- data/skeletons/GPU-CUDA/common/mem_prologue.c +1 -2
- data/skeletons/GPU-CUDA/common/scheduler.c +86 -0
- data/skeletons/GPU-CUDA/common/timer_2_start.c +2 -4
- data/skeletons/GPU-CUDA/common/timer_2_stop.c +3 -5
- data/skeletons/GPU-CUDA/common/timer_globals.c +26 -0
- data/skeletons/GPU-CUDA/kernel/2xN-N-chunk-1-N-to-D-element.kernel.cu +5 -7
- data/skeletons/GPU-CUDA/kernel/N-N-chunk-1-N-to-D-element.kernel.cu +4 -6
- data/skeletons/GPU-CUDA/kernel/default.host.c +1 -1
- data/skeletons/GPU-CUDA/kernel/default.kernel.cu +6 -8
- data/skeletons/GPU-CUDA/skeletons.txt +6 -5
- data/{examples/benchmarks/2mm.c → test/examples/benchmarks/PolyBench/2mm_species.c} +19 -15
- data/test/examples/benchmarks/PolyBench/3mm_species.c +82 -0
- data/test/examples/benchmarks/PolyBench/adi_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/atax_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/bicg_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/cholesky_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/correlation_species.c +97 -0
- data/test/examples/benchmarks/PolyBench/covariance_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/doitgen_species.c +67 -0
- data/test/examples/benchmarks/PolyBench/durbin_species.c +80 -0
- data/test/examples/benchmarks/PolyBench/dynprog_species.c +71 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d-apml_species.c +112 -0
- data/test/examples/benchmarks/PolyBench/fdtd-2d_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/floyd-warshall_species.c +54 -0
- data/test/examples/benchmarks/PolyBench/gemm_species.c +73 -0
- data/test/examples/benchmarks/PolyBench/gemver_species.c +93 -0
- data/test/examples/benchmarks/PolyBench/gesummv_species.c +68 -0
- data/test/examples/benchmarks/PolyBench/gramschmidt_species.c +78 -0
- data/test/examples/benchmarks/PolyBench/jacobi-1d-imper_species.c +59 -0
- data/test/examples/benchmarks/PolyBench/jacobi-2d-imper_species.c +65 -0
- data/test/examples/benchmarks/PolyBench/lu_species.c +57 -0
- data/test/examples/benchmarks/PolyBench/ludcmp_species.c +89 -0
- data/test/examples/benchmarks/PolyBench/mvt_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/reg_detect_species.c +86 -0
- data/test/examples/benchmarks/PolyBench/seidel-2d_species.c +53 -0
- data/test/examples/benchmarks/PolyBench/symm_species.c +74 -0
- data/test/examples/benchmarks/PolyBench/syr2k_species.c +69 -0
- data/test/examples/benchmarks/PolyBench/syrk_species.c +66 -0
- data/test/examples/benchmarks/PolyBench/trisolv_species.c +61 -0
- data/test/examples/benchmarks/PolyBench/trmm_species.c +61 -0
- data/test/examples/chunk/example01_species.c +58 -0
- data/test/examples/chunk/example02_species.c +48 -0
- data/test/examples/chunk/example03_species.c +63 -0
- data/test/examples/chunk/example04_species.c +58 -0
- data/test/examples/chunk/example05_species.c +56 -0
- data/test/examples/chunk/example06_species.c +49 -0
- data/test/examples/chunk/example07_species.c +53 -0
- data/test/examples/dependences/example01_species.c +46 -0
- data/test/examples/dependences/example02_species.c +44 -0
- data/test/examples/dependences/example03_species.c +47 -0
- data/test/examples/dependences/example04_species.c +48 -0
- data/test/examples/dependences/example05_species.c +46 -0
- data/test/examples/element/example01_species.c +50 -0
- data/test/examples/element/example02_species.c +50 -0
- data/test/examples/element/example03_species.c +62 -0
- data/test/examples/element/example04_species.c +53 -0
- data/test/examples/element/example05_species.c +59 -0
- data/test/examples/element/example06_species.c +50 -0
- data/test/examples/element/example07_species.c +58 -0
- data/test/examples/element/example08_species.c +49 -0
- data/test/examples/element/example09_species.c +52 -0
- data/test/examples/element/example10_species.c +54 -0
- data/test/examples/element/example11_species.c +51 -0
- data/test/examples/element/example12_species.c +60 -0
- data/test/examples/element/example13_species.c +77 -0
- data/test/examples/neighbourhood/example01_species.c +57 -0
- data/test/examples/neighbourhood/example02_species.c +56 -0
- data/test/examples/neighbourhood/example03_species.c +83 -0
- data/test/examples/neighbourhood/example04_species.c +55 -0
- data/test/examples/neighbourhood/example05_species.c +48 -0
- data/test/examples/shared/example01_species.c +49 -0
- data/test/examples/shared/example02_species.c +55 -0
- data/test/examples/shared/example03_species.c +59 -0
- data/test/examples/shared/example04_species.c +56 -0
- data/test/examples/shared/example05_species.c +52 -0
- metadata +193 -73
- data/examples/benchmarks/overview.txt +0 -38
- data/lib/castaddon/node.rb +0 -753
data/lib/bones/copy.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
|
|
2
|
+
module Bones
|
|
3
|
+
|
|
4
|
+
# Class copyin/out
|
|
5
|
+
class Copy
|
|
6
|
+
attr_accessor :name, :domain, :deadline, :direction, :id
|
|
7
|
+
|
|
8
|
+
def initialize(name,domain,deadline,direction,id)
|
|
9
|
+
@name = name
|
|
10
|
+
@domain = domain
|
|
11
|
+
@deadline = deadline
|
|
12
|
+
@direction = direction
|
|
13
|
+
@id = id
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def get_definition(array_definition,type)
|
|
17
|
+
array_definition = '' if type == 'free' || type == 'alloc'
|
|
18
|
+
'void bones_'+type+'_'+@id+'_'+@name+'('+array_definition+');'
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def get_function_call(type)
|
|
22
|
+
'bones_'+type+'_'+@id+'_'+@name+'();'
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
end
|
data/lib/bones/engine.rb
CHANGED
|
@@ -17,13 +17,17 @@ module Bones
|
|
|
17
17
|
# A list of timer files to be found in the skeleton library.
|
|
18
18
|
TIMER_FILES = ['timer_1_start','timer_1_stop','timer_2_start','timer_2_stop']
|
|
19
19
|
# A list of files to be found in the common directory of the skeleton library (excluding timer files).
|
|
20
|
-
COMMON_FILES = ['prologue','epilogue','mem_prologue','mem_copy_H2D','mem_copy_D2H','mem_epilogue']
|
|
20
|
+
COMMON_FILES = ['prologue','epilogue','mem_prologue','mem_copy_H2D','mem_copy_D2H','mem_epilogue','mem_global']
|
|
21
21
|
# The name of the file containing the globals as found in the skeleton library
|
|
22
22
|
COMMON_GLOBALS = 'globals'
|
|
23
23
|
# The name of the file containing the header file for the original C code as found in the skeleton library
|
|
24
24
|
COMMON_HEADER = 'header'
|
|
25
25
|
# The name of the file containing the globals for the kernel files as found in the skeleton library
|
|
26
26
|
COMMON_GLOBALS_KERNEL = 'globals_kernel'
|
|
27
|
+
# The name of the file containing the scheduler code
|
|
28
|
+
COMMON_SCHEDULER = 'scheduler'
|
|
29
|
+
# Global timers
|
|
30
|
+
GLOBAL_TIMERS = 'timer_globals'
|
|
27
31
|
|
|
28
32
|
# The extension of a host file in the skeleton library. See also SKELETON_DEVICE.
|
|
29
33
|
SKELETON_HOST = '.host'
|
|
@@ -54,13 +58,15 @@ module Bones
|
|
|
54
58
|
# --help, -h: Show this message
|
|
55
59
|
#
|
|
56
60
|
def initialize
|
|
57
|
-
@result = {:original_code
|
|
58
|
-
:header_code
|
|
59
|
-
:host_declarations
|
|
60
|
-
:host_code_lists
|
|
61
|
-
:algorithm_declarations
|
|
62
|
-
:algorithm_code_lists
|
|
63
|
-
:verify_code
|
|
61
|
+
@result = {:original_code => [],
|
|
62
|
+
:header_code => [],
|
|
63
|
+
:host_declarations => [],
|
|
64
|
+
:host_code_lists => [],
|
|
65
|
+
:algorithm_declarations => [],
|
|
66
|
+
:algorithm_code_lists => [],
|
|
67
|
+
:verify_code => [],
|
|
68
|
+
:host_device_mem_globals => []}
|
|
69
|
+
@state = 0
|
|
64
70
|
|
|
65
71
|
# Provides a list of possible targets (e.g. GPU-CUDA, 'CPU-OPENCL-INTEL').
|
|
66
72
|
targets = []
|
|
@@ -86,6 +92,9 @@ module Bones
|
|
|
86
92
|
opt :verify, 'Verify correctness of the generated code', :short => 'c', :default => false
|
|
87
93
|
opt :only_alg_number, 'Only generate code for the x-th species (99 -> all)', :short => 'o', :type => Integer, :default => 99
|
|
88
94
|
opt :merge_factor, 'Thread merge factor, default is 1 (==disabled)', :short => 'f', :type => Integer, :default => 1
|
|
95
|
+
opt :register_caching,'Enable register caching: 1:enabled (default), 0:disabled', :short => 'r', :type => Integer, :default => 1
|
|
96
|
+
opt :zero_copy ,'Enable OpenCL zero-copy: 1:enabled (default), 0:disabled', :short => 'z', :type => Integer, :default => 1
|
|
97
|
+
opt :skeletons ,'Enable non-default skeletons: 1:enabled (default), 0:disabled', :short => 's', :type => Integer, :default => 1
|
|
89
98
|
end
|
|
90
99
|
Trollop::die 'no input file supplied (use: --application)' if !@options[:application_given]
|
|
91
100
|
Trollop::die 'no target supplied (use: --target)' if !@options[:target_given]
|
|
@@ -103,6 +112,12 @@ module Bones
|
|
|
103
112
|
# Set a prefix for functions called from the original file but defined in a host file
|
|
104
113
|
@prefix = (@options[:target] == 'GPU-CUDA') ? '' : ''
|
|
105
114
|
|
|
115
|
+
# Setting to include the scheduler (CUDA only)
|
|
116
|
+
@scheduler = (@options[:target] == 'GPU-CUDA') ? true : false
|
|
117
|
+
|
|
118
|
+
# Skip analyse passes for certain targets
|
|
119
|
+
@skiptarget = false #(@options[:target] == 'PAR4ALL') ? true : false
|
|
120
|
+
|
|
106
121
|
# Set the location for the skeleton library
|
|
107
122
|
@dir = {}
|
|
108
123
|
@dir[:library] = File.join(BONES_DIR_SKELETONS,@options[:target])
|
|
@@ -125,7 +140,7 @@ module Bones
|
|
|
125
140
|
def process
|
|
126
141
|
|
|
127
142
|
# Run the preprocessor
|
|
128
|
-
preprocessor = Bones::Preprocessor.new(@source,File.dirname(@options[:application]),@basename)
|
|
143
|
+
preprocessor = Bones::Preprocessor.new(@source,File.dirname(@options[:application]),@basename,@scheduler)
|
|
129
144
|
preprocessor.process
|
|
130
145
|
@result[:header_code] = preprocessor.header_code
|
|
131
146
|
@result[:device_header] = preprocessor.device_header
|
|
@@ -137,11 +152,20 @@ module Bones
|
|
|
137
152
|
parser.type_names << 'size_t'
|
|
138
153
|
ast = parser.parse(preprocessor.target_code)
|
|
139
154
|
ast.preprocess
|
|
155
|
+
|
|
156
|
+
# Add the scheduler's global code
|
|
157
|
+
if @scheduler
|
|
158
|
+
@result[:host_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_SCHEDULER+@extension)))
|
|
159
|
+
end
|
|
140
160
|
|
|
141
161
|
# Set the algorithm's skeleton and generate the global code
|
|
142
162
|
one_time = true
|
|
143
163
|
preprocessor.algorithms.each_with_index do |algorithm,algorithm_number|
|
|
144
164
|
algorithm.species.set_skeleton(File.join(@dir[:library],SKELETON_FILE))
|
|
165
|
+
if @options[:skeletons] == 0
|
|
166
|
+
algorithm.species.skeleton_name = 'default'
|
|
167
|
+
algorithm.species.settings.gsub!('10','00').gsub!('20','00').gsub!('30','00')
|
|
168
|
+
end
|
|
145
169
|
if algorithm.species.skeleton_name && one_time
|
|
146
170
|
@result[:host_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS+@extension)))
|
|
147
171
|
@result[:algorithm_code_lists].push(File.read(File.join(@dir[:common_library],COMMON_GLOBALS_KERNEL+@extension)))
|
|
@@ -149,24 +173,60 @@ module Bones
|
|
|
149
173
|
end
|
|
150
174
|
end
|
|
151
175
|
|
|
152
|
-
# Perform code generation
|
|
176
|
+
# Perform code generation (per-species code)
|
|
153
177
|
@result[:original_code] = ast
|
|
178
|
+
arrays = []
|
|
154
179
|
preprocessor.algorithms.each_with_index do |algorithm,algorithm_number|
|
|
155
180
|
if @options[:only_alg_number] == 99 || algorithm_number == [@options[:only_alg_number],preprocessor.algorithms.length-1].min
|
|
156
181
|
puts MESSAGE+'Starting code generation for algorithm "'+algorithm.name+'"'
|
|
157
182
|
if algorithm.species.skeleton_name
|
|
158
183
|
algorithm.merge_factor = @options[:merge_factor] if (@options[:target] == 'GPU-CUDA')
|
|
184
|
+
algorithm.register_caching_enabled = @options[:register_caching]
|
|
159
185
|
algorithm.set_function(ast)
|
|
160
|
-
algorithm.populate_variables(ast,preprocessor.defines)
|
|
186
|
+
algorithm.populate_variables(ast,preprocessor.defines) if !@skiptarget
|
|
161
187
|
algorithm.populate_lists()
|
|
162
|
-
algorithm.populate_hash()
|
|
188
|
+
algorithm.populate_hash() if !@skiptarget
|
|
163
189
|
generate(algorithm)
|
|
164
190
|
puts MESSAGE+'Code generated using the "'+algorithm.species.skeleton_name+'" skeleton'
|
|
191
|
+
arrays.concat(algorithm.arrays)
|
|
165
192
|
else
|
|
166
193
|
puts WARNING+'Skeleton "'+algorithm.species.name+'" not available'
|
|
167
194
|
end
|
|
168
195
|
end
|
|
169
196
|
end
|
|
197
|
+
|
|
198
|
+
# Only if the scheduler is included
|
|
199
|
+
if @scheduler
|
|
200
|
+
|
|
201
|
+
# Perform code generation (sync statements)
|
|
202
|
+
@result[:host_declarations].push('void bones_synchronize(int bones_task_id);')
|
|
203
|
+
|
|
204
|
+
# Perform code generation (memory allocs)
|
|
205
|
+
allocs = []
|
|
206
|
+
preprocessor.copies.each do |copy|
|
|
207
|
+
if !allocs.include?(copy.name)
|
|
208
|
+
generate_memory('alloc',copy,arrays,0)
|
|
209
|
+
allocs << copy.name
|
|
210
|
+
end
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
# Perform code generation (memory copies)
|
|
214
|
+
preprocessor.copies.each_with_index do |copy,index|
|
|
215
|
+
#puts MESSAGE+'Generating copy code for array "'+copy.name+'"'
|
|
216
|
+
generate_memory('copy',copy,arrays,index)
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# Perform code generation (memory frees)
|
|
220
|
+
frees = []
|
|
221
|
+
preprocessor.copies.each do |copy|
|
|
222
|
+
if !frees.include?(copy.name)
|
|
223
|
+
generate_memory('free',copy,arrays,0)
|
|
224
|
+
frees << copy.name
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
end
|
|
229
|
+
|
|
170
230
|
end
|
|
171
231
|
|
|
172
232
|
# This method writes the output code to files. It creates
|
|
@@ -202,7 +262,7 @@ module Bones
|
|
|
202
262
|
end
|
|
203
263
|
end
|
|
204
264
|
|
|
205
|
-
# Populate the verification
|
|
265
|
+
# Populate the verification file
|
|
206
266
|
if @options[:verify]
|
|
207
267
|
File.open(File.join(directory,@options[:name]+OUTPUT_VERIFICATION+@extension),'w') do |verification|
|
|
208
268
|
verification.puts @result[:header_code]
|
|
@@ -212,15 +272,22 @@ module Bones
|
|
|
212
272
|
end
|
|
213
273
|
end
|
|
214
274
|
|
|
215
|
-
# Populate the target file
|
|
275
|
+
# Populate the target file (host)
|
|
216
276
|
File.open(File.join(directory,@options[:name]+OUTPUT_HOST+@extension),'w') do |target|
|
|
277
|
+
target.puts '#include <cuda_runtime.h>'+NL if @options[:target] == 'GPU-CUDA'
|
|
278
|
+
target.puts "#define ZEROCOPY 0"+NL if @options[:zero_copy] == 0 && @options[:target] == 'CPU-OPENCL-INTEL'
|
|
279
|
+
target.puts "#define ZEROCOPY 1"+NL if @options[:zero_copy] == 1 && @options[:target] == 'CPU-OPENCL-INTEL'
|
|
217
280
|
target.puts @result[:header_code]
|
|
218
|
-
target.puts @result[:algorithm_declarations]
|
|
219
281
|
target.puts
|
|
282
|
+
target.puts @result[:host_device_mem_globals]
|
|
283
|
+
target.puts
|
|
284
|
+
target.puts @result[:algorithm_declarations]
|
|
220
285
|
target.puts @result[:host_code_lists]
|
|
286
|
+
target.puts
|
|
287
|
+
target.puts File.read(File.join(@dir[:common_library],GLOBAL_TIMERS+@extension))
|
|
221
288
|
end
|
|
222
289
|
|
|
223
|
-
# Populate the algorithm file
|
|
290
|
+
# Populate the algorithm file (device)
|
|
224
291
|
File.open(File.join(directory,@options[:name]+OUTPUT_DEVICE+@algorithm_extension),'w') do |algorithm|
|
|
225
292
|
algorithm.puts @result[:device_header]
|
|
226
293
|
algorithm.puts @result[:algorithm_code_lists]
|
|
@@ -251,7 +318,7 @@ module Bones
|
|
|
251
318
|
:device => File.read(file_name_device+@algorithm_extension)}
|
|
252
319
|
|
|
253
320
|
# Perform the transformations on the algorithm's code
|
|
254
|
-
algorithm.perform_transformations(algorithm.species.settings)
|
|
321
|
+
algorithm.perform_transformations(algorithm.species.settings) if !@skiptarget
|
|
255
322
|
|
|
256
323
|
# Load the common skeletons from the skeleton library
|
|
257
324
|
COMMON_FILES.each do |skeleton|
|
|
@@ -291,13 +358,19 @@ module Bones
|
|
|
291
358
|
minihash = { :array => array.name,
|
|
292
359
|
:type => array.type_name,
|
|
293
360
|
:flatten => array.flatten,
|
|
294
|
-
:variable_dimensions => array.size.join('*')
|
|
361
|
+
:variable_dimensions => array.size.join('*'),
|
|
362
|
+
:state => @state.to_s}
|
|
363
|
+
@state += 1
|
|
295
364
|
|
|
296
365
|
# Apply the mini-search-and-replace hash to create the memory allocations, memory copies (if input only), etc.
|
|
297
366
|
processed[:mem_prologue] += search_and_replace(minihash,skeletons[:mem_prologue])
|
|
298
367
|
processed[:mem_copy_H2D] += search_and_replace(minihash,skeletons[:mem_copy_H2D]) if array.input? || array.species.shared?
|
|
299
368
|
processed[:mem_epilogue] += search_and_replace(minihash,skeletons[:mem_epilogue])
|
|
369
|
+
|
|
370
|
+
# Add the device declarations
|
|
371
|
+
@result[:host_device_mem_globals].push(search_and_replace(minihash,skeletons[:mem_global]))
|
|
300
372
|
end
|
|
373
|
+
|
|
301
374
|
# Iterate over all the array variables and create a mini-search-and-replace hash for each array (output arrays)
|
|
302
375
|
algorithm.arrays.select(OUTPUT).each_with_index do |array, num_array|
|
|
303
376
|
hash = algorithm.hash["out#{num_array}".to_sym]
|
|
@@ -305,7 +378,9 @@ module Bones
|
|
|
305
378
|
:type => array.type_name,
|
|
306
379
|
:flatten => array.flatten,
|
|
307
380
|
:offset => '('+hash[:dimension0][:from]+')',
|
|
308
|
-
:variable_dimensions => '('+hash[:dimensions]+')'
|
|
381
|
+
:variable_dimensions => '('+hash[:dimensions]+')',
|
|
382
|
+
:state => @state.to_s}
|
|
383
|
+
@state += 1
|
|
309
384
|
|
|
310
385
|
# Perform selective copy for arrays with 2 dimensions (uses a for-loop over the memory copies)
|
|
311
386
|
if array.dimensions == 2 && @options[:target] == 'GPU-CUDA' && false
|
|
@@ -346,11 +421,17 @@ module Bones
|
|
|
346
421
|
search_and_replace!(algorithm.hash,skeletons[:epilogue])
|
|
347
422
|
|
|
348
423
|
# Construct the final host function, including the timers and memory copies
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
424
|
+
if @scheduler
|
|
425
|
+
host = skeletons[:prologue ] +
|
|
426
|
+
skeletons[:timer_2_start] + skeletons[:host ] + skeletons[:timer_2_stop ] +
|
|
427
|
+
skeletons[:epilogue ]
|
|
428
|
+
else
|
|
429
|
+
host = skeletons[:prologue ] +
|
|
430
|
+
skeletons[:timer_1_start] + processed[:mem_prologue ] + processed[:mem_copy_H2D ] +
|
|
431
|
+
skeletons[:timer_2_start] + skeletons[:host ] + skeletons[:timer_2_stop ] +
|
|
432
|
+
processed[:mem_copy_D2H ] + processed[:mem_epilogue ] + skeletons[:timer_1_stop ] +
|
|
433
|
+
skeletons[:epilogue ]
|
|
434
|
+
end
|
|
354
435
|
|
|
355
436
|
# Generate code to replace the original code, including verification code if specified by the option flag
|
|
356
437
|
verify_skeleton = File.read(File.join(@dir[:verify_library],'verify_results.c'))
|
|
@@ -362,24 +443,59 @@ module Bones
|
|
|
362
443
|
# Add a performance model to the original code
|
|
363
444
|
#replacement_code.insert(0,algorithm.performance_model_code('model'))
|
|
364
445
|
|
|
365
|
-
# Replace mallocs and frees in the original code with aligned memory allocations (only for CPU-OpenCL targets)
|
|
366
|
-
if @options[:target] == 'CPU-OPENCL-INTEL'
|
|
367
|
-
@result[:original_code].
|
|
368
|
-
@result[:original_code].
|
|
446
|
+
# Replace mallocs and frees in the original code with aligned memory allocations (only for CPU-OpenCL targets with zero-copy)
|
|
447
|
+
if @options[:zero_copy] == 1 && @options[:target] == 'CPU-OPENCL-INTEL'
|
|
448
|
+
@result[:original_code].search_and_replace_function_call(C::Variable.parse('malloc'),C::Variable.parse(VARIABLE_PREFIX+'malloc_128'))
|
|
449
|
+
@result[:original_code].search_and_replace_function_call(C::Variable.parse('free'),C::Variable.parse(VARIABLE_PREFIX+'free_128'))
|
|
369
450
|
end
|
|
370
451
|
|
|
371
452
|
# Give the original main function a new name
|
|
372
|
-
@result[:original_code].
|
|
453
|
+
@result[:original_code].search_and_replace_function_definition('main',VARIABLE_PREFIX+'main')
|
|
373
454
|
|
|
374
455
|
# Replace the original code with a function call to the newly generated code
|
|
375
|
-
@result[:original_code].
|
|
456
|
+
@result[:original_code].search_and_replace_node(algorithm.code,replacement_code)
|
|
376
457
|
|
|
377
458
|
# The host code is generated, push the data to the output hashes
|
|
378
459
|
accelerated_definition = 'void '+algorithm.name+'_accelerated('+algorithm.lists[:host_definition]+')'
|
|
379
|
-
@result[:host_code_lists].push(@prefix+accelerated_definition+' {'+NL+host+NL+'}')
|
|
460
|
+
@result[:host_code_lists].push(@prefix+accelerated_definition+' {'+NL+host+NL+'}'+NL+NL)
|
|
380
461
|
@result[:host_declarations].push(@prefix+accelerated_definition+';'+NL+@prefix+original_definition+';')
|
|
381
462
|
end
|
|
382
463
|
|
|
464
|
+
|
|
465
|
+
def generate_memory(type,copy,arrays,index)
|
|
466
|
+
|
|
467
|
+
# Find the corresponding array
|
|
468
|
+
arrays.each do |array|
|
|
469
|
+
if array.name == copy.name && (array.direction == copy.direction || array.direction == INOUT)
|
|
470
|
+
|
|
471
|
+
# Load the skeleton from the skeleton library
|
|
472
|
+
type += copy.direction if type == 'copy'
|
|
473
|
+
skeleton = File.read(File.join(@dir[:common_library],'mem_async_'+type+@extension))
|
|
474
|
+
|
|
475
|
+
# Create the find-and-replace hash
|
|
476
|
+
minihash = { :array => copy.name,
|
|
477
|
+
:id => copy.id,
|
|
478
|
+
:index => index.to_s,
|
|
479
|
+
:direction => copy.direction,
|
|
480
|
+
:definition => array.definition,
|
|
481
|
+
:type => array.type_name,
|
|
482
|
+
:flatten => array.flatten,
|
|
483
|
+
:offset => '0',
|
|
484
|
+
:variable_dimensions => array.size.join('*'),
|
|
485
|
+
:state => copy.deadline}
|
|
486
|
+
|
|
487
|
+
# Instantiate the skeleton and add it to the final result
|
|
488
|
+
@result[:host_code_lists].push(search_and_replace(minihash,skeleton))
|
|
489
|
+
|
|
490
|
+
# Add a forward declaration of this function
|
|
491
|
+
@result[:host_declarations].push(copy.get_definition(array.definition,type))
|
|
492
|
+
|
|
493
|
+
# Done
|
|
494
|
+
return
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
end
|
|
498
|
+
|
|
383
499
|
end
|
|
384
500
|
|
|
385
501
|
end
|
data/lib/bones/preprocessor.rb
CHANGED
|
@@ -9,7 +9,7 @@ module Bones
|
|
|
9
9
|
# * +algorithms+ - An array of identified algorithms, each of class Bones::Algorithm.
|
|
10
10
|
# * +target_code+ - The processed code containing no Bones directives nor other pre-processor directives (such as includes and defines).
|
|
11
11
|
class Preprocessor < Common
|
|
12
|
-
attr_reader :header_code, :algorithms, :target_code, :device_header, :defines
|
|
12
|
+
attr_reader :header_code, :algorithms, :target_code, :device_header, :defines, :scop, :copies
|
|
13
13
|
|
|
14
14
|
# Denotes the start of an algorithmic species.
|
|
15
15
|
IDENTIFIER = '#pragma species'
|
|
@@ -18,9 +18,22 @@ module Bones
|
|
|
18
18
|
WHITESPACE = '\s*'
|
|
19
19
|
|
|
20
20
|
# This directive denotes the start of an algorithm. It is based on the IDENTIFIER constant.
|
|
21
|
-
|
|
21
|
+
SPECIES_START = IDENTIFIER+' kernel'
|
|
22
22
|
# This directive denotes the end of an algorithm. It is based on the IDENTIFIER constant.
|
|
23
|
-
|
|
23
|
+
SPECIES_END = IDENTIFIER+' endkernel'
|
|
24
|
+
|
|
25
|
+
# Start of the scop
|
|
26
|
+
SCOP_START = '#pragma scop'
|
|
27
|
+
# End of the scop
|
|
28
|
+
SCOP_END = '#pragma endscop'
|
|
29
|
+
|
|
30
|
+
# Synchronise directive.
|
|
31
|
+
SYNC = IDENTIFIER+' sync'
|
|
32
|
+
|
|
33
|
+
# Copy in directive.
|
|
34
|
+
COPYIN = IDENTIFIER+ ' copyin'
|
|
35
|
+
# Copy out directive.
|
|
36
|
+
COPYOUT = IDENTIFIER+ ' copyout'
|
|
24
37
|
|
|
25
38
|
# A regular expression that captures a prefix in an algorithm (e.g. unordered/multiple).
|
|
26
39
|
REGEXP_PREFIX = /^[a-z]+ /
|
|
@@ -31,16 +44,18 @@ module Bones
|
|
|
31
44
|
# This is the method which initializes the preprocessor.
|
|
32
45
|
# Initialization requires the target source code to process,
|
|
33
46
|
# which is then set as the class variable +@source_code+.
|
|
34
|
-
def initialize(source_code,directory,filename)
|
|
47
|
+
def initialize(source_code,directory,filename,scheduler)
|
|
35
48
|
@source_code = source_code
|
|
36
|
-
@target_code =
|
|
49
|
+
@target_code = []
|
|
37
50
|
@header_code = ''
|
|
38
51
|
@device_header = ''
|
|
39
52
|
@directory = directory
|
|
40
53
|
@filename = filename
|
|
41
54
|
@algorithms = Array.new
|
|
55
|
+
@copies = Array.new
|
|
42
56
|
@defines = {}
|
|
43
57
|
@found_algorithms = 0
|
|
58
|
+
@scheduler = scheduler
|
|
44
59
|
end
|
|
45
60
|
|
|
46
61
|
# This is the method to perform the actual preprocessing.
|
|
@@ -51,6 +66,7 @@ module Bones
|
|
|
51
66
|
algorithm_code = ''
|
|
52
67
|
species = nil
|
|
53
68
|
found = 0
|
|
69
|
+
alloc_index, free_index = 0, 0
|
|
54
70
|
|
|
55
71
|
# Process the file line by line
|
|
56
72
|
@source_code.each_line.with_index do |line,index|
|
|
@@ -71,7 +87,7 @@ module Bones
|
|
|
71
87
|
@defines[match.first[0].to_sym] = match.first[1]
|
|
72
88
|
|
|
73
89
|
# Found the start of algorithm marker
|
|
74
|
-
elsif line =~ /^#{WHITESPACE}#{
|
|
90
|
+
elsif line =~ /^#{WHITESPACE}#{SPECIES_START}/
|
|
75
91
|
if found == 0
|
|
76
92
|
line = replace_defines(line,@defines)
|
|
77
93
|
prefix, input, output = marker_to_algorithm(line)
|
|
@@ -80,28 +96,92 @@ module Bones
|
|
|
80
96
|
@found_algorithms = @found_algorithms + 1
|
|
81
97
|
end
|
|
82
98
|
found = found + 1
|
|
99
|
+
#@target_code << "int bones_temp_species_start = '#{line.gsub(NL,'')}';"+NL
|
|
83
100
|
|
|
84
101
|
# Found the end of algorithm marker
|
|
85
|
-
elsif line =~ /^#{WHITESPACE}#{
|
|
102
|
+
elsif line =~ /^#{WHITESPACE}#{SPECIES_END}/
|
|
86
103
|
if found == 1
|
|
87
|
-
name = line.strip.scan(/^#{WHITESPACE}#{
|
|
104
|
+
name = line.strip.scan(/^#{WHITESPACE}#{SPECIES_END} (.+)/).join
|
|
88
105
|
name = DEFAULT_NAME if name == ''
|
|
89
106
|
@algorithms.push(Bones::Algorithm.new(name,@filename,index.to_s,species,algorithm_code))
|
|
90
107
|
algorithm_code = ''
|
|
91
108
|
end
|
|
92
109
|
found = found - 1
|
|
110
|
+
#@target_code << "int bones_temp_species_end = '#{line.gsub(NL,'')}';"+NL
|
|
111
|
+
|
|
112
|
+
# Found a sync marker
|
|
113
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{SYNC}/
|
|
114
|
+
sync = line.strip.scan(/^#{WHITESPACE}#{SYNC} (.+)/).join
|
|
115
|
+
@target_code << "bones_synchronize(#{sync});"+NL
|
|
116
|
+
|
|
117
|
+
# Found a copyin marker
|
|
118
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{COPYIN}/
|
|
119
|
+
copies = line.strip.scan(/^#{WHITESPACE}#{COPYIN} (.+)/).join.split(WEDGE).map{ |c| c.strip }
|
|
120
|
+
copies.each_with_index do |copy,copynum|
|
|
121
|
+
name = copy.split('[').first
|
|
122
|
+
domain = copy.scan(/\[(.+)\]/).join.split(DIM_SEP)
|
|
123
|
+
deadline = copy.split('|').last
|
|
124
|
+
@copies.push(Bones::Copy.new(name,domain,deadline,'in',"#{index*100+copynum}"))
|
|
125
|
+
@target_code << "bones_copyin_#{index*100+copynum}_#{name}(#{name});"+NL
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# Found a copyout marker
|
|
129
|
+
elsif @scheduler && line =~ /^#{WHITESPACE}#{COPYOUT}/
|
|
130
|
+
copies = line.strip.scan(/^#{WHITESPACE}#{COPYOUT} (.+)/).join.split(WEDGE).map{ |c| c.strip }
|
|
131
|
+
copies.each_with_index do |copy,copynum|
|
|
132
|
+
name = copy.split('[').first
|
|
133
|
+
domain = copy.scan(/\[(.+)\]/).join.split(DIM_SEP)
|
|
134
|
+
deadline = copy.split('|').last
|
|
135
|
+
@copies.push(Bones::Copy.new(name,domain,deadline,'out',"#{index*100+copynum}"))
|
|
136
|
+
@target_code << "bones_copyout_#{index*100+copynum}_#{name}(#{name});"+NL
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Check if it was a 'pragma scop' / 'pragma endscop' line
|
|
141
|
+
if line =~ /^#{WHITESPACE}#{SCOP_START}/
|
|
142
|
+
alloc_index = index
|
|
143
|
+
elsif line =~ /^#{WHITESPACE}#{SCOP_END}/
|
|
144
|
+
free_index = @target_code.length
|
|
93
145
|
end
|
|
146
|
+
|
|
94
147
|
else
|
|
95
148
|
if found > 0
|
|
96
149
|
algorithm_line = replace_defines(line,@defines)
|
|
97
|
-
@target_code
|
|
150
|
+
@target_code << algorithm_line
|
|
98
151
|
algorithm_code += algorithm_line if line !~ /^#{WHITESPACE}#/
|
|
99
152
|
else
|
|
100
|
-
@target_code
|
|
153
|
+
@target_code << line
|
|
101
154
|
end
|
|
102
155
|
end
|
|
103
156
|
end
|
|
104
|
-
puts WARNING+'Begin/end kernel mismatch ('+@found_algorithms.to_s+' versus '+@algorithms.length.to_s+'), probably missing a "'+
|
|
157
|
+
puts WARNING+'Begin/end kernel mismatch ('+@found_algorithms.to_s+' versus '+@algorithms.length.to_s+'), probably missing a "'+SPECIES_END+'"' unless @algorithms.length == @found_algorithms
|
|
158
|
+
|
|
159
|
+
# Add frees and mallocs
|
|
160
|
+
if @scheduler
|
|
161
|
+
alloc_code, free_code = '', ''
|
|
162
|
+
included_copies = []
|
|
163
|
+
copies.each do |copy|
|
|
164
|
+
if !included_copies.include?(copy.name)
|
|
165
|
+
alloc_code += copy.get_function_call('alloc')+NL
|
|
166
|
+
free_code += copy.get_function_call('free')+NL
|
|
167
|
+
included_copies << copy.name
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Add timers (whole scop timing) and frees/mallocs to the code
|
|
173
|
+
offset = @header_code.lines.count
|
|
174
|
+
@target_code.insert(alloc_index-offset, 'bones_timer_start();'+NL)
|
|
175
|
+
if @scheduler
|
|
176
|
+
@target_code.insert(alloc_index-offset+1, alloc_code)
|
|
177
|
+
@target_code.insert(free_index+2, free_code)
|
|
178
|
+
@target_code.insert(free_index+3, 'bones_timer_stop();'+NL)
|
|
179
|
+
else
|
|
180
|
+
@target_code.insert(free_index+2, 'bones_timer_stop();'+NL)
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
# Join the array
|
|
184
|
+
@target_code = @target_code.join('')
|
|
105
185
|
end
|
|
106
186
|
|
|
107
187
|
# This is the method to preprocess a header file. Currently,
|
|
@@ -143,7 +223,7 @@ module Bones
|
|
|
143
223
|
|
|
144
224
|
# Method to extract the algorithm details from a marker found in code.
|
|
145
225
|
def marker_to_algorithm(marker)
|
|
146
|
-
algorithm = marker.strip.scan(/^#{WHITESPACE}#{
|
|
226
|
+
algorithm = marker.strip.scan(/^#{WHITESPACE}#{SPECIES_START} (.+)/).join
|
|
147
227
|
prefix = ''
|
|
148
228
|
if algorithm =~ REGEXP_PREFIX
|
|
149
229
|
split = algorithm.partition(' ')
|