ikra 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,14 @@
|
|
1
|
+
require_relative "../ast_translator"
|
2
|
+
|
3
|
+
module Ikra
|
4
|
+
module Translator
|
5
|
+
# AST translator variant for host sections. On top of whatever `ASTTranslator`
# provides, an instance remembers the command translator it was created with.
class HostSectionASTTranslator < ASTTranslator
    # The command translator that was passed to the constructor.
    attr_reader :command_translator

    # Creates a new translator.
    #
    # command_translator: object stored as-is and exposed via `command_translator`.
    def initialize(command_translator:)
        # Explicit empty parentheses: the superclass initializer must be called
        # without arguments (a bare `super` would forward the keyword argument).
        super()

        @command_translator = command_translator
    end
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative "../../ast/nodes"
|
2
|
+
require_relative "../../ast/visitor"
|
3
|
+
|
4
|
+
module Ikra
|
5
|
+
module Translator
|
6
|
+
|
7
|
+
# This visitor inserts a synthetic method call whenever a parallel section should be
# invoked, i.e.:
# - The return value of the host section (must be an ArrayCommand-typed expression)
# - When the content of an ArrayCommand-typed expression is accessed
#
# NOTE(review): only the return-value case is implemented in this class body; the
# second case is presumably handled elsewhere -- confirm.
class ParallelSectionInvocationVisitor < AST::Visitor
    # Replaces the value of a return node with a `__call__` send node whose
    # receiver is the original value.
    def visit_return_node(node)
        returned_expression = node.value
        synthetic_call = AST::SendNode.new(receiver: node.value, selector: :__call__)

        node.replace_child(returned_expression, synthetic_call)
    end
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require "set"
|
2
|
+
|
3
|
+
require_relative "../program_builder"
|
4
|
+
|
5
|
+
module Ikra
|
6
|
+
module Translator
|
7
|
+
class CommandTranslator
|
8
|
+
# Program builder for host sections. It assembles the complete CUDA program source
# from the pieces provided by `ProgramBuilder` plus the host section source code, and
# keeps its own set of all kernel builders that were ever registered through a
# kernel launcher.
class HostSectionProgramBuilder < ProgramBuilder
    # A host C++ function containing the source code of the host section.
    attr_accessor :host_section_source

    # The type of the result (not an array type, just the inner type).
    attr_accessor :result_type

    # An expression that returns the final result, as a `variable_size_array_t` object
    # pointing to an array in the host memory.
    attr_accessor :host_result_expression

    def initialize(environment_builder:, root_command:)
        # Bare `super` forwards both keyword arguments to the superclass unchanged.
        super

        @kernel_builders = Set.new
    end

    # Raises an AssertionError naming the first required attribute that is still
    # unset. Attributes are checked in the order: host section source, result type,
    # host result expression.
    def assert_ready_to_build
        missing_field_messages = {
            host_section_source: "No host section source code defined",
            result_type: "No result type defined",
            host_result_expression: "No host result expression defined"
        }

        missing_field_messages.each do |field, problem|
            next unless send(field) == nil

            raise AssertionError.new(
                "Not ready to build (HostSectionProgramBuilder): " + problem)
        end
    end

    # Empties the list of kernel launchers. The set of kernel builders collected in
    # `add_kernel_launcher` is left untouched.
    def clear_kernel_launchers
        @kernel_launchers.clear
    end

    # Registers a launcher with the superclass and additionally records its kernel
    # builders in this builder's own set.
    def add_kernel_launcher(launcher)
        super

        # Kernel builders are tracked separately from the launchers.
        @kernel_builders.merge(launcher.kernel_builders)
    end

    # Returns the set of all kernel builders recorded so far.
    def all_kernel_builders
        @kernel_builders
    end

    # Forwards `prepare_additional_args_for_launch(command)` to every kernel launcher.
    def prepare_additional_args_for_launch(command)
        kernel_launchers.each { |launcher| launcher.prepare_additional_args_for_launch(command) }
    end

    # Concatenates the device-memory-free code of every kernel launcher except the
    # last one, skipping launchers that report `reuse_memory?`.
    def build_memory_free_except_last
        kernel_launchers[0...-1].reduce("") do |source, launcher|
            if launcher.reuse_memory?
                source
            else
                source + launcher.build_device_memory_free_in_host_section
            end
        end
    end

    # Builds the CUDA program. Returns the source code string.
    def build_program
        assert_ready_to_build

        sections = [
            build_header,
            build_struct_types,
            build_header_structs,
            build_array_command_struct_types,
            build_environment_struct,
            build_kernels,
            host_section_source]

        # Program entry point, appended after all other sections.
        entry_point = Translator.read_file(
            file_name: "host_section_entry_point.cpp",
            replacements: {
                "prepare_environment" => environment_builder.build_environment_variable,
                "host_env_var_name" => Constants::ENV_HOST_IDENTIFIER,
                "host_result_array" => host_result_expression})

        sections.inject(:+) + entry_point
    end
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Symbolic
    # Base class of all inputs. Subclasses implement `translate_input`, which returns a
    # `Translator::InputTranslationResult` describing the block parameters that the
    # input provides.
    class Input
        def translate_input(**kwargs)
            raise NotImplementedError.new
        end
    end

    # An input that provides exactly one block parameter.
    class SingleInput < Input
        # parent_command: command whose `block_parameter_names` are consumed.
        # command_translator: translator used to translate this input.
        # start_eat_params_offset: index of the block parameter name to use.
        def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
            # Translate input using visitor
            translated = command_translator.translate_input(self)

            block_parameter = Translator::Variable.new(
                name: parent_command.block_parameter_names[start_eat_params_offset],
                type: translated.result_type)

            Translator::InputTranslationResult.new(
                parameters: [block_parameter],
                command_translation_result: translated)
        end
    end

    # An input that provides two consecutive block parameters, both typed with the
    # result type of the translated input.
    class ReduceInput < SingleInput
        def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
            # Translate input using visitor
            translated = command_translator.translate_input(self)

            # TODO: Fix type inference (sometimes type has to be expanded)
            positions = [start_eat_params_offset, start_eat_params_offset + 1]
            block_parameters = positions.map do |position|
                Translator::Variable.new(
                    name: parent_command.block_parameter_names[position],
                    type: translated.result_type)
            end

            Translator::InputTranslationResult.new(
                parameters: block_parameters,
                command_translation_result: translated)
        end
    end

    # A stencil input whose values are visible to the block as one array parameter,
    # while the block function actually receives one value per stencil offset.
    class StencilArrayInput < Input
        def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
            # Parameters are allocated in a constant-sized array: one slot per offset.
            slot_count = parent_command.offsets.size

            # The single block parameter name visible to the block.
            array_name = parent_command.block_parameter_names[start_eat_params_offset]

            # Translate input using visitor
            translated = command_translator.translate_input(self)

            # For type inference the block sees a single array-typed parameter.
            inferred_parameters = [Translator::Variable.new(
                name: array_name,
                type: translated.result_type.to_array_type)]

            # Names of the individual values that are passed instead of the array.
            slot_names = (0...slot_count).map { |slot| "_#{array_name}_#{slot}" }

            # Code that rebuilds the array from the individual values.
            reconstruction = Translator.read_file(
                file_name: "stencil_array_reconstruction.cpp",
                replacements: {
                    "type" => translated.result_type.to_c_type,
                    "name" => array_name.to_s,
                    "initializer" => "{ #{slot_names.join(", ")} }"})

            # Pass multiple single values instead of array
            passed_parameters = slot_names.map do |slot_name|
                Translator::Variable.new(
                    name: slot_name,
                    type: translated.result_type)
            end

            Translator::InputTranslationResult.new(
                pre_execution: reconstruction,
                parameters: inferred_parameters,
                override_block_parameters: passed_parameters,
                command_translation_result: translated)
        end
    end

    # A stencil input that provides one separate block parameter per stencil offset.
    class StencilSingleInput < Input
        def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
            # Translate input using visitor
            translated = command_translator.translate_input(self)

            # One parameter per offset, starting at `start_eat_params_offset`.
            parameter_count = parent_command.offsets.size
            positions = start_eat_params_offset...(start_eat_params_offset + parameter_count)

            # Take return type from previous computation
            block_parameters = positions.map do |position|
                Translator::Variable.new(
                    name: parent_command.block_parameter_names[position],
                    type: translated.result_type)
            end

            Translator::InputTranslationResult.new(
                parameters: block_parameters,
                command_translation_result: translated)
        end
    end
end
|
114
|
+
|
115
|
+
module Translator
|
116
|
+
# Value object holding everything that translating a single input produced.
class InputTranslationResult
    # Code to be executed before the actual execution of the block begins (but inside the
    # block function)
    attr_reader :pre_execution

    # Parameter names and types of the block (for type inference)
    attr_reader :parameters

    # Change (override) parameters of the block (to actually pass different parameters).
    # This does not affect type inference. `nil` when no override was specified.
    attr_reader :override_block_parameters

    # The translation result of the input command itself.
    attr_reader :command_translation_result

    # `parameters` and `command_translation_result` are required; `pre_execution`
    # defaults to the empty string and `override_block_parameters` to `nil`.
    def initialize(
            pre_execution: "",
            parameters:,
            override_block_parameters: nil,
            command_translation_result:)

        @command_translation_result = command_translation_result
        @override_block_parameters = override_block_parameters
        @parameters = parameters
        @pre_execution = pre_execution
    end
end
|
142
|
+
|
143
|
+
# Instances of this class store the result of translation of multiple input commands.
# Instance methods can be used to access the values of the translated commands. Most
# methods support access by index and access by range, in which case values are
# aggregated, if meaningful.
#
# Index arguments are checked against `Integer` (not the removed `Fixnum` constant),
# so the accessors work on every Ruby version.
class EntireInputTranslationResult
    # input_translation_results: array of objects responding to `parameters`,
    # `pre_execution`, `override_block_parameters` and `command_translation_result`.
    def initialize(input_translation_results)
        @input = input_translation_results
    end

    # Block parameters of one input (Integer index) or the concatenated block
    # parameters of several inputs (Range). Raises ArgumentError for other arguments.
    def block_parameters(index = 0..-1)
        case index
        when Integer
            @input[index].parameters
        when Range
            @input[index].reduce([]) { |acc, n| acc + n.parameters }
        else
            raise ArgumentError.new("Expected Integer or Range")
        end
    end

    # Pre-execution code of one input (Integer index) or of several inputs (Range).
    # In the Range case every snippet is preceded by a newline.
    # Raises ArgumentError for other arguments.
    def pre_execution(index = 0..-1)
        case index
        when Integer
            @input[index].pre_execution
        when Range
            @input[index].reduce("") { |acc, n| acc + "\n" + n.pre_execution }
        else
            raise ArgumentError.new("Expected Integer or Range")
        end
    end

    # Parameters that are actually passed to the block: the override parameters of
    # an input if it specifies any, otherwise its regular parameters. Supports a
    # single Integer index or a Range (results are concatenated).
    # Raises ArgumentError for other arguments.
    def override_block_parameters(index = 0..-1)
        case index
        when Integer
            effective_parameters(@input[index])
        when Range
            @input[index].reduce([]) { |acc, n| acc + effective_parameters(n) }
        else
            raise ArgumentError.new("Expected Integer or Range")
        end
    end

    # Execution code of one translated command (Integer index) or the concatenated
    # execution code of several (Range). Raises ArgumentError for other arguments.
    def execution(index = 0..-1)
        case index
        when Integer
            @input[index].command_translation_result.execution
        when Range
            @input[index].reduce("") do |acc, n|
                acc + n.command_translation_result.execution
            end
        else
            raise ArgumentError.new("Expected Integer or Range")
        end
    end

    # Result of one translated command (Integer index) or an array with the results
    # of several (Range). Raises ArgumentError for other arguments.
    def result(index = 0..-1)
        case index
        when Integer
            @input[index].command_translation_result.result
        when Range
            @input[index].map { |n| n.command_translation_result.result }
        else
            raise ArgumentError.new("Expected Integer or Range")
        end
    end

    # The command translation result of the input at `index`.
    def command_translation_result(index)
        @input[index].command_translation_result
    end

    private

    # Override parameters of `input` if present, its regular parameters otherwise.
    def effective_parameters(input)
        overrides = input.override_block_parameters
        overrides.nil? ? input.parameters : overrides
    end
end
|
225
|
+
end
|
226
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator
|
4
|
+
|
5
|
+
# Builds a CUDA kernel. This class is responsible for generating the kernel function
|
6
|
+
# itself (not the block functions/methods though).
|
7
|
+
#
|
8
|
+
# For example:
|
9
|
+
# __global__ void kernel(env_t *_env_, int *_result_, int *_previous_1_*, ...) { ... }
|
10
|
+
|
11
|
+
# Collects everything needed to generate one CUDA kernel function and renders it
# through the "kernel.cpp" template (see `build_kernel`).
class KernelBuilder
    # Name of the generated kernel function ("kernel_" followed by a unique ID).
    attr_accessor :kernel_name

    # --- Optional fields ---

    # An array of all methods that should be translated
    # NOTE(review): this accessor shadows Ruby's built-in `Object#methods`.
    attr_accessor :methods

    # An array of all blocks that should be translated
    attr_accessor :blocks

    # Additional parameters that this kernel should accept (to access the result
    # of previous kernels)
    attr_accessor :previous_kernel_input

    # --- Required fields ---

    # A string returning the result of this kernel for one thread
    attr_accessor :block_invocation

    # A string containing the statements that execute the body of the kernel
    attr_accessor :execution

    # The result type of this kernel
    attr_accessor :result_type

    # Additional Parameters for certain commands that are attached to the kernel
    attr_accessor :additional_parameters

    # IDs of commands whose results are kept on the GPU (maps result ID to type)
    attr_accessor :cached_results

    def initialize
        @methods = []
        @blocks = []
        @previous_kernel_input = []
        @block_invocation = nil
        @num_threads = nil
        @additional_parameters = []
        @kernel_name = "kernel_" + CommandTranslator.next_unique_id.to_s
        @cached_results = {}
        @execution = ""
    end

    # --- Prepare kernel ---

    # Adds one or multiple methods (source code strings) to this builder.
    def add_methods(*method)
        @methods.push(*method)
    end

    # Adds a block (source code string) to this builder.
    def add_block(block)
        @blocks.push(block)
    end

    # Adds a parameter through which the result of a previous kernel is passed in.
    # The parameter must respond to `type` and `name`.
    def add_previous_kernel_parameter(parameter)
        @previous_kernel_input.push(parameter)
    end

    # Add additional parameters to the kernel function that might be needed for some computations
    def add_additional_parameters(parameter)
        @additional_parameters.push(parameter)
    end

    # Adds a result that has to be kept on GPU. Therefore additional memory allocations will be made
    def add_cached_result(result_id, type)
        @cached_results[result_id] = type
    end

    # Raises an AssertionError naming the first required field (`block_invocation`,
    # `result_type`) that is not set.
    def assert_ready_to_build
        [:block_invocation, :result_type].each do |selector|
            next unless send(selector).nil?

            raise AssertionError.new(
                "Not ready to build (KernelBuilder): #{selector} is not set")
        end
    end

    # --- Constructor source code ---

    # All registered methods, separated by blank lines.
    def build_methods
        @methods.join("\n\n")
    end

    # All registered blocks, separated by blank lines.
    def build_blocks
        @blocks.join("\n\n")
    end

    # Renders the kernel function and returns what the "kernel.cpp" template yields.
    #
    # The statements that store cached results are appended to a local copy of the
    # execution code, so calling this method repeatedly produces the same output and
    # leaves the `execution` attribute untouched.
    #
    # Raises AssertionError if a required field is missing.
    def build_kernel
        Log.info("Building kernel (num_blocks=#{@blocks.size})")
        assert_ready_to_build

        # Build parameters
        p_env = "#{Constants::ENV_TYPE} *#{Constants::ENV_IDENTIFIER}"
        p_num_threads = "#{Constants::NUM_THREADS_TYPE} #{Constants::NUM_THREADS_IDENTIFIER}"
        p_result = "#{result_type.to_c_type} *#{Constants::RESULT_IDENTIFIER}"

        # One output pointer per cached result.
        p_cached_results = cached_results.map do |result_id, type|
            "#{type.to_c_type} *#{Constants::RESULT_IDENTIFIER}#{result_id}"
        end

        # One input pointer per result of a previous kernel.
        previous_kernel_params = previous_kernel_input.map do |var|
            "#{var.type.to_c_type} *#{var.name}"
        end

        parameters = ([p_env, p_num_threads, p_result] + p_cached_results +
            previous_kernel_params + additional_parameters).join(", ")

        # Kernel body: the configured execution code followed by one statement per
        # cached result that copies the temporary result into its output array.
        kernel_body = cached_results.keys.reduce(execution) do |code, result_id|
            code + "\n #{Constants::RESULT_IDENTIFIER}#{result_id}[_tid_] = " \
                "#{Constants::TEMP_RESULT_IDENTIFIER}#{result_id};"
        end

        # Build kernel
        Translator.read_file(file_name: "kernel.cpp", replacements: {
            "block_invocation" => block_invocation,
            "execution" => kernel_body,
            "kernel_name" => kernel_name,
            "parameters" => parameters,
            "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
    end
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|