ikra 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,14 @@
|
|
1
|
+
require_relative "../ast_translator"
|
2
|
+
|
3
|
+
module Ikra
|
4
|
+
module Translator
|
5
|
+
# An ASTTranslator subclass that additionally carries the command translator it was
# constructed with, exposed read-only through #command_translator.
class HostSectionASTTranslator < ASTTranslator
  # @param command_translator the object later returned by #command_translator
  def initialize(command_translator:)
    # Empty parentheses are deliberate: the superclass constructor is called without
    # arguments instead of having the keyword argument forwarded to it.
    super()

    @command_translator = command_translator
  end

  # The command translator given to the constructor.
  attr_reader :command_translator
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require_relative "../../ast/nodes"
|
2
|
+
require_relative "../../ast/visitor"
|
3
|
+
|
4
|
+
module Ikra
|
5
|
+
module Translator
|
6
|
+
|
7
|
+
# This visitor inserts a synthetic method call whenever a parallel section should be
|
8
|
+
# invoked, i.e.:
|
9
|
+
# - The return value of the host section (must be an ArrayCommand-typed expression)
|
10
|
+
# - When the content of an ArrayCommand-typed expression is accessed
|
11
|
+
class ParallelSectionInvocationVisitor < AST::Visitor
  # Replaces the value of a return node with a synthetic send of `__call__` whose
  # receiver is the original value.
  #
  # @param node the return node being visited
  # @return whatever `node.replace_child` returns
  def visit_return_node(node)
    returned_value = node.value
    synthetic_call = AST::SendNode.new(receiver: returned_value, selector: :__call__)

    node.replace_child(returned_value, synthetic_call)
  end
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require "set"
|
2
|
+
|
3
|
+
require_relative "../program_builder"
|
4
|
+
|
5
|
+
module Ikra
|
6
|
+
module Translator
|
7
|
+
class CommandTranslator
|
8
|
+
# Program builder for host sections. On top of ProgramBuilder it keeps its own set of
# kernel builders and assembles the program around a host section function.
class HostSectionProgramBuilder < ProgramBuilder
  # Source code of the host C++ function that contains the host section.
  attr_accessor :host_section_source

  # Type of the result (the inner type only, not an array type).
  attr_accessor :result_type

  # Expression returning the final result as a `variable_size_array_t` object that
  # points to an array in host memory.
  attr_accessor :host_result_expression

  def initialize(environment_builder:, root_command:)
    # Bare `super` forwards both keyword arguments to ProgramBuilder unchanged.
    super

    @kernel_builders = Set.new
  end

  # Raises AssertionError naming the first required field that is still nil.
  def assert_ready_to_build
    required_fields = [
      [host_section_source, "No host section source code defined"],
      [result_type, "No result type defined"],
      [host_result_expression, "No host result expression defined"]
    ]

    required_fields.each do |value, complaint|
      next unless value.nil?

      raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): " + complaint)
    end

    nil
  end

  # Empties the list of kernel launchers. The separately tracked kernel builders are
  # left untouched.
  def clear_kernel_launchers
    @kernel_launchers.clear
  end

  # Registers the launcher with the superclass and remembers its kernel builders here.
  def add_kernel_launcher(launcher)
    super

    # Kernel builders are tracked by this class itself.
    @kernel_builders.merge(launcher.kernel_builders)
  end

  # The set of kernel builders collected through #add_kernel_launcher.
  def all_kernel_builders
    @kernel_builders
  end

  # Asks every kernel launcher to prepare its additional launch arguments for `command`.
  def prepare_additional_args_for_launch(command)
    kernel_launchers.each { |launcher| launcher.prepare_additional_args_for_launch(command) }
  end

  # Concatenates the device-memory-free code of every launcher except the last one,
  # skipping launchers that reuse memory.
  def build_memory_free_except_last
    kernel_launchers[0...-1].inject("") do |source, launcher|
      if launcher.reuse_memory?
        source
      else
        source + launcher.build_device_memory_free_in_host_section
      end
    end
  end

  # Builds the CUDA program. Returns the source code string.
  def build_program
    assert_ready_to_build

    program = [
      build_header,
      build_struct_types,
      build_header_structs,
      build_array_command_struct_types,
      build_environment_struct,
      build_kernels,
      host_section_source
    ].inject(:+)

    # Program entry point
    entry_point = Translator.read_file(
      file_name: "host_section_entry_point.cpp",
      replacements: {
        "prepare_environment" => environment_builder.build_environment_variable,
        "host_env_var_name" => Constants::ENV_HOST_IDENTIFIER,
        "host_result_array" => host_result_expression})

    program + entry_point
  end
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Symbolic
|
3
|
+
class Input
  # Placeholder that subclasses override. This base version accepts any keyword
  # arguments and always raises NotImplementedError.
  def translate_input(**_unused)
    raise NotImplementedError
  end
end
|
8
|
+
|
9
|
+
class SingleInput < Input
  # Translates this input through `command_translator` and binds the result to one
  # block parameter of `parent_command`.
  #
  # @param parent_command command whose `block_parameter_names` supplies the name
  # @param command_translator object responding to `translate_input`
  # @param start_eat_params_offset index of the block parameter name to use
  # @return [Translator::InputTranslationResult]
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    block_parameter = Translator::Variable.new(
      name: parent_command.block_parameter_names[start_eat_params_offset],
      type: translated.result_type)

    Translator::InputTranslationResult.new(
      parameters: [block_parameter],
      command_translation_result: translated)
  end
end
|
23
|
+
|
24
|
+
class ReduceInput < SingleInput
  # Translates this input through `command_translator` and binds the result type to two
  # consecutive block parameters of `parent_command`, starting at
  # `start_eat_params_offset`.
  #
  # @return [Translator::InputTranslationResult]
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    # TODO: Fix type inference (sometimes type has to be expanded)
    parameters = [0, 1].map do |shift|
      Translator::Variable.new(
        name: parent_command.block_parameter_names[start_eat_params_offset + shift],
        type: translated.result_type)
    end

    Translator::InputTranslationResult.new(
      parameters: parameters,
      command_translation_result: translated)
  end
end
|
43
|
+
|
44
|
+
class StencilArrayInput < Input
  # Translates a stencil input whose block sees one array-typed parameter.
  #
  # For type inference the block has a single parameter of the array type. The block
  # is actually passed one value per stencil offset (`override_block_parameters`), and
  # the `pre_execution` code from "stencil_array_reconstruction.cpp" receives those
  # value names as the array initializer.
  #
  # @return [Translator::InputTranslationResult]
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # One value per stencil offset
    value_count = parent_command.offsets.size

    # The single parameter name declared by the block
    array_name = parent_command.block_parameter_names[start_eat_params_offset]

    # Translate input using visitor
    translated = command_translator.translate_input(self)
    element_type = translated.result_type

    # The element type comes from the previous computation
    parameters = [
      Translator::Variable.new(name: array_name, type: element_type.to_array_type)
    ]

    # Names of the individual values that fill the parameter array
    value_names = Array.new(value_count) { |position| "_#{array_name}_#{position}" }

    pre_execution = Translator.read_file(
      file_name: "stencil_array_reconstruction.cpp",
      replacements: {
        "type" => element_type.to_c_type,
        "name" => array_name.to_s,
        "initializer" => "{ " + value_names.join(", ") + " }"})

    # Pass multiple single values instead of an array
    override_block_parameters = value_names.map do |value_name|
      Translator::Variable.new(name: value_name, type: element_type)
    end

    Translator::InputTranslationResult.new(
      pre_execution: pre_execution,
      parameters: parameters,
      override_block_parameters: override_block_parameters,
      command_translation_result: translated)
  end
end
|
89
|
+
|
90
|
+
class StencilSingleInput < Input
  # Translates a stencil input whose values are passed as separate block parameters:
  # one parameter per stencil offset, all typed with the translated result type.
  #
  # @return [Translator::InputTranslationResult]
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    # One block parameter per stencil offset
    value_count = parent_command.offsets.size
    positions = start_eat_params_offset...(start_eat_params_offset + value_count)

    # The type comes from the previous computation
    parameters = positions.map do |position|
      Translator::Variable.new(
        name: parent_command.block_parameter_names[position],
        type: translated.result_type)
    end

    Translator::InputTranslationResult.new(
      parameters: parameters,
      command_translation_result: translated)
  end
end
|
113
|
+
end
|
114
|
+
|
115
|
+
module Translator
|
116
|
+
# Value object holding the outcome of translating one input.
class InputTranslationResult
  # pre_execution:              code to run before the block body starts executing (but
  #                             inside the block function)
  # parameters:                 parameter names and types of the block (for type inference)
  # override_block_parameters:  parameters actually passed to the block in place of
  #                             `parameters`; has no effect on type inference
  # command_translation_result: translation result of the input command
  attr_reader :pre_execution,
              :parameters,
              :override_block_parameters,
              :command_translation_result

  # `parameters` and `command_translation_result` are required; `pre_execution`
  # defaults to an empty string and `override_block_parameters` to nil.
  def initialize(pre_execution: "", parameters:, override_block_parameters: nil, command_translation_result:)
    @pre_execution, @parameters = pre_execution, parameters
    @override_block_parameters = override_block_parameters
    @command_translation_result = command_translation_result
  end
end
|
142
|
+
|
143
|
+
# Instances of this class store the result of translation of multiple input commands.
|
144
|
+
# Instance methods can be used to access the values of the translated commands. Most
|
145
|
+
# methods support access by index and access by range, in which case values are
|
146
|
+
# aggregated, if meaningful.
|
147
|
+
# Holds the translation results of several input commands.
#
# Most accessors take either an Integer (value of that one input) or a Range (values of
# the selected inputs, aggregated as described per method). The default index `0..-1`
# selects all inputs. Any other index type raises ArgumentError.
#
# The index is matched against Integer rather than Fixnum: the Fixnum constant no
# longer exists on current Ruby versions, and Integer also matches the values that used
# to be Fixnums on old versions.
class EntireInputTranslationResult
  # @param input_translation_results collection of input translation results; it is
  #   indexed with both Integers and Ranges
  def initialize(input_translation_results)
    @input = input_translation_results
  end

  # Block parameters of one input, or the concatenated parameters of a range of inputs.
  #
  # @raise [ArgumentError] if index is neither an Integer nor a Range
  def block_parameters(index = 0..-1)
    case index
    when Integer
      return @input[index].parameters
    when Range
      return @input[index].reduce([]) do |acc, n|
        acc + n.parameters
      end
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # Pre-execution code of one input, or the pre-execution code of a range of inputs
  # concatenated, each piece preceded by a newline.
  #
  # @raise [ArgumentError] if index is neither an Integer nor a Range
  def pre_execution(index = 0..-1)
    case index
    when Integer
      return @input[index].pre_execution
    when Range
      return @input[index].reduce("") do |acc, n|
        acc + "\n" + n.pre_execution
      end
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # Parameters that are actually passed to the block: the override parameters of an
  # input if it has any, its regular parameters otherwise. Concatenated for a range.
  #
  # @raise [ArgumentError] if index is neither an Integer nor a Range
  def override_block_parameters(index = 0..-1)
    case index
    when Integer
      return effective_block_parameters(@input[index])
    when Range
      return @input[index].reduce([]) do |acc, n|
        acc + effective_block_parameters(n)
      end
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # Execution code of one input's command translation result, or the execution code of
  # a range of inputs concatenated without separator.
  #
  # @raise [ArgumentError] if index is neither an Integer nor a Range
  def execution(index = 0..-1)
    case index
    when Integer
      return @input[index].command_translation_result.execution
    when Range
      return @input[index].reduce("") do |acc, n|
        acc + n.command_translation_result.execution
      end
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # Result of one input's command translation result, or an array with the results of
  # a range of inputs.
  #
  # @raise [ArgumentError] if index is neither an Integer nor a Range
  def result(index = 0..-1)
    case index
    when Integer
      return @input[index].command_translation_result.result
    when Range
      return @input[index].map do |n|
        n.command_translation_result.result
      end
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # The command translation result of the input at `index`.
  def command_translation_result(index)
    return @input[index].command_translation_result
  end

  private

  # Override parameters of `input` when specified, its regular parameters otherwise.
  def effective_block_parameters(input)
    if input.override_block_parameters.nil?
      # No override specified
      input.parameters
    else
      input.override_block_parameters
    end
  end
end
|
225
|
+
end
|
226
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator
|
4
|
+
|
5
|
+
# Builds a CUDA kernel. This class is responsible for generating the kernel function
|
6
|
+
# itself (not the block functions/methods though).
|
7
|
+
#
|
8
|
+
# For example:
|
9
|
+
# __global__ void kernel(env_t *_env_, int *_result_, int *_previous_1_, ...) { ... }
|
10
|
+
|
11
|
+
# Collects the pieces of one CUDA kernel function and renders it through the
# "kernel.cpp" template.
class KernelBuilder
  # Name of the generated kernel function ("kernel_" followed by a unique ID unless
  # reassigned).
  attr_accessor :kernel_name

  # --- Optional fields ---

  # An array of all methods that should be translated.
  # NOTE(review): this accessor replaces the `methods` reflection method that every
  # Ruby object normally has.
  attr_accessor :methods

  # An array of all blocks that should be translated
  attr_accessor :blocks

  # Additional parameters that this kernel should accept (to access the result
  # of previous kernels)
  attr_accessor :previous_kernel_input

  # --- Required fields ---

  # A string returning the result of this kernel for one thread
  attr_accessor :block_invocation

  # A string containing the statements that execute the body of the kernel
  attr_accessor :execution

  # The result type of this kernel
  attr_accessor :result_type

  # Additional parameters for certain commands that are attached to the kernel
  attr_accessor :additional_parameters

  # IDs (mapped to types) of commands whose results are kept on the GPU
  attr_accessor :cached_results

  def initialize
    @methods = []
    @blocks = []
    @previous_kernel_input = []
    @block_invocation = nil
    # Not read anywhere within this class.
    @num_threads = nil
    @additional_parameters = []
    @kernel_name = "kernel_" + CommandTranslator.next_unique_id.to_s
    @cached_results = {}
    @execution = ""
  end

  # --- Prepare kernel ---

  # Adds one or multiple methods (source code strings) to this builder.
  def add_methods(*method)
    @methods.push(*method)
  end

  # Adds a block (source code string) to this builder.
  def add_block(block)
    @blocks.push(block)
  end

  # Adds a variable (responding to `type` and `name`) that becomes a pointer parameter
  # of the kernel.
  def add_previous_kernel_parameter(parameter)
    @previous_kernel_input.push(parameter)
  end

  # Adds an additional parameter (a ready-made declaration string) to the kernel
  # function that might be needed for some computations.
  def add_additional_parameters(parameter)
    @additional_parameters.push(parameter)
  end

  # Adds a result that has to be kept on the GPU. `result_id` is appended to
  # identifier strings, so it has to be a String.
  def add_cached_result(result_id, type)
    @cached_results[result_id] = type
  end

  # Raises AssertionError if `block_invocation` or `result_type` is still nil.
  def assert_ready_to_build
    [:block_invocation, :result_type].each do |selector|
      if send(selector).nil?
        raise AssertionError.new(
          "Not ready to build (KernelBuilder): #{selector} is not set")
      end
    end
  end

  # --- Constructor source code ---

  # All method sources, separated by blank lines.
  def build_methods
    return @methods.join("\n\n")
  end

  # All block sources, separated by blank lines.
  def build_blocks
    return @blocks.join("\n\n")
  end

  # Renders the kernel through the "kernel.cpp" template and returns what
  # `Translator.read_file` returns.
  #
  # The statements that store cached results are added to the generated execution
  # code only. `execution` itself is left unchanged, so building the same kernel
  # more than once yields the same source every time instead of accumulating
  # duplicate store statements.
  #
  # @raise [AssertionError] if a required field is not set
  def build_kernel
    Log.info("Building kernel (num_blocks=#{@blocks.size})")
    assert_ready_to_build

    return Translator.read_file(file_name: "kernel.cpp", replacements: {
      "block_invocation" => block_invocation,
      "execution" => execution + build_cached_result_stores,
      "kernel_name" => kernel_name,
      "parameters" => build_parameter_list,
      "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
  end

  private

  # Comma-separated kernel parameter declarations: environment, number of threads,
  # result, cached results, previous kernel inputs, additional parameters.
  def build_parameter_list
    p_env = Constants::ENV_TYPE + " *" + Constants::ENV_IDENTIFIER
    p_num_threads = Constants::NUM_THREADS_TYPE + " " + Constants::NUM_THREADS_IDENTIFIER
    p_result = result_type.to_c_type + " *" + Constants::RESULT_IDENTIFIER

    p_cached_results = cached_results.map do |result_id, type|
      type.to_c_type + " *" + Constants::RESULT_IDENTIFIER + result_id
    end

    previous_kernel_params = previous_kernel_input.map do |var|
      var.type.to_c_type + " *" + var.name.to_s
    end

    ([p_env, p_num_threads, p_result] + p_cached_results + previous_kernel_params +
      additional_parameters).join(", ")
  end

  # One statement per cached result, each preceded by a newline, that copies the
  # temporary result of the current thread into the cached result array.
  def build_cached_result_stores
    cached_results.keys.map do |result_id|
      "\n" + " " + Constants::RESULT_IDENTIFIER + result_id + "[_tid_] = " +
        Constants::TEMP_RESULT_IDENTIFIER + result_id + ";"
    end.join
  end
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|