ikra 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,263 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
|
4
|
+
# Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
|
5
|
+
class EnvironmentBuilder
|
6
|
+
|
7
|
+
# FFI struct with two 32-bit integer fields, `class_id` and `object_id`.
# Its byte size is what `add_base_array` divides by to turn the size of an
# FFI::MemoryPointer into an element count.
class UnionTypeStruct < FFI::Struct
  layout(:class_id, :int32, :object_id, :int32)
end
|
10
|
+
|
11
|
+
# Readable/writable state: the hash of registered objects, the accumulated
# text read from the array-copy templates, and the most recently built FFI
# struct.
attr_accessor :objects, :device_struct_allocation, :ffi_struct
|
15
|
+
|
16
|
+
# Creates an empty builder: no registered objects, no previous results (and
# no types for them), no device allocation text and an empty placeholder for
# the FFI struct.
def initialize
  @device_struct_allocation = ""
  @ffi_struct = {}

  @objects = {}

  # Kept as two separate hashes; both are keyed by "prev_<id>" strings
  @previous_results = {}
  @previous_results_types = {}
end
|
23
|
+
|
24
|
+
# Hash that maps the unique_id of a command to the address of its result on the GPU.
|
25
|
+
# Returns a sorted version of the hash.
|
26
|
+
def previous_results
  # Sort the [key, pointer] pairs and rebuild a new Hash from them, so that
  # callers iterate in sorted key order. The stored hash is left untouched.
  @previous_results.sort.to_h
end
|
29
|
+
|
30
|
+
# Hash that maps the unique_id of a command to the type of its result.
|
31
|
+
# Returns a sorted version of the hash.
|
32
|
+
def previous_results_types
  # Sort the [key, type] pairs and rebuild a new Hash from them, so that
  # callers iterate in sorted key order. The stored hash is left untouched.
  @previous_results_types.sort.to_h
end
|
35
|
+
|
36
|
+
# Adds an object as a lexical variable.
|
37
|
+
def add_object(command_id, identifier, object)
  # Name of the field in the environment: l<command_id>_<identifier>
  cuda_id = "l#{command_id}_#{identifier}"

  # Identity check: the very same object registered under the same name is
  # not added (and not processed) a second time.
  already_registered = objects[cuda_id].object_id == object.object_id

  unless already_registered
    objects[cuda_id] = object
    update_dev_struct_allocation(cuda_id, object)
  end

  cuda_id
end
|
48
|
+
|
49
|
+
# Adds object to the ffi_struct which is of type unique_id => pointer in GPU
|
50
|
+
def add_previous_result(previous_command_id, pointer_to_result)
  # Name of the field in the environment: prev_<previous_command_id>.
  # `tap` hands that name back to the caller after the bookkeeping is done.
  "prev_#{previous_command_id}".tap do |cuda_id|
    @previous_results[cuda_id] = pointer_to_result
    update_dev_struct_allocation(cuda_id, pointer_to_result)
  end
end
|
58
|
+
|
59
|
+
# Adds object to the ffi_struct which is of type unique_id => 0
|
60
|
+
def allocate_previous_pointer(previous_command_id)
  # Registers the entry with 0 in place of an actual result pointer
  placeholder_pointer = 0
  add_previous_result(previous_command_id, placeholder_pointer)
end
|
63
|
+
|
64
|
+
# Adds object to the ffi_struct which is of type unique_id => type of command with unique_id
|
65
|
+
def add_previous_result_type(previous_command_id, type)
  # Same key scheme as add_previous_result: prev_<previous_command_id>
  key = "prev_#{previous_command_id}"
  @previous_results_types.store(key, type)
  key
end
|
71
|
+
|
72
|
+
# Adds an object as a base array
|
73
|
+
def add_base_array(command_id, object)
  # Registers `object` as the base array of command `command_id` under the
  # field name b<command_id>_base and its element count under
  # b<command_id>_size. Returns the field name.
  #
  # Raises AssertionError if a *different* object is already registered
  # under the same field name.
  cuda_id = "b#{command_id}_base"

  if objects.include?(cuda_id)
    # Object already present: only the identical object may be added again
    unless objects[cuda_id].equal?(object)
      raise AssertionError.new("Adding different base array under the same name (#{cuda_id})")
    end

    return cuda_id
  end

  objects[cuda_id] = object

  cuda_id_size = "b#{command_id}_size"
  if object.class == FFI::MemoryPointer
    # The pointer's size is divided by the size of one union type struct to
    # obtain the number of elements
    objects[cuda_id_size] = object.size / UnionTypeStruct.size
  else
    objects[cuda_id_size] = object.size
  end

  # Generate code for copying data to global memory
  update_dev_struct_allocation(cuda_id, object)

  cuda_id
end
|
100
|
+
|
101
|
+
# Add an array for the Structure of Arrays object layout
|
102
|
+
def add_soa_array(name, object)
  # Store the array under <name> and its element count under <name>_size
  objects.store(name, object)
  objects.store("#{name}_size", object.size)

  # Last expression: the result of this call is also what the method returns
  update_dev_struct_allocation(name, object)
end
|
108
|
+
|
109
|
+
# Appends the filled-in "env_builder_copy_array.cpp" template for `field` to
# @device_struct_allocation when `object` is a plain Array or an
# FFI::MemoryPointer. For any other object nothing is appended and nil is
# returned (that case is handled by mem-copying the struct).
def update_dev_struct_allocation(field, object)
  size_bytes =
    if object.class == Array
      # Allocate new array. The element type is taken from the first
      # element only.
      (object.first.class.to_ikra_type.c_size * object.size).to_s
    elsif object.class == FFI::MemoryPointer
      # This is an array of union type structs; the pointer's size is used
      # directly as the byte count.
      object.size.to_s
    end

  # Nothing to do, this case is handled by mem-copying the struct
  return if size_bytes.nil?

  @device_struct_allocation += Translator.read_file(
    file_name: "env_builder_copy_array.cpp",
    replacements: {
      "field" => field,
      "host_env" => Constants::ENV_HOST_IDENTIFIER,
      "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
      "size_bytes" => size_bytes})
end
|
133
|
+
|
134
|
+
# Returns the name of the field containing the base array for a certain identity command.
|
135
|
+
def self.base_identifier(command_id)
  # Field naming scheme: b<command_id>_base
  format("b%s_base", command_id)
end
|
138
|
+
|
139
|
+
# Returns the accumulated array-copy text followed by the filled-in
# "allocate_memcpy_environment_to_device.cpp" template. Does not modify
# @device_struct_allocation.
def build_environment_variable
  # Allocate and copy over environment to device
  environment_transfer = Translator.read_file(
    file_name: "allocate_memcpy_environment_to_device.cpp",
    replacements: {
      "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
      "host_env" => Constants::ENV_HOST_IDENTIFIER})

  # Copy arrays to device side first, then the environment itself
  @device_struct_allocation + environment_transfer
end
|
152
|
+
|
153
|
+
# Returns the text of a C struct definition named `environment_struct` with
# one field per registered object followed by one pointer field per previous
# result type (in sorted key order). Freezes @objects as a side effect.
def build_environment_struct
  # After this point the objects hash can no longer be modified
  @objects.freeze

  object_fields = @objects.map do |key, value|
    if value.class == FFI::MemoryPointer
      # TODO: can this be an extension method of FFI::MemoryPointer?
      " union_t * #{key};\n"
    else
      " #{value.ikra_type.to_c_type} #{key};\n"
    end
  end

  previous_result_fields = previous_results_types.map do |key, value|
    " #{value.to_c_type} *#{key};\n"
  end

  "struct environment_struct\n{\n" + object_fields.join + previous_result_fields.join + "};\n"
end
|
174
|
+
|
175
|
+
# Builds and returns an anonymous FFI::Struct subclass whose layout has one
# field per registered object, then one :pointer field per previous result
# (in sorted key order), then a trailing :dummy int.
def build_ffi_type
  object_fields = @objects.flat_map do |key, value|
    if value.class == FFI::MemoryPointer
      # TODO: can this be an extension method of FFI::MemoryPointer?
      [key.to_sym, :pointer]
    else
      [key.to_sym, value.ikra_type.to_ffi_type]
    end
  end

  previous_result_fields = previous_results.keys.flat_map do |key|
    [key.to_sym, :pointer]
  end

  # Add dummy at the end of layout, because layouts cannot be empty
  struct_layout = object_fields + previous_result_fields + [:dummy, :int]

  Class.new(FFI::Struct).tap do |struct_type|
    struct_type.layout(*struct_layout)
  end
end
|
198
|
+
|
199
|
+
# Instantiates the struct type from `build_ffi_type`, fills in every
# registered object and previous result, stores the struct in @ffi_struct
# and returns `struct.to_ptr`.
#
# Plain Arrays are copied into a newly created FFI::MemoryPointer; only
# arrays whose first element maps to the Int or Float primitive type are
# supported, anything else raises NotImplementedError.
def build_ffi_object
  struct = build_ffi_type.new

  @objects.each do |key, value|
    field = key.to_sym

    # Everything that is not a plain Array is stored as is
    unless value.class == Array
      struct[field] = value
      next
    end

    # TODO: need proper Array handling
    # Check first element to determine type of array
    # TODO: check for polymorphic
    inner_type = value.first.class.to_ikra_type
    array_ptr = FFI::MemoryPointer.new(value.size * inner_type.c_size)

    if inner_type == Types::PrimitiveType::Int
      array_ptr.put_array_of_int(0, value)
    elsif inner_type == Types::PrimitiveType::Float
      array_ptr.put_array_of_float(0, value)
    else
      raise NotImplementedError
    end

    struct[field] = array_ptr
  end

  previous_results.each do |key, value|
    struct[key.to_sym] = value
  end

  # The layout always ends with a :dummy field (see build_ffi_type)
  struct[:dummy] = 0

  @ffi_struct = struct

  struct.to_ptr
end
|
235
|
+
|
236
|
+
# Returns a CurriedBuilder bound to this builder and `command_id`, so that
# objects can be added without repeating the command id.
def [](command_id)
  curried_builder = CurriedBuilder.new(self, command_id)
  curried_builder
end
|
239
|
+
|
240
|
+
# Wrapper around a builder that remembers a command id and passes it as the
# first argument to the builder's `add_object` and `add_base_array`.
class CurriedBuilder
  def initialize(builder, command_id)
    @builder, @command_id = builder, command_id
  end

  # Forwards to builder.add_object(command_id, identifier, object)
  def add_object(identifier, object)
    @builder.add_object(@command_id, identifier, object)
  end

  # Forwards to builder.add_base_array(command_id, object)
  def add_base_array(object)
    @builder.add_base_array(@command_id, object)
  end
end
|
254
|
+
|
255
|
+
# Returns a new builder of the same class with a shallow copy of the objects
# hash and the same device allocation text.
#
# NOTE(review): the previous-result hashes and @ffi_struct are not carried
# over; the copy starts with the defaults from `initialize` for those.
# Confirm that this is intended.
def clone
  self.class.new.tap do |copy|
    copy.objects = @objects.clone
    copy.device_struct_allocation = @device_struct_allocation
  end
end
|
261
|
+
end
|
262
|
+
end
|
263
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require_relative "parallel_section_invocation_visitor"
|
2
|
+
require_relative "program_builder"
|
3
|
+
require_relative "ast_translator"
|
4
|
+
require_relative "../../ast/ssa_generator"
|
5
|
+
|
6
|
+
module Ikra
|
7
|
+
module Translator
|
8
|
+
class HostSectionCommandTranslator < CommandTranslator
|
9
|
+
# Runs the superclass initializer with the same arguments and then installs
# a HostSectionProgramBuilder as @program_builder.
def initialize(root_command:)
  super

  # Use a different program builder
  host_section_program_builder = HostSectionProgramBuilder.new(
    environment_builder: environment_builder,
    root_command: root_command)

  @program_builder = host_section_program_builder
end
|
17
|
+
|
18
|
+
# Entry point of the translation: traces all objects starting from the root
# command, lets the root command accept this translator as a visitor, and
# finally registers the SoA arrays with the environment builder.
#
# Returns whatever `register_soa_arrays` returns.
def start_translation
  Log.info("HostSectionCommandTranslator: Starting translation...")

  # Trace all objects. The result of `trace_all` is not used here; the call
  # presumably populates the tracer for `register_soa_arrays` below.
  @object_tracer = TypeInference::ObjectTracer.new(root_command)
  object_tracer.trace_all

  # Translate the command (might create additional kernels)
  root_command.accept(self)

  # Add SoA arrays to environment
  object_tracer.register_soa_arrays(environment_builder)
end
|
31
|
+
|
32
|
+
# Translates an ArrayHostSectionCommand: prepares and type-infers the
# command's block, generates the C++ host section function from it and
# stores the generated source, the result expression and the result type in
# the program builder.
#
# Raises AssertionError if any type in the block's result type is not a
# Types::LocationAwareArrayType.
def visit_array_host_section_command(command)
  Log.info("Translating ArrayHostSectionCommand [#{command.unique_id}]")

  super

  # A host section must be a top-level (root) command. It uses a special
  # [HostSectionProgramBuilder].

  block_def_node = command.block_def_node

  # Cannot use the normal `translate_block` method here, this is special!
  # TODO: There's some duplication here with [BlockTranslator]

  # Build hash of parameter name -> type mappings
  block_parameter_types = command.block_parameter_names.each_with_index.map do |name, index|
    [name, command.section_input[index].ikra_type.to_union_type]
  end.to_h

  described_parameters = block_parameter_types.map { |id, type| "#{id}: #{type}" }
  Log.info("Translating block with input types [#{described_parameters.join(", ")}]")

  # Add information to block_def_node
  block_def_node.parameters_names_and_types = block_parameter_types

  # Insert return statements (also done by type inference visitor, but we need
  # it now)
  block_def_node.accept(LastStatementReturnsVisitor.new)

  # Insert synthetic __call__ send nodes for return values
  block_def_node.accept(ParallelSectionInvocationVisitor.new)

  # Convert to SSA form
  AST::SSAGenerator.transform_to_ssa!(block_def_node)

  # Type inference
  type_inference_visitor = TypeInference::Visitor.new
  result_type = type_inference_visitor.process_block(block_def_node)

  # Every possible return type must be a location-aware array
  result_type.each do |singleton_type|
    next if singleton_type.is_a?(Types::LocationAwareArrayType)

    raise AssertionError.new("Return value of host section must be a LocationAwareArrayType. Found a code path with #{singleton_type}.")
  end

  # C++/CUDA code generation
  ast_translator = HostSectionASTTranslator.new(command_translator: self)

  # Auxiliary methods are instance methods that are called by the host section.
  # NOTE(review): the translations are not used further in this method;
  # confirm whether `translate_method` is called only for its side effects.
  type_inference_visitor.all_methods.map do |aux_method|
    ast_translator.translate_method(aux_method)
  end

  # Build C++ function
  function_translation = ast_translator.translate_block(block_def_node)

  # Declare local variables. Each declaration is put in front of the function
  # text, which is modified in place.
  block_def_node.local_variables_names_and_types.each do |name, type|
    function_translation.prepend("#{type.to_c_type} #{name};\n")
  end

  mangled_name = "_host_section_#{command.unique_id}_"
  function_parameters = [
    "#{Constants::ENV_TYPE} *#{Constants::ENV_HOST_IDENTIFIER}",
    "#{Constants::ENV_TYPE} *#{Constants::ENV_DEVICE_IDENTIFIER}",
    "#{Constants::PROGRAM_RESULT_TYPE} *#{Constants::PROGRAM_RESULT_IDENTIFIER}"]

  # Define incoming values (parameters). These must all be array commands for now.
  parameter_lines = block_parameter_types.map do |name, type|
    parameter_type = type.singleton_type

    if parameter_type.is_a?(Symbolic::ArrayCommand)
      # Should be initialized with new array command struct. The C type is
      # used without its last two characters for the constructor call.
      "#{parameter_type.to_c_type} #{name} = new #{parameter_type.to_c_type[0...-2]}();"
    else
      "#{parameter_type.to_c_type} #{name};"
    end
  end
  parameter_def = parameter_lines.join("\n") + "\n"

  program_builder.host_section_source = Translator.read_file(
    file_name: "host_section_block_function_head.cpp",
    replacements: {
      "name" => mangled_name,
      "result_type" => result_type.to_c_type,
      "parameters" => function_parameters.join(", "),
      "body" => Translator.wrap_in_c_block(parameter_def + function_translation)})

  # Build function invocation
  args = [
    Constants::ENV_HOST_IDENTIFIER,
    Constants::ENV_DEVICE_IDENTIFIER,
    Constants::PROGRAM_RESULT_IDENTIFIER]

  # Generate code that transfers data back to host. By creating a synthetic send
  # node here, we can let the compiler generate a switch statement if the type of
  # the return value (array) cannot be determined uniquely at compile time.
  host_section_invocation = AST::SourceCodeExprNode.new(
    code: "#{mangled_name}(#{args.join(", ")})")
  host_section_invocation.merge_union_type(result_type)

  device_to_host_transfer_node = AST::SendNode.new(
    receiver: host_section_invocation,
    selector: :__to_host_array__)

  # Type inference is a prerequisite for code generation
  type_inference_visitor.visit_send_node(device_to_host_transfer_node)

  program_builder.host_result_expression = device_to_host_transfer_node.accept(
    ast_translator.expression_translator)
  program_builder.result_type = device_to_host_transfer_node.get_type

  # This method has no meaningful return value (for the moment); the log call
  # stays the last expression.
  Log.info("DONE translating ArrayHostSectionCommand [#{command.unique_id}]")
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
require_relative "array_in_host_section_command"
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class HostSectionCommandTranslator < CommandTranslator
|
4
|
+
# Translates an ArrayInHostSectionCommand: configures the kernel grid, adds
# an extra kernel parameter for the input array together with a deferred
# launch argument, and returns a command translation result that reads the
# input array at index `_tid_`.
def visit_array_in_host_section_command(command)
  Log.info("Translating ArrayInHostSectionCommand [#{command.unique_id}]")

  super

  # This is a root command, determine grid/block dimensions
  kernel_launcher.configure_grid(command.size, block_size: command.block_size)

  array_input_id = "_array_#{self.class.next_unique_id}_"
  kernel_builder.add_additional_parameters("#{command.base_type.to_c_type} *#{array_input_id}")

  # `cmd` is a reference to the command being launched (which might be merged
  # with other commands). Based on that information, we can generate an
  # expression that returns the input array.
  input_array_argument = proc do |cmd|
    arg = Translator::KernelLaunchArgumentGenerator.generate_arg(command, cmd, "cmd")

    if arg.nil?
      raise AssertionError.new("Argument not found: Trying to launch command #{cmd.unique_id}, looking for result of command #{command.unique_id}")
    end

    arg
  end

  # Add placeholder for argument (input array). This should be done here to preserve
  # the order of arguments.
  kernel_launcher.add_additional_arguments(input_array_argument)

  command_translation = build_command_translation_result(
    result: "#{array_input_id}[_tid_]",
    command: command)

  Log.info("DONE translating ArrayInHostSectionCommand [#{command.unique_id}]")

  command_translation
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|