ikra 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,263 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
|
4
|
+
# Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
|
5
|
+
class EnvironmentBuilder
|
6
|
+
|
7
|
+
# FFI layout of a single union type value: two 32-bit integers, `class_id`
# followed by `object_id`. Its byte size is used to convert the size of a
# memory pointer into a number of elements.
class UnionTypeStruct < FFI::Struct
    layout(
        :class_id, :int32,
        :object_id, :int32)
end
|
10
|
+
|
11
|
+
attr_accessor :objects
|
12
|
+
|
13
|
+
attr_accessor :device_struct_allocation
|
14
|
+
attr_accessor :ffi_struct
|
15
|
+
|
16
|
+
# Creates an empty environment: no registered objects, no previous results,
# no generated device allocation code and no FFI struct yet.
def initialize
    # Generated source code that copies arrays to the device; grows whenever
    # an array is registered
    @device_struct_allocation = ""

    # Field name (cuda_id) => host object
    @objects = {}

    # "prev_<id>" => result pointer / result type of previous commands
    @previous_results = {}
    @previous_results_types = {}

    # Replaced by the populated FFI struct in `build_ffi_object`
    @ffi_struct = {}
end
|
23
|
+
|
24
|
+
# Hash that maps the identifier of a previous command ("prev_<unique_id>") to
# the address of its result on the GPU.
# Returns a new hash whose entries are sorted; the stored hash is not modified.
def previous_results
    @previous_results.sort.to_h
end
|
29
|
+
|
30
|
+
# Hash that maps the identifier of a previous command ("prev_<unique_id>") to
# the type of its result.
# Returns a new hash whose entries are sorted; the stored hash is not modified.
def previous_results_types
    @previous_results_types.sort.to_h
end
|
35
|
+
|
36
|
+
# Adds an object as a lexical variable of the command with ID `command_id`.
# The object is stored under the field name "l<command_id>_<identifier>";
# that field name is returned.
def add_object(command_id, identifier, object)
    cuda_id = "l#{command_id}_#{identifier}"

    # Don't add the object multiple times: registering the very same object
    # again under this name changes nothing
    return cuda_id if objects[cuda_id].equal?(object)

    objects[cuda_id] = object
    update_dev_struct_allocation(cuda_id, object)

    cuda_id
end
|
48
|
+
|
49
|
+
# Records the result pointer of a previous command under the field name
# "prev_<previous_command_id>" and returns that field name. These entries
# become pointer fields of the FFI struct (see `build_ffi_type`).
def add_previous_result(previous_command_id, pointer_to_result)
    "prev_#{previous_command_id}".tap do |cuda_id|
        @previous_results[cuda_id] = pointer_to_result
        update_dev_struct_allocation(cuda_id, pointer_to_result)
    end
end
|
58
|
+
|
59
|
+
# Registers the field "prev_<previous_command_id>" with the placeholder
# pointer value 0, i.e., the field exists but has no result address.
# Returns whatever `add_previous_result` returns (the field name).
def allocate_previous_pointer(previous_command_id)
    null_pointer = 0
    add_previous_result(previous_command_id, null_pointer)
end
|
63
|
+
|
64
|
+
# Records the result type of a previous command under the field name
# "prev_<previous_command_id>" and returns that field name. The type
# determines the C type of the pointer field in `build_environment_struct`.
def add_previous_result_type(previous_command_id, type)
    "prev_#{previous_command_id}".tap do |cuda_id|
        @previous_results_types[cuda_id] = type
    end
end
|
71
|
+
|
72
|
+
# Adds an object as the base array of the command with ID `command_id`.
#
# The array is stored under "b<command_id>_base" and its number of elements
# under "b<command_id>_size". Adding the very same object again is a no-op.
#
# Returns the field name of the base array.
# Raises AssertionError if a different object is already registered under
# that field name.
def add_base_array(command_id, object)
    cuda_id = "b#{command_id}_base"

    if objects.include?(cuda_id)
        # Object already present
        unless objects[cuda_id].equal?(object)
            raise AssertionError.new("Adding different base array under same name")
        end

        return cuda_id
    end

    objects[cuda_id] = object

    # The size of a memory pointer is divided by the size of one union type
    # struct to obtain the number of elements
    element_count = object.size
    element_count /= UnionTypeStruct.size if object.class == FFI::MemoryPointer
    objects["b#{command_id}_size"] = element_count

    # Generate code for copying data to global memory
    update_dev_struct_allocation(cuda_id, object)

    cuda_id
end
|
100
|
+
|
101
|
+
# Adds an array for the Structure of Arrays object layout. The array is stored
# under `name` and its size under "<name>_size". Afterwards, the device
# allocation code is updated for the array.
def add_soa_array(name, object)
    size_field = "#{name}_size"

    objects[name] = object
    objects[size_field] = object.size

    update_dev_struct_allocation(name, object)
end
|
108
|
+
|
109
|
+
# Appends code (template "env_builder_copy_array.cpp") that allocates and
# copies `object` to the device, provided that `object` is an array:
# - Array: size in bytes is the C size of the element type (taken from the
#   first element) times the number of elements.
# - FFI::MemoryPointer (array of union type structs): size in bytes is the
#   size of the pointer.
# All other objects need no extra code; they are handled by mem-copying the
# environment struct. In that case, nil is returned.
def update_dev_struct_allocation(field, object)
    size_bytes =
        if object.class == Array
            (object.first.class.to_ikra_type.c_size * object.size).to_s
        elsif object.class == FFI::MemoryPointer
            object.size.to_s
        end

    # Nothing to do, this case is handled by mem-copying the struct
    return if size_bytes.nil?

    # Allocate new array
    @device_struct_allocation += Translator.read_file(
        file_name: "env_builder_copy_array.cpp",
        replacements: {
            "field" => field,
            "host_env" => Constants::ENV_HOST_IDENTIFIER,
            "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
            "size_bytes" => size_bytes})
end
|
133
|
+
|
134
|
+
# Returns the name of the environment field that contains the base array of
# the identity command with ID `command_id`: "b<command_id>_base".
def self.base_identifier(command_id)
    format("b%s_base", command_id)
end
|
138
|
+
|
139
|
+
# Returns source code that sets up the environment on the device: first the
# accumulated code that copies arrays to the device side, followed by the
# code (template "allocate_memcpy_environment_to_device.cpp") that allocates
# the environment and copies it over to the device.
def build_environment_variable
    environment_copy = Translator.read_file(
        file_name: "allocate_memcpy_environment_to_device.cpp",
        replacements: {
            "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
            "host_env" => Constants::ENV_HOST_IDENTIFIER})

    @device_struct_allocation + environment_copy
end
|
152
|
+
|
153
|
+
# Returns the C definition of `struct environment_struct`. It has one field
# per registered object, followed by one pointer field per previous result
# type (sorted by field name).
#
# Freezes the hash of registered objects as a side effect, i.e., no objects
# can be added to this hash afterwards.
def build_environment_struct
    @objects.freeze

    object_fields = @objects.map do |name, object|
        if object.class == FFI::MemoryPointer
            # TODO: can this be an extension method of FFI::MemoryPointer?
            " union_t * #{name};\n"
        else
            " #{object.ikra_type.to_c_type} #{name};\n"
        end
    end

    previous_result_fields = previous_results_types.map do |name, type|
        " #{type.to_c_type} *#{name};\n"
    end

    "struct environment_struct\n{\n" + object_fields.join + previous_result_fields.join + "};\n"
end
|
174
|
+
|
175
|
+
# Builds and returns a new anonymous FFI::Struct subclass for the environment.
# The layout consists of one field per registered object, one pointer field
# per previous result (sorted by field name) and a trailing `:dummy` field.
def build_ffi_type
    layout_spec = @objects.flat_map do |name, object|
        # TODO: can this be an extension method of FFI::MemoryPointer?
        ffi_type = object.class == FFI::MemoryPointer ? :pointer : object.ikra_type.to_ffi_type
        [name.to_sym, ffi_type]
    end

    layout_spec.concat(previous_results.keys.flat_map { |name| [name.to_sym, :pointer] })

    # Add dummy at the end of layout, because layouts cannot be empty
    layout_spec.push(:dummy, :int)

    Class.new(FFI::Struct).tap { |struct_type| struct_type.layout(*layout_spec) }
end
|
198
|
+
|
199
|
+
# Creates an instance of the FFI struct type (see `build_ffi_type`) and fills
# in all registered objects and previous results. The struct is stored in
# `ffi_struct`; the return value is the result of `to_ptr` on the struct.
#
# Ruby arrays are copied into newly allocated memory pointers. Only arrays of
# Int and Float are supported (decided by the first element); other element
# types raise NotImplementedError.
def build_ffi_object
    struct = build_ffi_type.new

    @objects.each_pair do |name, object|
        field = name.to_sym

        # Everything except Ruby arrays is stored in the struct as is
        unless object.class == Array
            struct[field] = object
            next
        end

        # TODO: need proper Array handling
        # Check first element to determine type of array
        # TODO: check for polymorphic
        element_type = object.first.class.to_ikra_type
        buffer = FFI::MemoryPointer.new(object.size * element_type.c_size)

        if element_type == Types::PrimitiveType::Int
            buffer.put_array_of_int(0, object)
        elsif element_type == Types::PrimitiveType::Float
            buffer.put_array_of_float(0, object)
        else
            raise NotImplementedError
        end

        struct[field] = buffer
    end

    previous_results.each_pair do |name, pointer|
        struct[name.to_sym] = pointer
    end

    struct[:dummy] = 0

    @ffi_struct = struct

    struct.to_ptr
end
|
235
|
+
|
236
|
+
# Returns a CurriedBuilder for this builder that is bound to `command_id`, so
# that objects can be added without passing the command ID every time.
def [](command_id)
    curried_builder = CurriedBuilder.new(self, command_id)
    curried_builder
end
|
239
|
+
|
240
|
+
# Wraps a builder together with a fixed command ID. Every call is forwarded to
# the wrapped builder with the command ID inserted as first argument.
class CurriedBuilder
    def initialize(builder, command_id)
        @builder, @command_id = builder, command_id
    end

    # Forwards to `add_object` of the wrapped builder and returns its result.
    def add_object(identifier, object)
        @builder.add_object(@command_id, identifier, object)
    end

    # Forwards to `add_base_array` of the wrapped builder and returns its result.
    def add_base_array(object)
        @builder.add_base_array(@command_id, object)
    end
end
|
254
|
+
|
255
|
+
# Returns a new builder of the same class with a shallow copy of the
# registered objects and the same device allocation code.
#
# NOTE(review): previous results, previous result types and the FFI struct
# are not carried over to the copy -- confirm that this is intended.
def clone
    self.class.new.tap do |copy|
        copy.objects = @objects.clone
        copy.device_struct_allocation = @device_struct_allocation
    end
end
|
261
|
+
end
|
262
|
+
end
|
263
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require_relative "parallel_section_invocation_visitor"
|
2
|
+
require_relative "program_builder"
|
3
|
+
require_relative "ast_translator"
|
4
|
+
require_relative "../../ast/ssa_generator"
|
5
|
+
|
6
|
+
module Ikra
|
7
|
+
module Translator
|
8
|
+
class HostSectionCommandTranslator < CommandTranslator
|
9
|
+
# Runs the initializer of the superclass with the same arguments, then
# replaces the program builder with a [HostSectionProgramBuilder] for
# `root_command`.
def initialize(root_command:)
    super

    # Use a different program builder
    host_section_program_builder = HostSectionProgramBuilder.new(
        environment_builder: environment_builder,
        root_command: root_command)

    @program_builder = host_section_program_builder
end
|
17
|
+
|
18
|
+
# Entry point of the translation process: traces all objects reachable from
# the root command, translates the root command by visiting it with this
# translator, and finally registers the SoA arrays of the traced objects in
# the environment builder.
#
# Returns the result of `register_soa_arrays`.
def start_translation
    Log.info("HostSectionCommandTranslator: Starting translation...")

    # Trace all objects. The return value of `trace_all` is not needed here.
    @object_tracer = TypeInference::ObjectTracer.new(root_command)
    object_tracer.trace_all

    # Translate the command (might create additional kernels)
    root_command.accept(self)

    # Add SoA arrays to environment
    object_tracer.register_soa_arrays(environment_builder)
end
|
31
|
+
|
32
|
+
# Translates an ArrayHostSectionCommand. The steps are, in this order:
# 1. Determine the types of the block parameters from the section input.
# 2. Prepare the block AST: insert return statements, insert synthetic send
#    nodes for parallel section invocations, convert to SSA form.
# 3. Run type inference and assert that every possible return type is a
#    LocationAwareArrayType.
# 4. Generate the C++ function for the host section and store it in the
#    program builder.
# 5. Generate the expression that invokes the host section and transfers the
#    result back to the host; store it (and its type) in the program builder.
#
# Raises AssertionError if a code path returns a value of another type.
# This method has no meaningful return value.
def visit_array_host_section_command(command)
    Log.info("Translating ArrayHostSectionCommand [#{command.unique_id}]")

    super

    # A host section must be a top-level (root) command. It uses a special
    # [HostSectionProgramBuilder].

    block_def_node = command.block_def_node

    # Cannot use the normal `translate_block` method here, this is special!
    # TODO: There's some duplication here with [BlockTranslator]

    # Build hash of parameter name -> type mappings
    block_parameter_types = {}
    command.block_parameter_names.each_with_index do |name, index|
        block_parameter_types[name] = command.section_input[index].ikra_type.to_union_type
    end

    parameter_types_string = "[" + block_parameter_types.map { |id, type| "#{id}: #{type}" }.join(", ") + "]"
    Log.info("Translating block with input types #{parameter_types_string}")

    # Add information to block_def_node
    block_def_node.parameters_names_and_types = block_parameter_types

    # Insert return statements (also done by type inference visitor, but we need
    # it now)
    block_def_node.accept(LastStatementReturnsVisitor.new)

    # Insert synthetic __call__ send nodes for return values
    block_def_node.accept(ParallelSectionInvocationVisitor.new)

    # Convert to SSA form
    AST::SSAGenerator.transform_to_ssa!(block_def_node)

    # Type inference
    type_inference_visitor = TypeInference::Visitor.new
    result_type = type_inference_visitor.process_block(block_def_node)

    # Every type that the block may return must be a location-aware array
    result_type.each do |singleton_type|
        unless singleton_type.is_a?(Types::LocationAwareArrayType)
            raise AssertionError.new("Return value of host section must be a LocationAwareArrayType. Found a code path with #{singleton_type}.")
        end
    end

    # C++/CUDA code generation
    ast_translator = HostSectionASTTranslator.new(command_translator: self)

    # Auxiliary methods are instance methods that are called by the host section
    # NOTE(review): `aux_methods` is not used below -- confirm whether the
    # translated methods are supposed to be added to the generated source.
    aux_methods = type_inference_visitor.all_methods.map do |method|
        ast_translator.translate_method(method)
    end

    # Build C++ function
    function_translation = ast_translator.translate_block(block_def_node)

    # Declare local variables (declarations are put in front of the function body)
    block_def_node.local_variables_names_and_types.each do |name, type|
        function_translation.prepend("#{type.to_c_type} #{name};\n")
    end

    mangled_name = "_host_section_#{command.unique_id}_"
    function_parameters = [
        "#{Constants::ENV_TYPE} *#{Constants::ENV_HOST_IDENTIFIER}",
        "#{Constants::ENV_TYPE} *#{Constants::ENV_DEVICE_IDENTIFIER}",
        "#{Constants::PROGRAM_RESULT_TYPE} *#{Constants::PROGRAM_RESULT_IDENTIFIER}"]

    # Define incoming values (parameters). These must all be array commands for now.
    parameter_def = block_parameter_types.map do |name, type|
        if type.singleton_type.is_a?(Symbolic::ArrayCommand)
            # Should be initialized with new array command struct. The last two
            # characters of the C type are cut off to get the name of the struct
            # (presumably a trailing " *" -- TODO confirm).
            "#{type.singleton_type.to_c_type} #{name} = new #{type.singleton_type.to_c_type[0...-2]}();"
        else
            "#{type.singleton_type.to_c_type} #{name};"
        end
    end.join("\n") + "\n"

    translation_result = Translator.read_file(
        file_name: "host_section_block_function_head.cpp",
        replacements: {
            "name" => mangled_name,
            "result_type" => result_type.to_c_type,
            "parameters" => function_parameters.join(", "),
            "body" => Translator.wrap_in_c_block(parameter_def + function_translation)})

    program_builder.host_section_source = translation_result

    # Build function invocation
    args = [
        Constants::ENV_HOST_IDENTIFIER,
        Constants::ENV_DEVICE_IDENTIFIER,
        Constants::PROGRAM_RESULT_IDENTIFIER]

    # Generate code that transfers data back to host. By creating a synthetic send
    # node here, we can let the compiler generate a switch statement if the type of
    # the return value (array) cannot be determined uniquely at compile time.
    host_section_invocation = AST::SourceCodeExprNode.new(
        code: "#{mangled_name}(#{args.join(", ")})")
    host_section_invocation.merge_union_type(result_type)
    device_to_host_transfer_node = AST::SendNode.new(
        receiver: host_section_invocation,
        selector: :__to_host_array__)

    # Type inference is a prerequisite for code generation
    type_inference_visitor.visit_send_node(device_to_host_transfer_node)

    program_builder.host_result_expression = device_to_host_transfer_node.accept(
        ast_translator.expression_translator)
    program_builder.result_type = device_to_host_transfer_node.get_type

    Log.info("DONE translating ArrayHostSectionCommand [#{command.unique_id}]")

    # This method has no return value (for the moment)
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
require_relative "array_in_host_section_command"
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class HostSectionCommandTranslator < CommandTranslator
|
4
|
+
# Translates an ArrayInHostSectionCommand, i.e., a command whose elements are
# read from an input array that is passed to the kernel as an additional
# parameter.
#
# Returns the command translation result, whose result expression reads the
# element at index `_tid_` of the input array.
def visit_array_in_host_section_command(command)
    Log.info("Translating ArrayInHostSectionCommand [#{command.unique_id}]")

    super

    # This is a root command, determine grid/block dimensions
    kernel_launcher.configure_grid(command.size, block_size: command.block_size)

    # The input array is an additional kernel parameter with a unique name
    array_input_id = "_array_#{self.class.next_unique_id}_"
    kernel_builder.add_additional_parameters("#{command.base_type.to_c_type} *#{array_input_id}")

    # `cmd` is a reference to the command being launched (which might be merged
    # with other commands). Based on that information, we can generate an
    # expression that returns the input array.
    input_array_argument = proc do |cmd|
        generated = Translator::KernelLaunchArgumentGenerator.generate_arg(
            command, cmd, "cmd")

        if generated.nil?
            raise AssertionError.new("Argument not found: Trying to launch command #{cmd.unique_id}, looking for result of command #{command.unique_id}")
        end

        generated
    end

    # Add placeholder for argument (input array). This should be done here to
    # preserve the order of arguments.
    kernel_launcher.add_additional_arguments(input_array_argument)

    command_translation = build_command_translation_result(
        result: "#{array_input_id}[_tid_]",
        command: command)

    Log.info("DONE translating ArrayInHostSectionCommand [#{command.unique_id}]")

    command_translation
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|