ikra 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,263 @@
1
+ module Ikra
2
+ module Translator
3
+
4
+ # Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
5
+ class EnvironmentBuilder
6
+
7
+ class UnionTypeStruct < FFI::Struct
8
+ layout :class_id, :int32, :object_id, :int32
9
+ end
10
+
11
+ attr_accessor :objects
12
+
13
+ attr_accessor :device_struct_allocation
14
+ attr_accessor :ffi_struct
15
+
16
+ def initialize
17
+ @objects = {}
18
+ @previous_results = {}
19
+ @previous_results_types = {}
20
+ @device_struct_allocation = ""
21
+ @ffi_struct = {}
22
+ end
23
+
24
+ # Hash that maps the unique_id of a command to the address of its result on the GPU.
25
+ # Returns a sorted version of the hash.
26
+ def previous_results
27
+ return Hash[@previous_results.sort]
28
+ end
29
+
30
+ # Hash that maps the unique_id of a command to the type of its result.
31
+ # Returns a sorted version of the hash.
32
+ def previous_results_types
33
+ return Hash[@previous_results_types.sort]
34
+ end
35
+
36
+ # Adds an object as a lexical variable.
37
+ def add_object(command_id, identifier, object)
38
+ cuda_id = "l#{command_id}_#{identifier}"
39
+
40
+ if objects[cuda_id].object_id != object.object_id
41
+ # Don't add the object multiple times
42
+ objects[cuda_id] = object
43
+ update_dev_struct_allocation(cuda_id, object)
44
+ end
45
+
46
+ return cuda_id
47
+ end
48
+
49
+ # Adds object to the ffi_struct which is of type unique_id => pointer in GPU
50
+ def add_previous_result(previous_command_id, pointer_to_result)
51
+ cuda_id = "prev_#{previous_command_id}"
52
+ @previous_results[cuda_id] = pointer_to_result
53
+
54
+ update_dev_struct_allocation(cuda_id, pointer_to_result)
55
+
56
+ cuda_id
57
+ end
58
+
59
+ # Adds object to the ffi_struct which is of type unique_id => 0
60
+ def allocate_previous_pointer(previous_command_id)
61
+ add_previous_result(previous_command_id, 0)
62
+ end
63
+
64
+ # Adds object to the ffi_struct which is of type unique_id => type of command with unique_id
65
+ def add_previous_result_type(previous_command_id, type)
66
+ cuda_id = "prev_#{previous_command_id}"
67
+ @previous_results_types[cuda_id] = type
68
+
69
+ cuda_id
70
+ end
71
+
72
+ # Adds an object as a base array
73
+ def add_base_array(command_id, object)
74
+ cuda_id = "b#{command_id}_base"
75
+
76
+ if objects.include?(cuda_id)
77
+ # Object already present
78
+
79
+ if !objects[cuda_id].equal?(object)
80
+ raise AssertionError.new("Adding different base array under different name")
81
+ end
82
+
83
+ return cuda_id
84
+ end
85
+
86
+ objects[cuda_id] = object
87
+
88
+ cuda_id_size = "b#{command_id}_size"
89
+ if object.class == FFI::MemoryPointer
90
+ objects[cuda_id_size] = object.size / UnionTypeStruct.size
91
+ else
92
+ objects[cuda_id_size] = object.size
93
+ end
94
+
95
+ # Generate code for copying data to global memory
96
+ update_dev_struct_allocation(cuda_id, object)
97
+
98
+ return cuda_id
99
+ end
100
+
101
+ # Add an array for the Structure of Arrays object layout
102
+ def add_soa_array(name, object)
103
+ objects[name] = object
104
+ objects["#{name}_size"] = object.size
105
+
106
+ update_dev_struct_allocation(name, object)
107
+ end
108
+
109
+ def update_dev_struct_allocation(field, object)
110
+ if object.class == Array
111
+ # Allocate new array
112
+ @device_struct_allocation += Translator.read_file(
113
+ file_name: "env_builder_copy_array.cpp",
114
+ replacements: {
115
+ "field" => field,
116
+ "host_env" => Constants::ENV_HOST_IDENTIFIER,
117
+ "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
118
+ "size_bytes" => (object.first.class.to_ikra_type.c_size * object.size).to_s})
119
+ elsif object.class == FFI::MemoryPointer
120
+ # This is an array of union type structs
121
+ # Allocate new array
122
+ @device_struct_allocation += Translator.read_file(
123
+ file_name: "env_builder_copy_array.cpp",
124
+ replacements: {
125
+ "field" => field,
126
+ "host_env" => Constants::ENV_HOST_IDENTIFIER,
127
+ "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
128
+ "size_bytes" => object.size.to_s})
129
+ else
130
+ # Nothing to do, this case is handled by mem-copying the struct
131
+ end
132
+ end
133
+
134
+ # Returns the name of the field containing the base array for a certain identity command.
135
+ def self.base_identifier(command_id)
136
+ return "b#{command_id}_base"
137
+ end
138
+
139
+ def build_environment_variable
140
+ # Copy arrays to device side
141
+ result = @device_struct_allocation
142
+
143
+ # Allocate and copy over environment to device
144
+ result = result + Translator.read_file(
145
+ file_name: "allocate_memcpy_environment_to_device.cpp",
146
+ replacements: {
147
+ "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
148
+ "host_env" => Constants::ENV_HOST_IDENTIFIER})
149
+
150
+ return result
151
+ end
152
+
153
+ def build_environment_struct
154
+ @objects.freeze
155
+
156
+ struct_def = "struct environment_struct\n{\n"
157
+ @objects.each do |key, value|
158
+ if value.class == FFI::MemoryPointer
159
+ # TODO: can this be an extension method of FFI::MemoryPointer?
160
+ struct_def += " union_t * #{key};\n"
161
+ else
162
+ struct_def += " #{value.ikra_type.to_c_type} #{key};\n"
163
+ end
164
+ end
165
+
166
+ previous_results_types.each do |key, value|
167
+ struct_def += " #{value.to_c_type} *#{key};\n"
168
+ end
169
+
170
+ struct_def += "};\n"
171
+
172
+ return struct_def
173
+ end
174
+
175
+ def build_ffi_type
176
+ struct_layout = []
177
+ @objects.each do |key, value|
178
+ if value.class == FFI::MemoryPointer
179
+ # TODO: can this be an extension method of FFI::MemoryPointer?
180
+ struct_layout += [key.to_sym, :pointer]
181
+ else
182
+ struct_layout += [key.to_sym, value.ikra_type.to_ffi_type]
183
+ end
184
+ end
185
+
186
+ previous_results.each do |key, value|
187
+ struct_layout += [key.to_sym, :pointer]
188
+ end
189
+
190
+ # Add dummy at the end of layout, because layouts cannot be empty
191
+ struct_layout += [:dummy, :int]
192
+
193
+ struct_type = Class.new(FFI::Struct)
194
+ struct_type.layout(*struct_layout)
195
+
196
+ struct_type
197
+ end
198
+
199
+ def build_ffi_object
200
+ struct_type = build_ffi_type
201
+ struct = struct_type.new
202
+
203
+ @objects.each do |key, value|
204
+ # TODO: need proper Array handling
205
+ if value.class == Array
206
+ # Check first element to determine type of array
207
+ # TODO: check for polymorphic
208
+ inner_type = value.first.class.to_ikra_type
209
+ array_ptr = FFI::MemoryPointer.new(value.size * inner_type.c_size)
210
+
211
+ if inner_type == Types::PrimitiveType::Int
212
+ array_ptr.put_array_of_int(0, value)
213
+ elsif inner_type == Types::PrimitiveType::Float
214
+ array_ptr.put_array_of_float(0, value)
215
+ else
216
+ raise NotImplementedError
217
+ end
218
+
219
+ struct[key.to_sym] = array_ptr
220
+ else
221
+ struct[key.to_sym] = value
222
+ end
223
+ end
224
+
225
+ previous_results.each do |key, value|
226
+ struct[key.to_sym] = value
227
+ end
228
+
229
+ struct[:dummy] = 0
230
+
231
+ @ffi_struct = struct
232
+
233
+ struct.to_ptr
234
+ end
235
+
236
+ def [](command_id)
237
+ CurriedBuilder.new(self, command_id)
238
+ end
239
+
240
+ class CurriedBuilder
241
+ def initialize(builder, command_id)
242
+ @builder = builder
243
+ @command_id = command_id
244
+ end
245
+
246
+ def add_object(identifier, object)
247
+ @builder.add_object(@command_id, identifier, object)
248
+ end
249
+
250
+ def add_base_array(object)
251
+ @builder.add_base_array(@command_id, object)
252
+ end
253
+ end
254
+
255
+ def clone
256
+ result = self.class.new
257
+ result.objects = @objects.clone
258
+ result.device_struct_allocation = @device_struct_allocation
259
+ result
260
+ end
261
+ end
262
+ end
263
+ end
@@ -0,0 +1,150 @@
1
+ require_relative "parallel_section_invocation_visitor"
2
+ require_relative "program_builder"
3
+ require_relative "ast_translator"
4
+ require_relative "../../ast/ssa_generator"
5
+
6
+ module Ikra
7
+ module Translator
8
+ class HostSectionCommandTranslator < CommandTranslator
9
+ def initialize(root_command:)
10
+ super
11
+
12
+ # Use a different program builder
13
+ @program_builder = HostSectionProgramBuilder.new(
14
+ environment_builder: environment_builder,
15
+ root_command: root_command)
16
+ end
17
+
18
+ def start_translation
19
+ Log.info("HostSectionCommandTranslator: Starting translation...")
20
+
21
+ # Trace all objects
22
+ @object_tracer = TypeInference::ObjectTracer.new(root_command)
23
+ all_objects = object_tracer.trace_all
24
+
25
+ # Translate the command (might create additional kernels)
26
+ root_command.accept(self)
27
+
28
+ # Add SoA arrays to environment
29
+ object_tracer.register_soa_arrays(environment_builder)
30
+ end
31
+
32
+ def visit_array_host_section_command(command)
33
+ Log.info("Translating ArrayHostSectionCommand [#{command.unique_id}]")
34
+
35
+ super
36
+
37
+ # A host section must be a top-level (root) command. It uses a special
38
+ # [HostSectionProgramBuilder].
39
+
40
+ block_def_node = command.block_def_node
41
+
42
+ # Cannot use the normal `translate_block` method here, this is special!
43
+ # TODO: There's some duplication here with [BlockTranslator]
44
+
45
+ # Build hash of parameter name -> type mappings
46
+ block_parameter_types = {}
47
+ command.block_parameter_names.each_with_index do |name, index|
48
+ block_parameter_types[name] = command.section_input[index].ikra_type.to_union_type
49
+ end
50
+
51
+ parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
52
+ Log.info("Translating block with input types #{parameter_types_string}")
53
+
54
+ # Add information to block_def_node
55
+ block_def_node.parameters_names_and_types = block_parameter_types
56
+
57
+ # Insert return statements (also done by type inference visitor, but we need
58
+ # it now)
59
+ block_def_node.accept(LastStatementReturnsVisitor.new)
60
+
61
+ # Insert synthetic __call__ send nodes for return values
62
+ block_def_node.accept(ParallelSectionInvocationVisitor.new)
63
+
64
+ # Concert to SSA form
65
+ AST::SSAGenerator.transform_to_ssa!(block_def_node)
66
+
67
+ # Type inference
68
+ type_inference_visitor = TypeInference::Visitor.new
69
+ result_type = type_inference_visitor.process_block(block_def_node)
70
+
71
+ for singleton_type in result_type
72
+ if !singleton_type.is_a?(Types::LocationAwareArrayType)
73
+ raise AssertionError.new("Return value of host section must be a LocationAwareArrayType. Found a code path with #{singleton_type}.")
74
+ end
75
+ end
76
+
77
+ # C++/CUDA code generation
78
+ ast_translator = HostSectionASTTranslator.new(command_translator: self)
79
+
80
+ # Auxiliary methods are instance methods that are called by the host section
81
+ aux_methods = type_inference_visitor.all_methods.map do |method|
82
+ ast_translator.translate_method(method)
83
+ end
84
+
85
+ # Build C++ function
86
+ function_translation = ast_translator.translate_block(block_def_node)
87
+
88
+ # Declare local variables
89
+ block_def_node.local_variables_names_and_types.each do |name, type|
90
+ function_translation.prepend("#{type.to_c_type} #{name};\n")
91
+ end
92
+
93
+ mangled_name = "_host_section_#{command.unique_id}_"
94
+ function_parameters = [
95
+ "#{Constants::ENV_TYPE} *#{Constants::ENV_HOST_IDENTIFIER}",
96
+ "#{Constants::ENV_TYPE} *#{Constants::ENV_DEVICE_IDENTIFIER}",
97
+ "#{Constants::PROGRAM_RESULT_TYPE} *#{Constants::PROGRAM_RESULT_IDENTIFIER}"]
98
+
99
+ # Define incoming values (parameters). These must all be array commands for now.
100
+ parameter_def = block_parameter_types.map do |name, type|
101
+ if type.singleton_type.is_a?(Symbolic::ArrayCommand)
102
+ # Should be initialized with new array command struct
103
+ "#{type.singleton_type.to_c_type} #{name} = new #{type.singleton_type.to_c_type[0...-2]}();"
104
+ else
105
+ "#{type.singleton_type.to_c_type} #{name};"
106
+ end
107
+ end.join("\n") + "\n"
108
+
109
+ translation_result = Translator.read_file(
110
+ file_name: "host_section_block_function_head.cpp",
111
+ replacements: {
112
+ "name" => mangled_name,
113
+ "result_type" => result_type.to_c_type,
114
+ "parameters" => function_parameters.join(", "),
115
+ "body" => Translator.wrap_in_c_block(parameter_def + function_translation)})
116
+
117
+ program_builder.host_section_source = translation_result
118
+
119
+ # Build function invocation
120
+ args = [
121
+ Constants::ENV_HOST_IDENTIFIER,
122
+ Constants::ENV_DEVICE_IDENTIFIER,
123
+ Constants::PROGRAM_RESULT_IDENTIFIER]
124
+
125
+ # Generate code that transfers data back to host. By creating a synthetic send
126
+ # node here, we can let the compiler generate a switch statement if the type of
127
+ # the return value (array) cannot be determined uniquely at compile time.
128
+ host_section_invocation = AST::SourceCodeExprNode.new(
129
+ code: "#{mangled_name}(#{args.join(", ")})")
130
+ host_section_invocation.merge_union_type(result_type)
131
+ device_to_host_transfer_node = AST::SendNode.new(
132
+ receiver: host_section_invocation,
133
+ selector: :__to_host_array__)
134
+
135
+ # Type inference is a prerequisite for code generation
136
+ type_inference_visitor.visit_send_node(device_to_host_transfer_node)
137
+
138
+ program_builder.host_result_expression = device_to_host_transfer_node.accept(
139
+ ast_translator.expression_translator)
140
+ program_builder.result_type = device_to_host_transfer_node.get_type
141
+
142
+ Log.info("DONE translating ArrayHostSectionCommand [#{command.unique_id}]")
143
+
144
+ # This method has no return value (for the moment)
145
+ end
146
+ end
147
+ end
148
+ end
149
+
150
+ require_relative "array_in_host_section_command"
@@ -0,0 +1,41 @@
1
+ module Ikra
2
+ module Translator
3
+ class HostSectionCommandTranslator < CommandTranslator
4
+ def visit_array_in_host_section_command(command)
5
+ Log.info("Translating ArrayInHostSectionCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ # This is a root command, determine grid/block dimensions
10
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
11
+
12
+ array_input_id = "_array_#{self.class.next_unique_id}_"
13
+ kernel_builder.add_additional_parameters("#{command.base_type.to_c_type} *#{array_input_id}")
14
+
15
+ # Add placeholder for argument (input array). This should be done here to preserve
16
+ # the order of arguments.
17
+ kernel_launcher.add_additional_arguments(proc do |cmd|
18
+ # `cmd` is a reference to the command being launched (which might be merged
19
+ # with other commands). Based on that information, we can generate an
20
+ # expression that returns the input array.
21
+ arg = Translator::KernelLaunchArgumentGenerator.generate_arg(
22
+ command, cmd, "cmd")
23
+
24
+ if arg == nil
25
+ raise AssertionError.new("Argument not found: Trying to launch command #{cmd.unique_id}, looking for result of command #{command.unique_id}")
26
+ end
27
+
28
+ arg
29
+ end)
30
+
31
+ command_translation = build_command_translation_result(
32
+ result: "#{array_input_id}[_tid_]",
33
+ command: command)
34
+
35
+ Log.info("DONE translating ArrayInHostSectionCommand [#{command.unique_id}]")
36
+
37
+ return command_translation
38
+ end
39
+ end
40
+ end
41
+ end