ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,263 @@
1
+ module Ikra
2
+ module Translator
3
+
4
+ # Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
5
+ class EnvironmentBuilder
6
+
7
# FFI struct with two 32-bit integer fields, :class_id and :object_id.
# Its size is used by add_base_array to turn the size of an
# FFI::MemoryPointer into an element count.
class UnionTypeStruct < FFI::Struct
  layout(
    :class_id, :int32,
    :object_id, :int32
  )
end
10
+
11
+ attr_accessor :objects
12
+
13
+ attr_accessor :device_struct_allocation
14
+ attr_accessor :ffi_struct
15
+
16
# Creates an empty builder: no objects, no previous results or result
# types, no device allocation code, and an empty hash in place of the FFI
# struct (build_ffi_object assigns the real struct later).
def initialize
  @objects, @previous_results, @previous_results_types = {}, {}, {}
  @device_struct_allocation = ""
  @ffi_struct = {}
end
23
+
24
# Hash that maps the unique_id of a command to the address of its result on the GPU.
# Returns a new, sorted hash; the stored hash itself is not reordered.
def previous_results
  @previous_results.sort.to_h
end
29
+
30
# Hash that maps the unique_id of a command to the type of its result.
# Returns a new, sorted hash; the stored hash itself is not reordered.
def previous_results_types
  @previous_results_types.sort.to_h
end
35
+
36
# Adds an object as a lexical variable.
# The object is stored under the field name "l<command_id>_<identifier>",
# which is also the return value.
def add_object(command_id, identifier, object)
  field_name = "l#{command_id}_#{identifier}"

  # Don't add the object multiple times: identity (object_id) decides
  # whether the stored entry is already this very object.
  already_stored = objects[field_name].object_id == object.object_id

  unless already_stored
    objects[field_name] = object
    update_dev_struct_allocation(field_name, object)
  end

  field_name
end
48
+
49
# Records the GPU pointer of a previous command's result under the field
# name "prev_<previous_command_id>" in @previous_results (these entries are
# written into the FFI struct by build_ffi_object).
# Returns the field name.
def add_previous_result(previous_command_id, pointer_to_result)
  "prev_#{previous_command_id}".tap do |field_name|
    @previous_results[field_name] = pointer_to_result
    update_dev_struct_allocation(field_name, pointer_to_result)
  end
end
58
+
59
# Registers a previous result for the given command with the value 0 as
# its pointer (unique_id => 0). Returns whatever add_previous_result returns.
def allocate_previous_pointer(previous_command_id)
  placeholder_pointer = 0
  add_previous_result(previous_command_id, placeholder_pointer)
end
63
+
64
# Records the result type of a previous command under the field name
# "prev_<previous_command_id>" in @previous_results_types.
# Returns the field name.
def add_previous_result_type(previous_command_id, type)
  "prev_#{previous_command_id}".tap do |field_name|
    @previous_results_types[field_name] = type
  end
end
71
+
72
# Adds an object as a base array.
#
# The object is stored under "b<command_id>_base" and its element count
# under "b<command_id>_size". Adding the same object again for the same
# command is a no-op.
#
# Returns the field name ("b<command_id>_base").
# Raises AssertionError if a different object is already registered under
# that field name.
def add_base_array(command_id, object)
  cuda_id = "b#{command_id}_base"

  if objects.include?(cuda_id)
    # Object already present
    unless objects[cuda_id].equal?(object)
      # The name is the same, the array is not: report it as such.
      raise AssertionError.new("Adding different base array under the same name")
    end

    return cuda_id
  end

  objects[cuda_id] = object

  # A memory pointer is treated as an array of union type structs, so its
  # element count is its size divided by the size of one struct.
  cuda_id_size = "b#{command_id}_size"
  objects[cuda_id_size] =
    if object.class == FFI::MemoryPointer
      object.size / UnionTypeStruct.size
    else
      object.size
    end

  # Generate code for copying data to global memory
  update_dev_struct_allocation(cuda_id, object)

  cuda_id
end
100
+
101
# Add an array for the Structure of Arrays object layout.
# Stores the array under `name` and its size under "<name>_size", then
# updates the device allocation code for it.
def add_soa_array(name, object)
  size_field = "#{name}_size"

  objects[name] = object
  objects[size_field] = object.size

  update_dev_struct_allocation(name, object)
end
108
+
109
# Appends code generated from "env_builder_copy_array.cpp" for `field` to
# @device_struct_allocation when `object` is exactly an Array or an
# FFI::MemoryPointer. Does nothing (and returns nil) for any other object.
def update_dev_struct_allocation(field, object)
  size_bytes =
    if object.class == Array
      # Allocate new array: size of the first element's type times the length
      (object.first.class.to_ikra_type.c_size * object.size).to_s
    elsif object.class == FFI::MemoryPointer
      # This is an array of union type structs
      # Allocate new array
      object.size.to_s
    end

  # Nothing to do otherwise, this case is handled by mem-copying the struct
  return if size_bytes.nil?

  copy_code = Translator.read_file(
    file_name: "env_builder_copy_array.cpp",
    replacements: {
      "field" => field,
      "host_env" => Constants::ENV_HOST_IDENTIFIER,
      "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
      "size_bytes" => size_bytes})

  # Build a new string instead of mutating in place: `clone` hands the same
  # string object to the copy.
  @device_struct_allocation = @device_struct_allocation + copy_code
end
133
+
134
# Returns the name of the field containing the base array for a certain identity command.
# The name has the form "b<command_id>_base".
def self.base_identifier(command_id)
  format("b%s_base", command_id)
end
138
+
139
# Returns the accumulated array-copy code (@device_struct_allocation)
# followed by the code generated from
# "allocate_memcpy_environment_to_device.cpp". The instance variable itself
# is not modified.
def build_environment_variable
  # Allocate and copy over environment to device
  env_transfer_code = Translator.read_file(
    file_name: "allocate_memcpy_environment_to_device.cpp",
    replacements: {
      "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
      "host_env" => Constants::ENV_HOST_IDENTIFIER})

  # Copy arrays to device side first, then the environment
  @device_struct_allocation + env_transfer_code
end
152
+
153
# Returns the C source of `struct environment_struct`: one field per entry
# in @objects, followed by one pointer field per entry in
# previous_results_types. Freezes @objects as a side effect.
def build_environment_struct
  @objects.freeze

  object_fields = @objects.map do |field_name, object|
    if object.class == FFI::MemoryPointer
      # TODO: can this be an extension method of FFI::MemoryPointer?
      " union_t * #{field_name};\n"
    else
      " #{object.ikra_type.to_c_type} #{field_name};\n"
    end
  end

  result_fields = previous_results_types.map do |field_name, type|
    " #{type.to_c_type} *#{field_name};\n"
  end

  "struct environment_struct\n{\n" + object_fields.join + result_fields.join + "};\n"
end
174
+
175
# Builds and returns a new anonymous FFI::Struct subclass whose layout has
# one field per entry in @objects, one :pointer field per previous result,
# and a trailing :dummy int field.
def build_ffi_type
  object_fields = @objects.flat_map do |field_name, object|
    # TODO: can this be an extension method of FFI::MemoryPointer?
    [field_name.to_sym,
     object.class == FFI::MemoryPointer ? :pointer : object.ikra_type.to_ffi_type]
  end

  result_fields = previous_results.keys.flat_map do |field_name|
    [field_name.to_sym, :pointer]
  end

  # Add dummy at the end of layout, because layouts cannot be empty
  fields = object_fields + result_fields + [:dummy, :int]

  Class.new(FFI::Struct).tap { |struct_type| struct_type.layout(*fields) }
end
198
+
199
# Instantiates the struct type from build_ffi_type and fills it: arrays in
# @objects are copied into a new FFI::MemoryPointer, all other objects are
# assigned directly, previous results are assigned as they are, and :dummy
# is set to 0. The struct is kept in @ffi_struct.
#
# Returns the struct's pointer (struct.to_ptr).
# Raises NotImplementedError for arrays whose first element is neither an
# Int nor a Float primitive type.
def build_ffi_object
  struct = build_ffi_type.new

  @objects.each do |field_name, object|
    # TODO: need proper Array handling
    struct[field_name.to_sym] =
      if object.class == Array
        # Check first element to determine type of array
        # TODO: check for polymorphic
        element_type = object.first.class.to_ikra_type
        buffer = FFI::MemoryPointer.new(object.size * element_type.c_size)

        if element_type == Types::PrimitiveType::Int
          buffer.put_array_of_int(0, object)
        elsif element_type == Types::PrimitiveType::Float
          buffer.put_array_of_float(0, object)
        else
          raise NotImplementedError
        end

        buffer
      else
        object
      end
  end

  previous_results.each { |field_name, pointer| struct[field_name.to_sym] = pointer }

  struct[:dummy] = 0
  @ffi_struct = struct

  struct.to_ptr
end
235
+
236
# Returns a CurriedBuilder bound to this builder and `command_id`, so the
# command id does not have to be passed with every call.
def [](command_id)
  return CurriedBuilder.new(self, command_id)
end
239
+
240
# Thin wrapper around a builder with a fixed command id. Its methods forward
# to the builder methods of the same name, supplying the stored command id
# as the first argument.
class CurriedBuilder
  def initialize(builder, command_id)
    @builder, @command_id = builder, command_id
  end

  # Forwards to builder.add_object(command_id, identifier, object).
  def add_object(identifier, object)
    @builder.add_object(@command_id, identifier, object)
  end

  # Forwards to builder.add_base_array(command_id, object).
  def add_base_array(object)
    @builder.add_base_array(@command_id, object)
  end
end
254
+
255
# Returns a new builder of the same class that receives a shallow copy of
# @objects and the current device allocation code. Previous results, their
# types and the FFI struct are not carried over; the copy keeps whatever
# `self.class.new` set them to.
def clone
  self.class.new.tap do |copy|
    copy.objects = @objects.clone
    copy.device_struct_allocation = @device_struct_allocation
  end
end
261
+ end
262
+ end
263
+ end
@@ -0,0 +1,150 @@
1
+ require_relative "parallel_section_invocation_visitor"
2
+ require_relative "program_builder"
3
+ require_relative "ast_translator"
4
+ require_relative "../../ast/ssa_generator"
5
+
6
+ module Ikra
7
+ module Translator
8
+ class HostSectionCommandTranslator < CommandTranslator
9
# Initializes the translator through the superclass and then assigns a
# HostSectionProgramBuilder (built from this translator's environment
# builder and the root command) to @program_builder.
def initialize(root_command:)
  super(root_command: root_command)

  # Use a different program builder
  @program_builder = HostSectionProgramBuilder.new(
    root_command: root_command,
    environment_builder: environment_builder)
end
17
+
18
# Runs the translation of the root command: traces all objects, lets the
# root command accept this translator as a visitor, and finally registers
# the tracer's SoA arrays with the environment builder.
def start_translation
  Log.info("HostSectionCommandTranslator: Starting translation...")

  # Trace all objects. The result of `trace_all` is not used here, so it is
  # no longer bound to a local; the call itself is kept.
  @object_tracer = TypeInference::ObjectTracer.new(root_command)
  object_tracer.trace_all

  # Translate the command (might create additional kernels)
  root_command.accept(self)

  # Add SoA arrays to environment
  object_tracer.register_soa_arrays(environment_builder)
end
31
+
32
# Translates an ArrayHostSectionCommand. After calling the superclass
# visitor, it builds the parameter name -> type map from
# `command.section_input`, prepares the block AST (return insertion,
# synthetic __call__ nodes, SSA form), runs type inference, generates the
# C++ host section function from "host_section_block_function_head.cpp" and
# stores the generated source, the host result expression and the result
# type on `program_builder`.
# Raises AssertionError if any inferred result type is not a
# Types::LocationAwareArrayType. Has no meaningful return value.
+ def visit_array_host_section_command(command)
33
+ Log.info("Translating ArrayHostSectionCommand [#{command.unique_id}]")
34
+
35
+ super
36
+
37
+ # A host section must be a top-level (root) command. It uses a special
38
+ # [HostSectionProgramBuilder].
39
+
40
+ block_def_node = command.block_def_node
41
+
42
+ # Cannot use the normal `translate_block` method here, this is special!
43
+ # TODO: There's some duplication here with [BlockTranslator]
44
+
45
+ # Build hash of parameter name -> type mappings (the i-th parameter name gets the union type of the i-th section input)
46
+ block_parameter_types = {}
47
+ command.block_parameter_names.each_with_index do |name, index|
48
+ block_parameter_types[name] = command.section_input[index].ikra_type.to_union_type
49
+ end
50
+
51
+ parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
52
+ Log.info("Translating block with input types #{parameter_types_string}")
53
+
54
+ # Add information to block_def_node
55
+ block_def_node.parameters_names_and_types = block_parameter_types
56
+
57
+ # Insert return statements (also done by type inference visitor, but we need
58
+ # it now)
59
+ block_def_node.accept(LastStatementReturnsVisitor.new)
60
+
61
+ # Insert synthetic __call__ send nodes for return values
62
+ block_def_node.accept(ParallelSectionInvocationVisitor.new)
63
+
64
+ # Convert to SSA form
65
+ AST::SSAGenerator.transform_to_ssa!(block_def_node)
66
+
67
+ # Type inference
68
+ type_inference_visitor = TypeInference::Visitor.new
69
+ result_type = type_inference_visitor.process_block(block_def_node)
70
+
71
+ for singleton_type in result_type
72
+ if !singleton_type.is_a?(Types::LocationAwareArrayType)
73
+ raise AssertionError.new("Return value of host section must be a LocationAwareArrayType. Found a code path with #{singleton_type}.")
74
+ end
75
+ end
76
+
77
+ # C++/CUDA code generation
78
+ ast_translator = HostSectionASTTranslator.new(command_translator: self)
79
+
80
+ # Auxiliary methods are instance methods that are called by the host section
81
+ aux_methods = type_inference_visitor.all_methods.map do |method|
82
+ ast_translator.translate_method(method)
83
+ end
84
+
85
+ # Build C++ function
86
+ function_translation = ast_translator.translate_block(block_def_node)
87
+
88
+ # Declare local variables (each declaration is prepended to the translated body)
89
+ block_def_node.local_variables_names_and_types.each do |name, type|
90
+ function_translation.prepend("#{type.to_c_type} #{name};\n")
91
+ end
92
+
93
+ mangled_name = "_host_section_#{command.unique_id}_"
94
+ function_parameters = [
95
+ "#{Constants::ENV_TYPE} *#{Constants::ENV_HOST_IDENTIFIER}",
96
+ "#{Constants::ENV_TYPE} *#{Constants::ENV_DEVICE_IDENTIFIER}",
97
+ "#{Constants::PROGRAM_RESULT_TYPE} *#{Constants::PROGRAM_RESULT_IDENTIFIER}"]
98
+
99
+ # Define incoming values (parameters). These must all be array commands for now. NOTE(review): the check below tests a type object against Symbolic::ArrayCommand; confirm this is the intended class.
100
+ parameter_def = block_parameter_types.map do |name, type|
101
+ if type.singleton_type.is_a?(Symbolic::ArrayCommand)
102
+ # Should be initialized with new array command struct ([0...-2] drops the last two characters of the C type name, presumably a pointer suffix; TODO confirm)
103
+ "#{type.singleton_type.to_c_type} #{name} = new #{type.singleton_type.to_c_type[0...-2]}();"
104
+ else
105
+ "#{type.singleton_type.to_c_type} #{name};"
106
+ end
107
+ end.join("\n") + "\n"
108
+
109
+ translation_result = Translator.read_file(
110
+ file_name: "host_section_block_function_head.cpp",
111
+ replacements: {
112
+ "name" => mangled_name,
113
+ "result_type" => result_type.to_c_type,
114
+ "parameters" => function_parameters.join(", "),
115
+ "body" => Translator.wrap_in_c_block(parameter_def + function_translation)})
116
+
117
+ program_builder.host_section_source = translation_result
118
+
119
+ # Build function invocation
120
+ args = [
121
+ Constants::ENV_HOST_IDENTIFIER,
122
+ Constants::ENV_DEVICE_IDENTIFIER,
123
+ Constants::PROGRAM_RESULT_IDENTIFIER]
124
+
125
+ # Generate code that transfers data back to host. By creating a synthetic send
126
+ # node here, we can let the compiler generate a switch statement if the type of
127
+ # the return value (array) cannot be determined uniquely at compile time.
128
+ host_section_invocation = AST::SourceCodeExprNode.new(
129
+ code: "#{mangled_name}(#{args.join(", ")})")
130
+ host_section_invocation.merge_union_type(result_type)
131
+ device_to_host_transfer_node = AST::SendNode.new(
132
+ receiver: host_section_invocation,
133
+ selector: :__to_host_array__)
134
+
135
+ # Type inference is a prerequisite for code generation
136
+ type_inference_visitor.visit_send_node(device_to_host_transfer_node)
137
+
138
+ program_builder.host_result_expression = device_to_host_transfer_node.accept(
139
+ ast_translator.expression_translator)
140
+ program_builder.result_type = device_to_host_transfer_node.get_type
141
+
142
+ Log.info("DONE translating ArrayHostSectionCommand [#{command.unique_id}]")
143
+
144
+ # This method has no return value (for the moment)
145
+ end
146
+ end
147
+ end
148
+ end
149
+
150
+ require_relative "array_in_host_section_command"
@@ -0,0 +1,41 @@
1
+ module Ikra
2
+ module Translator
3
+ class HostSectionCommandTranslator < CommandTranslator
4
# Translates an ArrayInHostSectionCommand. Configures the kernel launcher's
# grid from the command's size and block size, adds an extra kernel
# parameter for the input array, registers a proc that later produces the
# matching launch argument, and returns the command translation result that
# reads the input array at `_tid_`.
def visit_array_in_host_section_command(command)
  Log.info("Translating ArrayInHostSectionCommand [#{command.unique_id}]")

  super

  # This is a root command, determine grid/block dimensions
  kernel_launcher.configure_grid(command.size, block_size: command.block_size)

  input_parameter = "_array_#{self.class.next_unique_id}_"
  kernel_builder.add_additional_parameters("#{command.base_type.to_c_type} *#{input_parameter}")

  # `cmd` is a reference to the command being launched (which might be merged
  # with other commands). Based on that information, we can generate an
  # expression that returns the input array.
  argument_generator = proc do |cmd|
    generated = Translator::KernelLaunchArgumentGenerator.generate_arg(
      command, cmd, "cmd")

    if generated.nil?
      raise AssertionError.new("Argument not found: Trying to launch command #{cmd.unique_id}, looking for result of command #{command.unique_id}")
    end

    generated
  end

  # Add placeholder for argument (input array). This should be done here to preserve
  # the order of arguments.
  kernel_launcher.add_additional_arguments(argument_generator)

  translation = build_command_translation_result(
    result: "#{input_parameter}[_tid_]",
    command: command)

  Log.info("DONE translating ArrayInHostSectionCommand [#{command.unique_id}]")

  translation
end
39
+ end
40
+ end
41
+ end