ikra 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,14 @@
1
+ require_relative "../ast_translator"
2
+
3
module Ikra
  module Translator
    # AST translator variant used for host sections. In addition to whatever
    # `ASTTranslator` provides, it carries a reference to the command
    # translator it was created with.
    class HostSectionASTTranslator < ASTTranslator
      # @param command_translator the command translator this AST translator
      #   is associated with; stored and exposed via `command_translator`
      def initialize(command_translator:)
        # Explicit empty parentheses: the superclass initializer must not
        # receive the keyword argument.
        super()
        @command_translator = command_translator
      end

      # The command translator passed to the constructor.
      attr_reader :command_translator
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require_relative "../../ast/nodes"
2
+ require_relative "../../ast/visitor"
3
+
4
module Ikra
  module Translator

    # This visitor inserts a synthetic `__call__` method call wherever a parallel
    # section should be invoked. The intended insertion points are:
    # - The return value of the host section (must be an ArrayCommand-typed expression)
    # - Places where the content of an ArrayCommand-typed expression is accessed
    #
    # The code in this class handles return nodes only.
    class ParallelSectionInvocationVisitor < AST::Visitor
      # Wraps the value of a return node in a send node with selector
      # `:__call__`, so that `return expr` becomes `return expr.__call__`.
      def visit_return_node(node)
        returned_expression = node.value
        invocation = AST::SendNode.new(
          receiver: returned_expression,
          selector: :__call__)

        node.replace_child(returned_expression, invocation)
      end
    end

  end
end
@@ -0,0 +1,89 @@
1
+ require "set"
2
+
3
+ require_relative "../program_builder"
4
+
5
module Ikra
  module Translator
    class CommandTranslator
      # Program builder for programs that contain a host section. On top of
      # what `ProgramBuilder` does, it keeps its own set of kernel builders
      # and emits the host section source plus a dedicated entry point.
      class HostSectionProgramBuilder < ProgramBuilder
        # A host C++ function containing the source code of the host section.
        attr_accessor :host_section_source

        # The type of the result (not an array type, just the inner type).
        attr_accessor :result_type

        # An expression that returns the final result, as a `variable_size_array_t`
        # object pointing to an array in the host memory.
        attr_accessor :host_result_expression

        def initialize(environment_builder:, root_command:)
          # Bare `super` forwards both keyword arguments unchanged.
          super

          @kernel_builders = Set.new
        end

        # Raises an AssertionError naming the first required field that is
        # still unset. Checked in order: host section source, result type,
        # host result expression.
        def assert_ready_to_build
          raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No host section source code defined") if host_section_source.nil?

          raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No result type defined") if result_type.nil?

          raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No host result expression defined") if host_result_expression.nil?
        end

        # Empties the launcher list. The kernel builders collected so far in
        # `@kernel_builders` are left untouched.
        def clear_kernel_launchers
          @kernel_launchers.clear
        end

        # Registers the launcher with the superclass and additionally records
        # its kernel builders in this builder's own set.
        def add_kernel_launcher(launcher)
          super

          # Let's keep track of kernels here by ourselves
          @kernel_builders.merge(launcher.kernel_builders)
        end

        # The set of kernel builders gathered from all launchers added so far.
        def all_kernel_builders
          @kernel_builders
        end

        # Forwards `command` to `prepare_additional_args_for_launch` of every
        # kernel launcher.
        def prepare_additional_args_for_launch(command)
          kernel_launchers.each do |each_launcher|
            each_launcher.prepare_additional_args_for_launch(command)
          end
        end

        # Concatenates the device-memory-free code of every launcher except
        # the last one, skipping launchers that report `reuse_memory?`.
        def build_memory_free_except_last
          kernel_launchers[0...-1].reduce("") do |source, each_launcher|
            if each_launcher.reuse_memory?
              source
            else
              source + each_launcher.build_device_memory_free_in_host_section
            end
          end
        end

        # Builds the CUDA program. Returns the source code string.
        def build_program
          assert_ready_to_build

          # Type and struct declarations first, then kernels, then the host section.
          declarations = build_header + build_struct_types + build_header_structs +
            build_array_command_struct_types + build_environment_struct
          program_body = declarations + build_kernels + host_section_source

          # Build program entry point
          entry_point = Translator.read_file(
            file_name: "host_section_entry_point.cpp",
            replacements: {
              "prepare_environment" => environment_builder.build_environment_variable,
              "host_env_var_name" => Constants::ENV_HOST_IDENTIFIER,
              "host_result_array" => host_result_expression})

          program_body + entry_point
        end
      end
    end
  end
end
@@ -0,0 +1,226 @@
1
+ module Ikra
2
+ module Symbolic
3
class Input
  # Abstract translation hook. Accepts arbitrary keyword arguments and
  # always raises NotImplementedError; subclasses supply the real
  # implementation.
  def translate_input(**_options)
    raise NotImplementedError
  end
end
8
+
9
class SingleInput < Input
  # Translates this input through `command_translator` and exposes it as a
  # single block parameter. The parameter is named after the entry of
  # `parent_command.block_parameter_names` at `start_eat_params_offset` and
  # typed with the result type of the translated input.
  #
  # Returns a Translator::InputTranslationResult.
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    block_parameter = Translator::Variable.new(
      name: parent_command.block_parameter_names[start_eat_params_offset],
      type: translated.result_type)

    Translator::InputTranslationResult.new(
      parameters: [block_parameter],
      command_translation_result: translated)
  end
end
23
+
24
class ReduceInput < SingleInput
  # Translates this input through `command_translator` and exposes it as two
  # block parameters of the same type. Their names are the entries of
  # `parent_command.block_parameter_names` at `start_eat_params_offset` and
  # the position right after it.
  #
  # Returns a Translator::InputTranslationResult.
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    # TODO: Fix type inference (sometimes type has to be expanded)
    positions = [start_eat_params_offset, start_eat_params_offset + 1]
    parameters = positions.map do |position|
      Translator::Variable.new(
        name: parent_command.block_parameter_names[position],
        type: translated.result_type)
    end

    Translator::InputTranslationResult.new(
      parameters: parameters,
      command_translation_result: translated)
  end
end
43
+
44
class StencilArrayInput < Input
  # Translates a stencil input whose neighborhood values are presented to
  # the block as one array parameter.
  #
  # For type inference the block sees a single parameter of array type. The
  # block function itself receives one separate value per stencil offset
  # (`override_block_parameters`), and `pre_execution` holds the code from
  # "stencil_array_reconstruction.cpp" that assembles those values into the
  # array.
  #
  # Returns a Translator::InputTranslationResult.
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # One value per stencil offset
    stencil_size = parent_command.offsets.size

    # The block declares a single parameter for the whole neighborhood
    array_name = parent_command.block_parameter_names[start_eat_params_offset]

    # Translate input using visitor
    translated = command_translator.translate_input(self)

    # Take return type from previous computation
    parameters = [Translator::Variable.new(
      name: array_name,
      type: translated.result_type.to_array_type)]

    # Names of the individual values that fill the array
    element_names = (0...stencil_size).map do |position|
      "_#{array_name}_#{position}"
    end
    array_initializer = "{ " + element_names.join(", ") + " }"

    reconstruction = Translator.read_file(
      file_name: "stencil_array_reconstruction.cpp",
      replacements: {
        "type" => translated.result_type.to_c_type,
        "name" => array_name.to_s,
        "initializer" => array_initializer})

    # Pass multiple single values instead of array
    element_parameters = element_names.map do |element_name|
      Translator::Variable.new(
        name: element_name,
        type: translated.result_type)
    end

    Translator::InputTranslationResult.new(
      pre_execution: reconstruction,
      parameters: parameters,
      override_block_parameters: element_parameters,
      command_translation_result: translated)
  end
end
89
+
90
class StencilSingleInput < Input
  # Translates a stencil input whose neighborhood values are passed to the
  # block as separate parameters: one parameter per stencil offset, named by
  # consecutive entries of `parent_command.block_parameter_names` starting at
  # `start_eat_params_offset`, all typed with the translated result type.
  #
  # Returns a Translator::InputTranslationResult.
  def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
    # Translate input using visitor
    translated = command_translator.translate_input(self)

    # One parameter per stencil offset
    stencil_size = parent_command.offsets.size
    positions = start_eat_params_offset...(start_eat_params_offset + stencil_size)

    # Take return type from previous computation
    parameters = positions.map do |position|
      Translator::Variable.new(
        name: parent_command.block_parameter_names[position],
        type: translated.result_type)
    end

    Translator::InputTranslationResult.new(
      parameters: parameters,
      command_translation_result: translated)
  end
end
113
+ end
114
+
115
+ module Translator
116
# Value object describing how one input of a command was translated.
class InputTranslationResult
  # pre_execution:: Code to be executed before the actual execution of the
  #                 block begins (but inside the block function).
  # parameters:: Parameter names and types of the block (for type inference).
  # override_block_parameters:: Parameters that are actually passed to the
  #                             block instead of `parameters`, or nil for no
  #                             override. This does not affect type inference.
  # command_translation_result:: Translation result of the input command.
  attr_reader :pre_execution,
    :parameters,
    :override_block_parameters,
    :command_translation_result

  # `parameters` and `command_translation_result` are required;
  # `pre_execution` defaults to the empty string and
  # `override_block_parameters` to nil.
  def initialize(
      pre_execution: "",
      parameters:,
      override_block_parameters: nil,
      command_translation_result:)

    @command_translation_result = command_translation_result
    @override_block_parameters = override_block_parameters
    @parameters = parameters
    @pre_execution = pre_execution
  end
end
142
+
143
# Instances of this class store the result of translating multiple input
# commands. Instance methods can be used to access the values of the
# translated commands. Most methods support access by index and access by
# range, in which case values are aggregated, if meaningful.
#
# Index arguments are dispatched on `Integer` rather than `Fixnum`: `Fixnum`
# was deprecated in Ruby 2.4 and removed in Ruby 3.2, where referencing it
# raises NameError.
class EntireInputTranslationResult
  # @param input_translation_results [Array] one translation result per
  #   input; each element must respond to `parameters`, `pre_execution`,
  #   `override_block_parameters` and `command_translation_result`
  def initialize(input_translation_results)
    @input = input_translation_results
  end

  # Block parameters (as used for type inference) of one input, or the
  # concatenated parameter lists of a range of inputs.
  #
  # @raise [ArgumentError] if `index` is neither an Integer nor a Range
  def block_parameters(index = 0..-1)
    extract(index, ->(lists) { lists.reduce([], :+) }) do |input|
      input.parameters
    end
  end

  # Pre-execution code of one input. For a range, every selected input's
  # code is prefixed with a newline and the pieces are concatenated.
  #
  # @raise [ArgumentError] if `index` is neither an Integer nor a Range
  def pre_execution(index = 0..-1)
    join_with_leading_newlines = lambda do |pieces|
      pieces.reduce("") { |source, piece| source + "\n" + piece }
    end

    extract(index, join_with_leading_newlines) do |input|
      input.pre_execution
    end
  end

  # Parameters that are actually passed to the block: an input's override
  # parameters if it defines any, otherwise its regular parameters. For a
  # range, the lists of all selected inputs are concatenated.
  #
  # @raise [ArgumentError] if `index` is neither an Integer nor a Range
  def override_block_parameters(index = 0..-1)
    extract(index, ->(lists) { lists.reduce([], :+) }) do |input|
      effective_block_parameters(input)
    end
  end

  # Execution code of one translated input command, or the concatenated
  # execution code of a range of them.
  #
  # @raise [ArgumentError] if `index` is neither an Integer nor a Range
  def execution(index = 0..-1)
    extract(index, ->(pieces) { pieces.reduce("", :+) }) do |input|
      input.command_translation_result.execution
    end
  end

  # Result of one translated input command, or an array with the results
  # of a range of them.
  #
  # @raise [ArgumentError] if `index` is neither an Integer nor a Range
  def result(index = 0..-1)
    extract(index, ->(results) { results }) do |input|
      input.command_translation_result.result
    end
  end

  # The command translation result of the input at `index`.
  def command_translation_result(index)
    @input[index].command_translation_result
  end

  private

  # Shared index dispatch. Yields the single selected input for an Integer
  # index; for a Range, yields every selected input and hands the collected
  # values to `combine`.
  def extract(index, combine)
    case index
    when Integer
      yield @input[index]
    when Range
      combine.call(@input[index].map { |input| yield input })
    else
      raise ArgumentError.new("Expected Integer or Range")
    end
  end

  # Override parameters of `input` if present, its regular parameters otherwise.
  def effective_block_parameters(input)
    overrides = input.override_block_parameters
    overrides.nil? ? input.parameters : overrides
  end
end
225
+ end
226
+ end
@@ -0,0 +1,137 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator
4
+
5
# Builds a CUDA kernel. This class is responsible for generating the kernel function
# itself (not the block functions/methods though).
#
# For example:
# __global__ void kernel(env_t *_env_, int *_result_, int *_previous_1_, ...) { ... }
class KernelBuilder
  attr_accessor :kernel_name

  # --- Optional fields ---

  # An array of all methods that should be translated
  attr_accessor :methods

  # An array of all blocks that should be translated
  attr_accessor :blocks

  # Additional parameters that this kernel should accept (to access the result
  # of previous kernels)
  attr_accessor :previous_kernel_input

  # --- Required fields ---

  # A string returning the result of this kernel for one thread
  attr_accessor :block_invocation

  # A string containing the statements that execute the body of the kernel
  attr_accessor :execution

  # The result type of this kernel
  attr_accessor :result_type

  # Additional parameters for certain commands that are attached to the kernel
  attr_accessor :additional_parameters

  # IDs (mapped to types) of commands whose results are kept on the GPU
  attr_accessor :cached_results

  def initialize
    @methods = []
    @blocks = []
    @previous_kernel_input = []
    @block_invocation = nil
    @result_type = nil
    @num_threads = nil
    @additional_parameters = []
    @kernel_name = "kernel_" + CommandTranslator.next_unique_id.to_s
    @cached_results = {}
    @execution = ""
  end

  # --- Prepare kernel ---

  # Adds one or multiple methods (source code strings) to this builder.
  def add_methods(*method)
    @methods.push(*method)
  end

  # Adds a block (source code string) to this builder.
  def add_block(block)
    @blocks.push(block)
  end

  # Adds a parameter through which the kernel receives the result of a
  # previous kernel. The parameter must respond to `type` and `name`.
  def add_previous_kernel_parameter(parameter)
    @previous_kernel_input.push(parameter)
  end

  # Add additional parameters to the kernel function that might be needed for some computations
  def add_additional_parameters(parameter)
    @additional_parameters.push(parameter)
  end

  # Adds a result that has to be kept on GPU. Therefore additional memory allocations will be made
  def add_cached_result(result_id, type)
    @cached_results[result_id] = type
  end

  # Raises an AssertionError naming the first required field
  # (`block_invocation`, `result_type`) that is still nil.
  def assert_ready_to_build
    [:block_invocation, :result_type].each do |selector|
      next unless send(selector).nil?

      raise AssertionError.new(
        "Not ready to build (KernelBuilder): #{selector} is not set")
    end
  end


  # --- Constructor source code ---

  # All registered methods, separated by blank lines.
  def build_methods
    @methods.join("\n\n")
  end

  # All registered blocks, separated by blank lines.
  def build_blocks
    @blocks.join("\n\n")
  end

  # Builds the kernel function from the "kernel.cpp" template and returns
  # its source code.
  #
  # The statements that store cached results are appended to a copy of
  # `execution` only. `execution` itself is left unchanged, so building the
  # same kernel more than once yields identical source instead of
  # accumulating duplicate store statements.
  #
  # Raises AssertionError if a required field is missing.
  def build_kernel
    Log.info("Building kernel (num_blocks=#{@blocks.size})")
    assert_ready_to_build

    Translator.read_file(file_name: "kernel.cpp", replacements: {
      "block_invocation" => block_invocation,
      "execution" => execution + build_cached_result_stores,
      "kernel_name" => kernel_name,
      "parameters" => build_parameter_list,
      "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
  end

  private

  # Comma-separated kernel parameter list: environment, number of threads,
  # result pointer, one pointer per cached result, one pointer per previous
  # kernel input, then the additional parameters.
  def build_parameter_list
    p_env = Constants::ENV_TYPE + " *" + Constants::ENV_IDENTIFIER
    p_num_threads = Constants::NUM_THREADS_TYPE + " " + Constants::NUM_THREADS_IDENTIFIER
    p_result = result_type.to_c_type + " *" + Constants::RESULT_IDENTIFIER

    # Interpolation (instead of String#+) so that non-String ids work too
    p_cached_results = cached_results.map do |result_id, type|
      "#{type.to_c_type} *#{Constants::RESULT_IDENTIFIER}#{result_id}"
    end

    previous_kernel_params = previous_kernel_input.map do |var|
      var.type.to_c_type + " *" + var.name.to_s
    end

    ([p_env, p_num_threads, p_result] + p_cached_results +
      previous_kernel_params + additional_parameters).join(", ")
  end

  # One statement per cached result that copies the temporary result of the
  # current thread into the corresponding cached result array.
  def build_cached_result_stores
    cached_results.keys.map do |result_id|
      "\n #{Constants::RESULT_IDENTIFIER}#{result_id}[_tid_] = " \
        "#{Constants::TEMP_RESULT_IDENTIFIER}#{result_id};"
    end.join
  end
end
135
+ end
136
+ end
137
+ end