ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,14 @@
1
+ require_relative "../ast_translator"
2
+
3
+ module Ikra
4
+ module Translator
5
+ class HostSectionASTTranslator < ASTTranslator
6
+ attr_reader :command_translator
7
+
8
+ def initialize(command_translator:)
9
+ super()
10
+ @command_translator = command_translator
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,20 @@
1
+ require_relative "../../ast/nodes"
2
+ require_relative "../../ast/visitor"
3
+
4
+ module Ikra
5
+ module Translator
6
+
7
+ # This visitor inserts a synthetic method call whenever a parallel section should be
8
+ # invoked, i.e.:
9
+ # - The return value of the host section (must be an ArrayCommand-typed expression)
10
+ # - When the content of an ArrayCommand-typed expression is accessed
11
+ class ParallelSectionInvocationVisitor < AST::Visitor
12
+ def visit_return_node(node)
13
+ node.replace_child(
14
+ node.value,
15
+ AST::SendNode.new(receiver: node.value, selector: :__call__))
16
+ end
17
+ end
18
+
19
+ end
20
+ end
@@ -0,0 +1,89 @@
1
+ require "set"
2
+
3
+ require_relative "../program_builder"
4
+
5
+ module Ikra
6
+ module Translator
7
+ class CommandTranslator
8
+ class HostSectionProgramBuilder < ProgramBuilder
9
+ # A host C++ function containing the source code of the host section.
10
+ attr_accessor :host_section_source
11
+
12
+ # The type of the result (not an array type, just the inner type).
13
+ attr_accessor :result_type
14
+
15
+ # An expression that returns the final result, as a `variable_size_array_t` object
16
+ # pointing to an array in the host memory.
17
+ attr_accessor :host_result_expression
18
+
19
+ def initialize(environment_builder:, root_command:)
20
+ super
21
+
22
+ @kernel_builders = Set.new
23
+ end
24
+
25
+ def assert_ready_to_build
26
+ if host_section_source == nil
27
+ raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No host section source code defined")
28
+ end
29
+
30
+ if result_type == nil
31
+ raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No result type defined")
32
+ end
33
+
34
+ if host_result_expression == nil
35
+ raise AssertionError.new("Not ready to build (HostSectionProgramBuilder): No host result expression defined")
36
+ end
37
+ end
38
+
39
+ def clear_kernel_launchers
40
+ @kernel_launchers.clear
41
+ end
42
+
43
+ def add_kernel_launcher(launcher)
44
+ super
45
+
46
+ # Let's keep track of kernels here by ourselves
47
+ @kernel_builders.merge(launcher.kernel_builders)
48
+ end
49
+
50
+ def all_kernel_builders
51
+ return @kernel_builders
52
+ end
53
+
54
+ def prepare_additional_args_for_launch(command)
55
+ kernel_launchers.each do |launcher|
56
+ launcher.prepare_additional_args_for_launch(command)
57
+ end
58
+ end
59
+
60
+ def build_memory_free_except_last
61
+ result = ""
62
+
63
+ for launcher in kernel_launchers[0...-1]
64
+ if !launcher.reuse_memory?
65
+ result = result + launcher.build_device_memory_free_in_host_section
66
+ end
67
+ end
68
+
69
+ return result
70
+ end
71
+
72
+ # Builds the CUDA program. Returns the source code string.
73
+ def build_program
74
+ assert_ready_to_build
75
+
76
+ result = build_header + build_struct_types + build_header_structs +
77
+ build_array_command_struct_types + build_environment_struct +
78
+ build_kernels + host_section_source
79
+
80
+ # Build program entry point
81
+ return result + Translator.read_file(file_name: "host_section_entry_point.cpp", replacements: {
82
+ "prepare_environment" => environment_builder.build_environment_variable,
83
+ "host_env_var_name" => Constants::ENV_HOST_IDENTIFIER,
84
+ "host_result_array" => host_result_expression})
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,226 @@
1
+ module Ikra
2
+ module Symbolic
3
+ class Input
4
+ def translate_input(**kwargs)
5
+ raise NotImplementedError.new
6
+ end
7
+ end
8
+
9
+ class SingleInput < Input
10
+ def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
11
+ # Translate input using visitor
12
+ input_command_translation_result = command_translator.translate_input(self)
13
+
14
+ parameters = [Translator::Variable.new(
15
+ name: parent_command.block_parameter_names[start_eat_params_offset],
16
+ type: input_command_translation_result.result_type)]
17
+
18
+ return Translator::InputTranslationResult.new(
19
+ parameters: parameters,
20
+ command_translation_result: input_command_translation_result)
21
+ end
22
+ end
23
+
24
+ class ReduceInput < SingleInput
25
+ def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
26
+ # Translate input using visitor
27
+ input_command_translation_result = command_translator.translate_input(self)
28
+
29
+ # TODO: Fix type inference (sometimes type has to be expanded)
30
+ parameters = [
31
+ Translator::Variable.new(
32
+ name: parent_command.block_parameter_names[start_eat_params_offset],
33
+ type: input_command_translation_result.result_type),
34
+ Translator::Variable.new(
35
+ name: parent_command.block_parameter_names[start_eat_params_offset + 1],
36
+ type: input_command_translation_result.result_type)]
37
+
38
+ return Translator::InputTranslationResult.new(
39
+ parameters: parameters,
40
+ command_translation_result: input_command_translation_result)
41
+ end
42
+ end
43
+
44
+ class StencilArrayInput < Input
45
+ def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
46
+ # Parameters are allocated in a constant-sized array
47
+
48
+ # Count number of parameters
49
+ num_parameters = parent_command.offsets.size
50
+
51
+ # Get single parameter name
52
+ block_param_name = parent_command.block_parameter_names[start_eat_params_offset]
53
+
54
+ # Translate input using visitor
55
+ input_command_translation_result = command_translator.translate_input(self)
56
+
57
+ # Take return type from previous computation
58
+ parameters = [Translator::Variable.new(
59
+ name: block_param_name,
60
+ type: input_command_translation_result.result_type.to_array_type)]
61
+
62
+
63
+ # Allocate and fill array of parameters
64
+ actual_parameter_names = (0...num_parameters).map do |param_index|
65
+ "_#{block_param_name}_#{param_index}"
66
+ end
67
+
68
+ param_array_init = "{ " + actual_parameter_names.join(", ") + " }"
69
+
70
+ pre_execution = Translator.read_file(file_name: "stencil_array_reconstruction.cpp", replacements: {
71
+ "type" => input_command_translation_result.result_type.to_c_type,
72
+ "name" => block_param_name.to_s,
73
+ "initializer" => param_array_init})
74
+
75
+ # Pass multiple single values instead of array
76
+ override_block_parameters = actual_parameter_names.map do |param_name|
77
+ Translator::Variable.new(
78
+ name: param_name,
79
+ type: input_command_translation_result.result_type)
80
+ end
81
+
82
+ return Translator::InputTranslationResult.new(
83
+ pre_execution: pre_execution,
84
+ parameters: parameters,
85
+ override_block_parameters: override_block_parameters,
86
+ command_translation_result: input_command_translation_result)
87
+ end
88
+ end
89
+
90
+ class StencilSingleInput < Input
91
+ def translate_input(parent_command:, command_translator:, start_eat_params_offset: 0)
92
+ # Pass separate parameters
93
+
94
+ # Translate input using visitor
95
+ input_command_translation_result = command_translator.translate_input(self)
96
+
97
+ # Count number of parameters
98
+ num_parameters = parent_command.offsets.size
99
+
100
+ # Take return type from previous computation
101
+ parameters = []
102
+ for index in start_eat_params_offset...(start_eat_params_offset + num_parameters)
103
+ parameters.push(Translator::Variable.new(
104
+ name: parent_command.block_parameter_names[index],
105
+ type: input_command_translation_result.result_type))
106
+ end
107
+
108
+ return Translator::InputTranslationResult.new(
109
+ parameters: parameters,
110
+ command_translation_result: input_command_translation_result)
111
+ end
112
+ end
113
+ end
114
+
115
+ module Translator
116
+ class InputTranslationResult
117
+ # Code to be executed before the actual execution of the block begins (but inside the
118
+ # block function)
119
+ attr_reader :pre_execution
120
+
121
+ # Parameter names and types of the block (for type inference)
122
+ attr_reader :parameters
123
+
124
+ # Change (override) parameters of the block (to actually pass different parameters).
125
+ # This does not affect type inference.
126
+ attr_reader :override_block_parameters
127
+
128
+ attr_reader :command_translation_result
129
+
130
+ def initialize(
131
+ pre_execution: "",
132
+ parameters:,
133
+ override_block_parameters: nil,
134
+ command_translation_result:)
135
+
136
+ @pre_execution = pre_execution
137
+ @parameters = parameters
138
+ @override_block_parameters = override_block_parameters
139
+ @command_translation_result = command_translation_result
140
+ end
141
+ end
142
+
143
+ # Instances of this class store the result of translation of multiple input commands.
144
+ # Instance methods can be used to access the values of the translated commands. Most
145
+ # methods support access by index and access by range, in which case values are
146
+ # aggregated, if meaningful.
147
+ class EntireInputTranslationResult
148
+ def initialize(input_translation_results)
149
+ @input = input_translation_results
150
+ end
151
+
152
+ def block_parameters(index = 0..-1)
153
+ if index.is_a?(Fixnum)
154
+ return @input[index].parameters
155
+ elsif index.is_a?(Range)
156
+ return @input[index].reduce([]) do |acc, n|
157
+ acc + n.parameters
158
+ end
159
+ else
160
+ raise ArgumentError.new("Expected Fixnum or Range")
161
+ end
162
+ end
163
+
164
+ def pre_execution(index = 0..-1)
165
+ if index.is_a?(Fixnum)
166
+ return @input[index].pre_execution
167
+ elsif index.is_a?(Range)
168
+ return @input[index].reduce("") do |acc, n|
169
+ acc + "\n" + n.pre_execution
170
+ end
171
+ else
172
+ raise ArgumentError.new("Expected Fixnum or Range")
173
+ end
174
+ end
175
+
176
+ def override_block_parameters(index = 0..-1)
177
+ if index.is_a?(Fixnum)
178
+ if @input[index].override_block_parameters == nil
179
+ # No override specified
180
+ return @input[index].parameters
181
+ else
182
+ return @input[index].override_block_parameters
183
+ end
184
+ elsif index.is_a?(Range)
185
+ return @input[index].reduce([]) do |acc, n|
186
+ if n.override_block_parameters == nil
187
+ acc + n.parameters
188
+ else
189
+ acc + n.override_block_parameters
190
+ end
191
+ end
192
+ else
193
+ raise ArgumentError.new("Expected Fixnum or Range")
194
+ end
195
+ end
196
+
197
+ def execution(index = 0..-1)
198
+ if index.is_a?(Fixnum)
199
+ return @input[index].command_translation_result.execution
200
+ elsif index.is_a?(Range)
201
+ return @input[index].reduce("") do |acc, n|
202
+ acc + n.command_translation_result.execution
203
+ end
204
+ else
205
+ raise ArgumentError.new("Expected Fixnum or Range")
206
+ end
207
+ end
208
+
209
+ def result(index = 0..-1)
210
+ if index.is_a?(Fixnum)
211
+ return @input[index].command_translation_result.result
212
+ elsif index.is_a?(Range)
213
+ return @input[index].map do |n|
214
+ n.command_translation_result.result
215
+ end
216
+ else
217
+ raise ArgumentError.new("Expected Fixnum or Range")
218
+ end
219
+ end
220
+
221
+ def command_translation_result(index)
222
+ return @input[index].command_translation_result
223
+ end
224
+ end
225
+ end
226
+ end
@@ -0,0 +1,137 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator
4
+
5
+ # Builds a CUDA kernel. This class is responsible for generating the kernel function
6
+ # itself (not the block functions/methods though).
7
+ #
8
+ # For example:
9
+ # __global__ void kernel(env_t *_env_, int *_result_, int *_previous_1_, ...) { ... }
10
+
11
+ class KernelBuilder
12
+ attr_accessor :kernel_name
13
+
14
+ # --- Optional fields ---
15
+
16
+ # An array of all methods that should be translated
17
+ attr_accessor :methods
18
+
19
+ # An array of all blocks that should be translated
20
+ attr_accessor :blocks
21
+
22
+ # Additional parameters that this kernel should accept (to access the result
23
+ # of previous kernels)
24
+ attr_accessor :previous_kernel_input
25
+
26
+ # --- Required fields ---
27
+
28
+ # A string returning the result of this kernel for one thread
29
+ attr_accessor :block_invocation
30
+
31
+ # A string containing the statements that execute the body of the kernel
32
+ attr_accessor :execution
33
+
34
+ # The result type of this kernel
35
+ attr_accessor :result_type
36
+
37
+ # Additional parameters for certain commands that are attached to the kernel
38
+ attr_accessor :additional_parameters
39
+
40
+ # IDs of commands whose results are kept on the GPU
41
+ attr_accessor :cached_results
42
+
43
+ def initialize
44
+ @methods = []
45
+ @blocks = []
46
+ @previous_kernel_input = []
47
+ @block_invocation = nil
48
+ @num_threads = nil
49
+ @additional_parameters = []
50
+ @kernel_name = "kernel_" + CommandTranslator.next_unique_id.to_s
51
+ @cached_results = {}
52
+ @execution = ""
53
+ end
54
+
55
+ # --- Prepare kernel ---
56
+
57
+ # Adds one or multiple methods (source code strings) to this builder.
58
+ def add_methods(*method)
59
+ @methods.push(*method)
60
+ end
61
+
62
+ # Adds a block (source code string) to this builder.
63
+ def add_block(block)
64
+ @blocks.push(block)
65
+ end
66
+
67
+ def add_previous_kernel_parameter(parameter)
68
+ @previous_kernel_input.push(parameter)
69
+ end
70
+
71
+ # Add additional parameters to the kernel function that might be needed for some computations
72
+ def add_additional_parameters(parameter)
73
+ @additional_parameters.push(parameter)
74
+ end
75
+
76
+ # Adds a result that has to be kept on GPU. Therefore additional memory allocations will be made
77
+ def add_cached_result(result_id, type)
78
+ @cached_results[result_id] = type
79
+ end
80
+
81
+ def assert_ready_to_build
82
+ required_values = [:block_invocation, :result_type]
83
+
84
+ for selector in required_values
85
+ if send(selector) == nil
86
+ raise AssertionError.new(
87
+ "Not ready to build (KernelBuilder): #{selector} is not set")
88
+ end
89
+ end
90
+ end
91
+
92
+
93
+ # --- Constructor source code ---
94
+
95
+ def build_methods
96
+ return @methods.join("\n\n")
97
+ end
98
+
99
+ def build_blocks
100
+ return @blocks.join("\n\n")
101
+ end
102
+
103
+ def build_kernel
104
+ Log.info("Building kernel (num_blocks=#{@blocks.size})")
105
+ assert_ready_to_build
106
+
107
+ # Build parameters
108
+ p_env = Constants::ENV_TYPE + " *" + Constants::ENV_IDENTIFIER
109
+ p_num_threads = Constants::NUM_THREADS_TYPE + " " + Constants::NUM_THREADS_IDENTIFIER
110
+ p_result = result_type.to_c_type + " *" + Constants::RESULT_IDENTIFIER
111
+ p_cached_results = cached_results.map do |result_id, type|
112
+ type.to_c_type + " *" + Constants::RESULT_IDENTIFIER + result_id
113
+ end
114
+
115
+ cached_results.each do |result_id, type|
116
+ @execution = execution + "\n" + " " + Constants::RESULT_IDENTIFIER + result_id + "[_tid_] = " + Constants::TEMP_RESULT_IDENTIFIER + result_id + ";"
117
+ end
118
+
119
+ previous_kernel_params = []
120
+ for var in previous_kernel_input
121
+ previous_kernel_params.push(var.type.to_c_type + " *" + var.name.to_s)
122
+ end
123
+
124
+ parameters = ([p_env, p_num_threads, p_result] + p_cached_results + previous_kernel_params + additional_parameters).join(", ")
125
+
126
+ # Build kernel
127
+ return Translator.read_file(file_name: "kernel.cpp", replacements: {
128
+ "block_invocation" => block_invocation,
129
+ "execution" => execution,
130
+ "kernel_name" => kernel_name,
131
+ "parameters" => parameters,
132
+ "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
133
+ end
134
+ end
135
+ end
136
+ end
137
+ end