ikra 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,129 @@
1
module Ikra
    module Translator
        class CommandTranslator < Symbolic::Visitor
            # Translates an ArrayStencilCommand into source code for one thread.
            #
            # The generated code splits the thread ID (`_tid_`) into one index per
            # dimension, checks whether any neighbor requested via `command.offsets`
            # is out of bounds and, depending on that check, either uses the command's
            # out-of-range value or invokes the translated block with the neighboring
            # values of the previous computation.
            #
            # @param command the stencil command to translate
            # @return the translation result built by `build_command_translation_result`
            def visit_array_stencil_command(command)
                Log.info("Translating ArrayStencilCommand [#{command.unique_id}]")

                super

                num_dims = command.dimensions.size

                # Process dependent computation (receiver), returns [InputTranslationResult]
                input = translate_entire_input(command)

                # All variables accessed by this block should be prefixed with the unique ID
                # of the command in the environment.
                env_builder = @environment_builder[command.unique_id]

                block_translation_result = Translator.translate_block(
                    block_def_node: command.block_def_node,
                    environment_builder: env_builder,
                    lexical_variables: command.lexical_externals,
                    command_id: command.unique_id,
                    entire_input_translation: input)

                kernel_builder.add_methods(block_translation_result.aux_methods)
                kernel_builder.add_block(block_translation_result.block_source)

                # Compute indices in all dimensions
                index_generators = (0...num_dims).map do |dim_index|
                    index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
                    index_mod = command.dimensions[dim_index]

                    if dim_index > 0
                        "(_tid_ / #{index_div}) % #{index_mod}"
                    else
                        # No modulo required for first dimension
                        "_tid_ / #{index_div}"
                    end
                end

                compute_indices = index_generators.each_with_index.map do |gen, dim_index|
                    "int temp_stencil_dim_#{dim_index} = #{gen};"
                end.join("\n")

                # Check if an index is out of bounds in any dimension
                out_of_bounds_check = Array.new(num_dims) do |dim_index|
                    # Smallest and largest offset requested in this dimension
                    min_in_dim, max_in_dim = command.offsets.map do |offset|
                        offset[dim_index]
                    end.minmax

                    dim_size = command.dimensions[dim_index]

                    if dim_size.is_a?(String)
                        # This is not a compile-time constant. Pass dimension size as argument
                        # to the kernel.
                        dim_size_expr = "dim_size_#{dim_index}"
                        kernel_builder.add_additional_parameters("int #{dim_size_expr}")
                        kernel_launcher.add_additional_arguments(dim_size)
                    else
                        dim_size_expr = dim_size
                    end

                    "temp_stencil_dim_#{dim_index} + #{min_in_dim} >= 0 && temp_stencil_dim_#{dim_index} + #{max_in_dim} < #{dim_size_expr}"
                end.join(" && ")

                # `previous_result` should be an expression returning the array containing the
                # result of the previous computation.
                previous_result = input.result(0)

                arguments = ["_env_"]

                # Pass values from previous computation that are required by this thread.
                # Reconstruct actual indices from indices for each dimension.
                command.offsets.each do |offset|
                    multiplier = 1

                    # Walk from the least significant dimension to the most significant one
                    global_index = (num_dims - 1).downto(0).map do |dim_index|
                        term = "(temp_stencil_dim_#{dim_index} + #{offset[dim_index]}) * #{multiplier}"

                        # The multiplier is only consumed by the next (more significant)
                        # dimension. Skipping the update after dimension 0 avoids a
                        # misleading warning for one-dimensional stencils whose size is
                        # only known at run time.
                        if dim_index > 0
                            next_dim_size = command.dimensions[dim_index]

                            if next_dim_size.is_a?(String)
                                Log.warn("Cannot handle multi-dimensional stencil computations in host sections yet.")
                            else
                                multiplier = multiplier * next_dim_size
                            end
                        end

                        term
                    end

                    arguments.push("#{previous_result}[#{global_index.join(" + ")}]")
                end

                # Push additional arguments (e.g., index)
                arguments.push(*input.result(1..-1))
                argument_str = arguments.join(", ")
                stencil_computation = block_translation_result.function_name + "(#{argument_str})"

                temp_var_name = "temp_stencil_#{CommandTranslator.next_unique_id}"

                # The following template checks if there is at least one index out of bounds. If
                # so, the fallback value is used. Otherwise, the block is executed.
                command_execution = Translator.read_file(file_name: "stencil_body.cpp", replacements: {
                    "execution" => input.execution,
                    "temp_var" => temp_var_name,
                    "result_type" => command.result_type.to_c_type,
                    "compute_indices" => compute_indices,
                    "out_of_bounds_check" => out_of_bounds_check,
                    "out_of_bounds_fallback" => command.out_of_range_value.to_s,
                    "stencil_computation" => stencil_computation})

                command_translation = build_command_translation_result(
                    execution: command_execution,
                    result: temp_var_name,
                    command: command)

                Log.info("DONE translating ArrayStencilCommand [#{command.unique_id}]")

                return command_translation
            end
        end
    end
end
@@ -0,0 +1,30 @@
1
module Ikra
    module Translator
        class CommandTranslator < Symbolic::Visitor
            # Translates an ArrayZipCommand.
            #
            # The inputs are translated first. Their result expressions are then
            # combined into an inline initialization of the struct type obtained from
            # `command.result_type.singleton_type`. That struct type is registered
            # with the program builder so that its definition can be generated.
            #
            # @param command the zip command to translate
            # @return [CommandTranslationResult] carrying the execution code of the
            #   inputs and the struct initialization as result expression
            def visit_array_zip_command(command)
                Log.info("Translating ArrayZipCommand [#{command.unique_id}]")

                super

                # Translate the dependent computations (receiver and zipped arrays)
                translated_input = translate_entire_input(command)

                # Ikra struct type of a single zipped element; the program builder
                # needs to know it to emit the struct definition.
                struct_type = command.result_type.singleton_type
                program_builder.structs.add(struct_type)

                CommandTranslationResult.new(
                    execution: translated_input.execution,
                    result: struct_type.generate_inline_initialization(translated_input.result),
                    command: command).tap do
                    Log.info("DONE translating ArrayZipCommand [#{command.unique_id}]")
                end
            end
        end
    end
end
@@ -0,0 +1,264 @@
1
+ require_relative "../translator"
2
+ require_relative "../../config/configuration"
3
+ require_relative "../../config/os_configuration"
4
+ require_relative "../../symbolic/symbolic"
5
+ require_relative "../../symbolic/visitor"
6
+ require_relative "../../types/types"
7
+ require_relative "../input_translator"
8
+
9
module Ikra
    module Translator
        # Visitor that translates symbolic array commands into source code.
        #
        # Kernels that are currently being built are kept on a stack of kernel
        # launchers. A finished launcher is popped from the stack and handed to the
        # program builder.
        class CommandTranslator < Symbolic::Visitor
            @@unique_id = 0

            # Increments and returns a counter that is shared by all translators.
            def self.next_unique_id
                @@unique_id = @@unique_id + 1
                return @@unique_id
            end

            # Result of translating a single command.
            class CommandTranslationResult
                # Source code that performs the computation of this command for one thread. May
                # consist of multiple statements. Optional.
                attr_reader :execution

                # Source code that returns the result of the computation. If the computation can
                # be expressed in a single expression, this string can contain the entire
                # computation and `execution` should then be empty.
                attr_reader :result

                # The command this result was generated for.
                attr_reader :command

                def initialize(execution: "", result:, command:)
                    @execution = execution
                    @command = command
                    @result = result
                end

                # Result type of the translated command.
                def result_type
                    return command.result_type
                end
            end

            # Entry point for translator. Returns a [ProgramBuilder], which contains all
            # required information for compiling and executing the CUDA program.
            def self.translate_command(command)
                command_translator = self.new(root_command: command)
                command_translator.start_translation
                return command_translator.program_builder
            end

            attr_reader :environment_builder
            attr_reader :kernel_launcher_stack
            attr_reader :program_builder
            attr_reader :object_tracer
            attr_reader :root_command

            # @param root_command the command whose translation is requested
            def initialize(root_command:)
                @kernel_launcher_stack = []
                @environment_builder = EnvironmentBuilder.new

                @program_builder = ProgramBuilder.new(
                    environment_builder: environment_builder,
                    root_command: root_command)

                @root_command = root_command
            end

            # Traces all objects reachable from the root command, translates the root
            # command inside a fresh kernel launcher and finally registers the SoA
            # arrays of the object tracer in the environment.
            def start_translation
                Log.info("CommandTranslator: Starting translation...")

                # Trace all objects
                @object_tracer = TypeInference::ObjectTracer.new(root_command)
                object_tracer.trace_all


                # --- Translate ---

                # Create new kernel launcher
                push_kernel_launcher

                # Translate the command (might create additional kernels)
                result = root_command.accept(self)

                # Add kernel builder to ProgramBuilder
                pop_kernel_launcher(result)

                # --- End of Translation ---


                # Add SoA arrays to environment
                object_tracer.register_soa_arrays(environment_builder)
            end

            # Kernel launcher on top of the stack, i.e., the one currently being built.
            def kernel_launcher
                return kernel_launcher_stack.last
            end

            # Kernel builder of the kernel launcher on top of the stack.
            def kernel_builder
                return kernel_launcher_stack.last.kernel_builder
            end


            # --- Actual Visitor parts start here ---

            def visit_array_command(command)
                if command.keep && !command.has_previous_result?
                    # Create slot for result pointer on GPU in env
                    environment_builder.allocate_previous_pointer(command.unique_id)
                end
            end

            # Pushes a kernel launcher onto the stack. Either an existing launcher, a
            # kernel builder to be wrapped in a new launcher, or nothing (a new
            # launcher with a new kernel builder is created) may be given.
            #
            # @raise [ArgumentError] if both a kernel builder and a kernel launcher
            #   are given
            def push_kernel_launcher(kernel_builder: nil, kernel_launcher: nil)
                if kernel_builder != nil && kernel_launcher == nil
                    @kernel_launcher_stack.push(KernelLauncher.new(kernel_builder))
                elsif kernel_builder == nil && kernel_launcher != nil
                    @kernel_launcher_stack.push(kernel_launcher)
                elsif kernel_builder == nil && kernel_launcher == nil
                    # Default: add new kernel builder
                    @kernel_launcher_stack.push(KernelLauncher.new(KernelBuilder.new))
                else
                    raise ArgumentError.new("kernel_builder and kernel_laucher given but only expected one")
                end
            end

            # Pops a kernel launcher from the kernel launcher stack. This method is called
            # when all blocks (parallel sections) for that kernel have been translated, i.e.,
            # the kernel is fully built. The translation result of the kernel's root
            # command is stored in the kernel builder and the launcher is added to the
            # program builder.
            #
            # @param command_translation_result [CommandTranslationResult]
            # @return the popped kernel launcher
            # @raise [AssertionError] if the stack is empty
            def pop_kernel_launcher(command_translation_result)
                previous_launcher = kernel_launcher_stack.pop

                # Must be checked before the launcher is used; otherwise an empty stack
                # surfaces as a NoMethodError on nil.
                if previous_launcher == nil
                    raise AssertionError.new("Attempt to pop kernel launcher, but stack is empty")
                end

                builder = previous_launcher.kernel_builder
                builder.block_invocation = command_translation_result.result
                builder.execution = command_translation_result.execution
                builder.result_type = command_translation_result.result_type

                program_builder.add_kernel_launcher(previous_launcher)

                return previous_launcher
            end

            # Translates all inputs of `command` and bundles the individual results in
            # an [EntireInputTranslationResult].
            def translate_entire_input(command)
                input_translated = command.input.each_with_index.map do |input, index|
                    input.translate_input(
                        parent_command: command,
                        command_translator: self,
                        # Assuming that every input consumes exactly one parameter
                        start_eat_params_offset: index)
                end

                return EntireInputTranslationResult.new(input_translated)
            end

            # Processes a [Symbolic::Input] object, which contains a reference to a command
            # object and information about how elements are accessed. If elements are only
            # accessed according to the current thread ID, this input can be fused. Otherwise,
            # a new kernel will be built.
            #
            # @raise [NotImplementedError] if the access pattern is neither `:tid` nor
            #   `:entire`
            def translate_input(input)
                previous_result = nil

                if input.command.has_previous_result?
                    # Read previously computed (cached) value
                    Log.info("Reusing kept result for command #{input.command.unique_id}: #{input.command.gpu_result_pointer}")

                    environment_builder.add_previous_result(
                        input.command.unique_id, input.command.gpu_result_pointer)
                    environment_builder.add_previous_result_type(
                        input.command.unique_id, input.command.result_type)

                    # Only thread-wise access reads a single cell of the cached array
                    cell_access = ""
                    if input.pattern == :tid
                        cell_access = "[_tid_]"
                    end

                    kernel_launcher.configure_grid(input.command.size)
                    previous_result = CommandTranslationResult.new(
                        execution: "",
                        result: "((#{input.command.result_type.to_c_type} *)(_env_->" + "prev_#{input.command.unique_id}))#{cell_access}",
                        command: input.command)

                    if input.pattern == :tid
                        return previous_result
                    end
                end

                if input.pattern == :tid
                    # Stay in current kernel
                    return input.command.accept(self)
                elsif input.pattern == :entire
                    if !input.command.has_previous_result?
                        # Create new kernel
                        push_kernel_launcher

                        previous_result = input.command.accept(self)
                        previous_result_kernel_var = kernel_launcher.kernel_result_var_name

                        pop_kernel_launcher(previous_result)
                    else
                        kernel_launcher.use_cached_result(
                            input.command.unique_id, input.command.result_type)
                        previous_result_kernel_var = "prev_" + input.command.unique_id.to_s
                    end

                    # Add parameter for previous input to this kernel
                    kernel_launcher.add_previous_kernel_parameter(Variable.new(
                        name: previous_result_kernel_var,
                        type: previous_result.result_type))

                    # This is a root command for this kernel, determine grid/block dimensions
                    kernel_launcher.configure_grid(input.command.size, block_size: input.command.block_size)

                    kernel_translation = CommandTranslationResult.new(
                        result: previous_result_kernel_var,
                        command: input.command)

                    return kernel_translation
                else
                    raise NotImplementedError.new("Unknown input pattern: #{input.pattern}")
                end
            end

            # Builds the [CommandTranslationResult] for `command`. If the command's
            # result should be kept, the result is additionally stored in a temporary
            # variable and registered as a cached result with the kernel builder, the
            # kernel launcher and the environment builder.
            def build_command_translation_result(
                execution: "", result:, command:)

                result_type = command.result_type
                unique_id = command.unique_id

                if command.keep
                    # Store result in global array
                    # TODO: Remove DEBUG
                    command_result = Constants::TEMP_RESULT_IDENTIFIER + unique_id.to_s
                    command_execution = execution + "\n        " + result_type.to_c_type + " " + command_result + " = " + result + ";"

                    kernel_builder.add_cached_result(unique_id.to_s, result_type)
                    kernel_launcher.add_cached_result(unique_id.to_s, result_type)
                    environment_builder.add_previous_result_type(unique_id, result_type)
                else
                    command_result = result
                    command_execution = execution
                end

                return CommandTranslationResult.new(
                    execution: command_execution,
                    result: command_result,
                    command: command)
            end
        end
    end
end
254
+
255
+ require_relative "array_combine_command"
256
+ require_relative "array_index_command"
257
+ require_relative "array_identity_command"
258
+ require_relative "array_reduce_command"
259
+ require_relative "array_stencil_command"
260
+ require_relative "array_zip_command"
261
+ require_relative "../host_section/array_host_section_command"
262
+
263
+ require_relative "../program_builder"
264
+ require_relative "../kernel_launcher/kernel_launcher"
@@ -0,0 +1,32 @@
1
module Ikra
    module Errors
        # Base class of all errors raised for a failed CUDA program execution.
        #
        # Derives from StandardError (not Exception) so that a plain `rescue` or
        # `rescue => e` catches it, as is expected for application-level errors.
        class CudaError < StandardError

        end

        # Raised for the error code mapped to an illegal address.
        class CudaErrorIllegalAddress < CudaError

        end

        # Raised for every error code without a dedicated error class.
        class CudaUnknownError < CudaError
            # The error code that was passed to `raiseCudaError`.
            attr_reader :error_code

            def initialize(error_code)
                @error_code = error_code
                super("CudaUnknownError (#{error_code})")
            end

            def to_s
                "CudaUnknownError (#{error_code})"
            end
        end

        # Error code that is mapped to CudaErrorIllegalAddress.
        # NOTE(review): presumably `cudaErrorIllegalAddress` of the CUDA runtime this
        # release targets; the numeric value differs between toolkit versions, so
        # confirm against the installed toolkit.
        ILLEGAL_ADDRESS_ERROR_CODE = 77

        # Raises the error class that corresponds to `error_code`.
        #
        # @param error_code [Integer] error code of the failed execution
        # @raise [CudaErrorIllegalAddress] if the code is ILLEGAL_ADDRESS_ERROR_CODE
        # @raise [CudaUnknownError] for every other code
        def self.raiseCudaError(error_code)
            case error_code
            when ILLEGAL_ADDRESS_ERROR_CODE
                raise CudaErrorIllegalAddress.new
            else
                raise CudaUnknownError.new(error_code)
            end
        end
    end
end