ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,129 @@
1
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translates an ArrayStencilCommand into source code executed by one thread.
      #
      # The generated code derives one index per dimension from `_tid_`, checks
      # whether any stencil offset leaves the array bounds and then (via the
      # "stencil_body.cpp" template) either uses the out-of-range fallback value
      # or invokes the translated block with the neighboring values.
      #
      # Returns the value produced by `build_command_translation_result`.
      def visit_array_stencil_command(command)
        Log.info("Translating ArrayStencilCommand [#{command.unique_id}]")

        super

        dimension_count = command.dimensions.size

        # Process dependent computation (receiver) before anything else.
        input = translate_entire_input(command)

        # Every stencil offset becomes one argument of the block function.
        parameter_count = command.offsets.size

        # Variables accessed by the block live in the environment under the
        # unique ID of this command.
        env_builder = @environment_builder[command.unique_id]

        block_translation_result = Translator.translate_block(
          block_def_node: command.block_def_node,
          environment_builder: env_builder,
          lexical_variables: command.lexical_externals,
          command_id: command.unique_id,
          entire_input_translation: input)

        kernel_builder.add_methods(block_translation_result.aux_methods)
        kernel_builder.add_block(block_translation_result.block_source)

        # One C statement per dimension that recovers the index in that dimension
        # from the flat thread ID.
        compute_indices = (0...dimension_count).map do |dim|
          stride = command.dimensions.drop(dim + 1).reduce(1, :*)
          extent = command.dimensions[dim]

          # The first dimension needs no modulo.
          index_expr = dim > 0 ? "(_tid_ / #{stride}) % #{extent}" : "_tid_ / #{stride}"

          "int temp_stencil_dim_#{dim} = #{index_expr};"
        end.join("\n")

        # Boolean C expression: true iff the smallest and largest offset stay
        # inside the array in every dimension.
        out_of_bounds_check = (0...dimension_count).map do |dim|
          lowest, highest = command.offsets.map { |offset| offset[dim] }.minmax
          extent = command.dimensions[dim]

          if extent.is_a?(String)
            # Not a compile-time constant: hand the dimension size to the kernel
            # as an additional parameter.
            extent_expr = "dim_size_#{dim}"
            kernel_builder.add_additional_parameters("int #{extent_expr}")
            kernel_launcher.add_additional_arguments(extent)
          else
            extent_expr = extent
          end

          index_var = "temp_stencil_dim_#{dim}"
          "#{index_var} + #{lowest} >= 0 && #{index_var} + #{highest} < #{extent_expr}"
        end.join(" && ")

        # Expression returning the array that holds the result of the previous
        # computation.
        previous_result = input.result(0)

        arguments = ["_env_"]

        # For every offset, rebuild the flat index from the per-dimension indices
        # (last dimension first) and read the value from the previous result.
        parameter_count.times do |param|
          multiplier = 1
          summands = []

          (0...dimension_count).reverse_each do |dim|
            summands << "(temp_stencil_dim_#{dim} + #{command.offsets[param][dim]}) * #{multiplier}"

            extent = command.dimensions[dim]

            if extent.is_a?(String)
              # The multiplier is left unchanged in this case; only a warning is
              # emitted.
              Log.warn("Cannot handle multi-dimensional stencil computations in host sections yet.")
            else
              multiplier *= extent
            end
          end

          arguments << "#{previous_result}[#{summands.join(" + ")}]"
        end

        # Append additional arguments (e.g., index).
        arguments.push(*input.result(1..-1))
        stencil_computation = block_translation_result.function_name + "(#{arguments.join(", ")})"

        temp_var_name = "temp_stencil_#{CommandTranslator.next_unique_id}"

        # Fill in the template that selects between fallback value and block
        # invocation.
        command_execution = Translator.read_file(
          file_name: "stencil_body.cpp",
          replacements: {
            "execution" => input.execution,
            "temp_var" => temp_var_name,
            "result_type" => command.result_type.to_c_type,
            "compute_indices" => compute_indices,
            "out_of_bounds_check" => out_of_bounds_check,
            "out_of_bounds_fallback" => command.out_of_range_value.to_s,
            "stencil_computation" => stencil_computation
          })

        command_translation = build_command_translation_result(
          execution: command_execution,
          result: temp_var_name,
          command: command)

        Log.info("DONE translating ArrayStencilCommand [#{command.unique_id}]")

        command_translation
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translates an ArrayZipCommand.
      #
      # The inputs are translated first; their results are then combined into
      # an inline initialization of the struct type that represents one zipped
      # element. The struct type is registered with the program builder so that
      # source code for its definition can be generated.
      #
      # Returns a [CommandTranslationResult].
      #
      # NOTE(review): the result is built with CommandTranslationResult.new
      # directly instead of build_command_translation_result, so `command.keep`
      # is not handled on this path -- confirm this is intended.
      def visit_array_zip_command(command)
        Log.info("Translating ArrayZipCommand [#{command.unique_id}]")

        super

        # Process dependent computation (receiver).
        input = translate_entire_input(command)

        # Ikra struct type of a single zipped element.
        struct_type = command.result_type.singleton_type
        program_builder.structs.add(struct_type)

        execution_source = input.execution
        inline_struct = struct_type.generate_inline_initialization(input.result)

        translation = CommandTranslationResult.new(
          execution: execution_source,
          result: inline_struct,
          command: command)

        Log.info("DONE translating ArrayZipCommand [#{command.unique_id}]")

        translation
      end
    end
  end
end
@@ -0,0 +1,264 @@
1
+ require_relative "../translator"
2
+ require_relative "../../config/configuration"
3
+ require_relative "../../config/os_configuration"
4
+ require_relative "../../symbolic/symbolic"
5
+ require_relative "../../symbolic/visitor"
6
+ require_relative "../../types/types"
7
+ require_relative "../input_translator"
8
+
9
module Ikra
  module Translator
    # Visitor that walks a tree of array commands and produces the kernels,
    # kernel launchers and environment required to run the root command.
    # The visit methods for the individual command types are defined in the
    # files required at the bottom of this file.
    class CommandTranslator < Symbolic::Visitor
      # Counter backing `next_unique_id`; shared by all translators.
      @@unique_id = 0

      # Returns a fresh integer ID (1, 2, 3, ...).
      def self.next_unique_id
        @@unique_id += 1
      end

      # Result of translating a single command.
      class CommandTranslationResult
        # Source code that performs the computation of this command for one
        # thread. May consist of multiple statements. Optional.
        attr_reader :execution

        # Source code that returns the result of the computation. If the
        # computation can be expressed in a single expression, this string can
        # contain the entire computation and `execution` should then be empty.
        attr_reader :result

        # The command this result was generated for.
        attr_reader :command

        def initialize(execution: "", result:, command:)
          @execution = execution
          @command = command
          @result = result
        end

        # Result type of the underlying command.
        def result_type
          command.result_type
        end
      end

      # Entry point for translator. Returns a [ProgramBuilder], which contains
      # all required information for compiling and executing the CUDA program.
      def self.translate_command(command)
        command_translator = new(root_command: command)
        command_translator.start_translation
        command_translator.program_builder
      end

      attr_reader :environment_builder
      attr_reader :kernel_launcher_stack
      attr_reader :program_builder
      attr_reader :object_tracer
      attr_reader :root_command

      # Sets up an empty kernel launcher stack, a new environment builder and a
      # program builder for `root_command`.
      def initialize(root_command:)
        @kernel_launcher_stack = []
        @environment_builder = EnvironmentBuilder.new

        @program_builder = ProgramBuilder.new(
          environment_builder: environment_builder,
          root_command: root_command)

        @root_command = root_command
      end

      # Translates the root command and registers the resulting kernel
      # launcher(s) with the program builder.
      def start_translation
        Log.info("CommandTranslator: Starting translation...")

        # Trace all objects. The return value of `trace_all` is not needed here.
        @object_tracer = TypeInference::ObjectTracer.new(root_command)
        object_tracer.trace_all

        # --- Translate ---

        # Create new kernel launcher
        push_kernel_launcher

        # Translate the command (might create additional kernels)
        result = root_command.accept(self)

        # Add kernel builder to ProgramBuilder
        pop_kernel_launcher(result)

        # --- End of Translation ---

        # Add SoA arrays to environment
        object_tracer.register_soa_arrays(environment_builder)
      end

      # Kernel launcher currently being built (top of the stack).
      def kernel_launcher
        kernel_launcher_stack.last
      end

      # Kernel builder of the kernel launcher currently being built.
      def kernel_builder
        kernel_launcher_stack.last.kernel_builder
      end

      # --- Actual Visitor parts start here ---

      # Common handling for all array commands: a command whose result is kept
      # but that has no previous result yet gets a slot for its result pointer
      # in the environment.
      def visit_array_command(command)
        if command.keep && !command.has_previous_result?
          # Create slot for result pointer on GPU in env
          environment_builder.allocate_previous_pointer(command.unique_id)
        end
      end

      # Pushes a kernel launcher onto the stack. Either an existing
      # `kernel_launcher` or a `kernel_builder` (which is wrapped in a new
      # KernelLauncher) may be given; without arguments a launcher with a new
      # KernelBuilder is created.
      #
      # Raises ArgumentError if both arguments are given.
      def push_kernel_launcher(kernel_builder: nil, kernel_launcher: nil)
        if !kernel_builder.nil? && !kernel_launcher.nil?
          raise ArgumentError.new("kernel_builder and kernel_launcher given but only expected one")
        end

        launcher =
          if !kernel_launcher.nil?
            kernel_launcher
          elsif !kernel_builder.nil?
            KernelLauncher.new(kernel_builder)
          else
            # Default: add new kernel builder
            KernelLauncher.new(KernelBuilder.new)
          end

        @kernel_launcher_stack.push(launcher)
      end

      # Pops a kernel launcher from the stack. This method is called when all
      # blocks (parallel sections) for that kernel have been translated, i.e.,
      # the kernel is fully built. The translation result is stored in the
      # launcher's kernel builder and the launcher is handed to the program
      # builder.
      #
      # Raises AssertionError (not defined in this file; presumably provided
      # elsewhere in the project) if the stack is empty.
      def pop_kernel_launcher(command_translation_result)
        previous_launcher = kernel_launcher_stack.pop

        # Must be checked before the launcher is used; otherwise an empty stack
        # surfaces as a NoMethodError on nil instead of this assertion.
        if previous_launcher.nil?
          raise AssertionError.new("Attempt to pop kernel launcher, but stack is empty")
        end

        builder = previous_launcher.kernel_builder
        builder.block_invocation = command_translation_result.result
        builder.execution = command_translation_result.execution
        builder.result_type = command_translation_result.result_type

        program_builder.add_kernel_launcher(previous_launcher)

        previous_launcher
      end

      # Translates every input of `command` and wraps the individual results in
      # an [EntireInputTranslationResult].
      def translate_entire_input(command)
        input_translated = command.input.each_with_index.map do |input, index|
          input.translate_input(
            parent_command: command,
            command_translator: self,
            # Assuming that every input consumes exactly one parameter
            start_eat_params_offset: index)
        end

        EntireInputTranslationResult.new(input_translated)
      end

      # Processes a [Symbolic::Input] object, which contains a reference to a
      # command object and information about how elements are accessed. If
      # elements are only accessed according to the current thread ID, this
      # input can be fused. Otherwise, a new kernel will be built.
      #
      # Raises NotImplementedError for patterns other than :tid and :entire.
      def translate_input(input)
        previous_result = ""

        if input.command.has_previous_result?
          # Read previously computed (cached) value
          Log.info("Reusing kept result for command #{input.command.unique_id}: #{input.command.gpu_result_pointer}")

          environment_builder.add_previous_result(
            input.command.unique_id, input.command.gpu_result_pointer)
          environment_builder.add_previous_result_type(
            input.command.unique_id, input.command.result_type)

          # With the :tid pattern every thread reads only its own cell.
          cell_access = input.pattern == :tid ? "[_tid_]" : ""

          kernel_launcher.configure_grid(input.command.size)
          previous_result = CommandTranslationResult.new(
            execution: "",
            result: "((#{input.command.result_type.to_c_type} *)(_env_->prev_#{input.command.unique_id}))#{cell_access}",
            command: input.command)

          return previous_result if input.pattern == :tid
        end

        if input.pattern == :tid
          # Stay in current kernel
          return input.command.accept(self)
        elsif input.pattern == :entire
          if !input.command.has_previous_result?
            # Create new kernel
            push_kernel_launcher

            previous_result = input.command.accept(self)
            previous_result_kernel_var = kernel_launcher.kernel_result_var_name

            pop_kernel_launcher(previous_result)
          else
            kernel_launcher.use_cached_result(
              input.command.unique_id, input.command.result_type)
            previous_result_kernel_var = "prev_" + input.command.unique_id.to_s
          end

          # Add parameter for previous input to this kernel
          kernel_launcher.add_previous_kernel_parameter(Variable.new(
            name: previous_result_kernel_var,
            type: previous_result.result_type))

          # This is a root command for this kernel, determine grid/block dimensions
          kernel_launcher.configure_grid(input.command.size, block_size: input.command.block_size)

          return CommandTranslationResult.new(
            result: previous_result_kernel_var,
            command: input.command)
        else
          raise NotImplementedError.new("Unknown input pattern: #{input.pattern}")
        end
      end

      # Builds the [CommandTranslationResult] for `command`. If the command's
      # result is to be kept, the result expression is first assigned to a
      # temporary variable and the cached result is registered with the kernel
      # builder, the kernel launcher and the environment builder.
      def build_command_translation_result(
          execution: "", result:, command:)

        result_type = command.result_type
        unique_id = command.unique_id

        if command.keep
          # Store result in global array
          # TODO: Remove DEBUG
          command_result = Constants::TEMP_RESULT_IDENTIFIER + unique_id.to_s
          command_execution = execution + "\n " + result_type.to_c_type + " " + command_result + " = " + result + ";"

          kernel_builder.add_cached_result(unique_id.to_s, result_type)
          kernel_launcher.add_cached_result(unique_id.to_s, result_type)
          environment_builder.add_previous_result_type(unique_id, result_type)
        else
          command_result = result
          command_execution = execution
        end

        CommandTranslationResult.new(
          execution: command_execution,
          result: command_result,
          command: command)
      end
    end
  end
end
254
+
255
+ require_relative "array_combine_command"
256
+ require_relative "array_index_command"
257
+ require_relative "array_identity_command"
258
+ require_relative "array_reduce_command"
259
+ require_relative "array_stencil_command"
260
+ require_relative "array_zip_command"
261
+ require_relative "../host_section/array_host_section_command"
262
+
263
+ require_relative "../program_builder"
264
+ require_relative "../kernel_launcher/kernel_launcher"
@@ -0,0 +1,32 @@
1
module Ikra
  module Errors
    # Base class for all errors raised for a failed CUDA call. Derives from
    # StandardError (not Exception) so that a plain `rescue` can handle it.
    class CudaError < StandardError
    end

    # Raised for the CUDA error code ILLEGAL_ADDRESS_ERROR_CODE.
    class CudaErrorIllegalAddress < CudaError
    end

    # Raised for every CUDA error code without a dedicated error class.
    class CudaUnknownError < CudaError
      # The numeric error code that was reported.
      attr_reader :error_code

      def initialize(error_code)
        @error_code = error_code
        super("CudaUnknownError (#{error_code})")
      end

      def to_s
        "CudaUnknownError (#{error_code})"
      end
    end

    # Error code that is mapped to CudaErrorIllegalAddress.
    # NOTE(review): presumably the value of cudaErrorIllegalAddress in the CUDA
    # runtime this was written against; other toolkit versions may use a
    # different number -- confirm.
    ILLEGAL_ADDRESS_ERROR_CODE = 77

    # Raises the error class that corresponds to `error_code`. This method
    # always raises: CudaErrorIllegalAddress for ILLEGAL_ADDRESS_ERROR_CODE,
    # CudaUnknownError (carrying the code) for everything else.
    def self.raiseCudaError(error_code)
      case error_code
      when ILLEGAL_ADDRESS_ERROR_CODE
        raise CudaErrorIllegalAddress.new
      else
        raise CudaUnknownError.new(error_code)
      end
    end
  end
end