ikra 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -1,421 +0,0 @@
1
- require "tempfile"
2
- require "ffi"
3
- require_relative "translator"
4
- require_relative "block_translator"
5
- require_relative "../config/os_configuration"
6
- require_relative "../symbolic/symbolic"
7
- require_relative "../symbolic/visitor"
8
- require_relative "../types/object_tracer"
9
- require_relative "../config/configuration"
10
-
11
- module Ikra
12
- module Translator
13
-
14
- # Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
15
- class EnvironmentBuilder
16
-
17
- class UnionTypeStruct < FFI::Struct
18
- layout :class_id, :int32, :object_id, :int32
19
- end
20
-
21
- attr_accessor :objects
22
- attr_accessor :device_struct_allocation
23
-
24
- def initialize
25
- @objects = {}
26
- @device_struct_allocation = ""
27
- end
28
-
29
- # Adds an objects as a lexical variable.
30
- def add_object(command_id, identifier, object)
31
- cuda_id = "l#{command_id}_#{identifier}"
32
- objects[cuda_id] = object
33
-
34
- update_dev_struct_allocation(cuda_id, object)
35
-
36
- cuda_id
37
- end
38
-
39
- # Adds an object as a base array
40
- def add_base_array(command_id, object)
41
- cuda_id = "b#{command_id}_base"
42
- objects[cuda_id] = object
43
-
44
- cuda_id_size = "b#{command_id}_size"
45
- if object.class == FFI::MemoryPointer
46
- objects[cuda_id_size] = object.size / UnionTypeStruct.size
47
- else
48
- objects[cuda_id_size] = object.size
49
- end
50
-
51
- # Generate code for copying data to global memory
52
- update_dev_struct_allocation(cuda_id, object)
53
-
54
- cuda_id
55
- end
56
-
57
- # Add an array for the Structure of Arrays object layout
58
- def add_soa_array(name, object)
59
- objects[name] = object
60
- objects["#{name}_size"] = object.size
61
-
62
- update_dev_struct_allocation(name, object)
63
- end
64
-
65
- def update_dev_struct_allocation(field, object)
66
- if object.class == Array
67
- # Allocate new array
68
- @device_struct_allocation += Translator.read_file(
69
- file_name: "env_builder_copy_array.cpp",
70
- replacements: {
71
- "field" => field,
72
- "host_env" => Constants::ENV_HOST_IDENTIFIER,
73
- "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
74
- "size_bytes" => (object.first.class.to_ikra_type.c_size * object.size).to_s})
75
- elsif object.class == FFI::MemoryPointer
76
- # This is an array of union type structs
77
- # Allocate new array
78
- @device_struct_allocation += Translator.read_file(
79
- file_name: "env_builder_copy_array.cpp",
80
- replacements: {
81
- "field" => field,
82
- "host_env" => Constants::ENV_HOST_IDENTIFIER,
83
- "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
84
- "size_bytes" => object.size.to_s})
85
- else
86
- # Nothing to do, this case is handled by mem-copying the struct
87
- end
88
- end
89
-
90
- # Returns the name of the field containing the base array for a certain identity command.
91
- def self.base_identifier(command_id)
92
- "b#{command_id}_base"
93
- end
94
-
95
- def build_struct_definition
96
- @objects.freeze
97
-
98
- struct_def = "struct environment_struct\n{\n"
99
- @objects.each do |key, value|
100
- if value.class == FFI::MemoryPointer
101
- # TODO: can this be an extension method of FFI::MemoryPointer?
102
- struct_def += " union_t * #{key};\n"
103
- else
104
- struct_def += " #{value.class.to_ikra_type_obj(value).to_c_type} #{key};\n"
105
- end
106
- end
107
- struct_def += "};\n"
108
-
109
- struct_def
110
- end
111
-
112
- def build_ffi_type
113
- struct_layout = []
114
- @objects.each do |key, value|
115
- if value.class == FFI::MemoryPointer
116
- # TODO: can this be an extension method of FFI::MemoryPointer?
117
- struct_layout += [key.to_sym, :pointer]
118
- else
119
- struct_layout += [key.to_sym, value.class.to_ikra_type_obj(value).to_ffi_type]
120
- end
121
- end
122
-
123
- struct_type = Class.new(FFI::Struct)
124
- struct_type.layout(*struct_layout)
125
-
126
- struct_type
127
- end
128
-
129
- def build_ffi_object
130
- struct_type = build_ffi_type
131
- struct = struct_type.new
132
-
133
- @objects.each do |key, value|
134
- # TODO: need proper Array handling
135
- if value.class == Array
136
- # Check first element to determine type of array
137
- # TODO: check for polymorphic
138
- inner_type = value.first.class.to_ikra_type
139
- array_ptr = FFI::MemoryPointer.new(value.size * inner_type.c_size)
140
-
141
- if inner_type == Types::PrimitiveType::Int
142
- array_ptr.put_array_of_int(0, value)
143
- elsif inner_type == Types::PrimitiveType::Float
144
- array_ptr.put_array_of_float(0, value)
145
- else
146
- raise NotImplementedError
147
- end
148
-
149
- struct[key.to_sym] = array_ptr
150
- else
151
- struct[key.to_sym] = value
152
- end
153
- end
154
-
155
- struct.to_ptr
156
- end
157
-
158
- def [](command_id)
159
- CurriedBuilder.new(self, command_id)
160
- end
161
-
162
- class CurriedBuilder
163
- def initialize(builder, command_id)
164
- @builder = builder
165
- @command_id = command_id
166
- end
167
-
168
- def add_object(identifier, object)
169
- @builder.add_object(@command_id, identifier, object)
170
- end
171
-
172
- def add_base_array(object)
173
- @builder.add_base_array(@command_id, object)
174
- end
175
- end
176
-
177
- def clone
178
- result = self.class.new
179
- result.objects = @objects.clone
180
- result.device_struct_allocation = @device_struct_allocation
181
- result
182
- end
183
- end
184
-
185
- # Result of translating a {Ikra::Symbolic::ArrayCommand}.
186
- class CommandTranslationResult
187
- attr_accessor :environment_builder # @return [EnvironmentBuilder] instance that generates the struct containing accessed lexical variables.
188
- attr_accessor :generated_source # @return [String] containing the currently generated source code.
189
- attr_accessor :invocation # @return [String] source code used for invoking the block function.
190
- attr_accessor :size # @return [Fixnum] number of elements in base array
191
- attr_accessor :return_type # @return [Types::UnionType] return type of the block.
192
-
193
- def initialize(environment_builder)
194
- @environment_builder = environment_builder
195
- @generated_source = ""
196
- @invocation = "NULL"
197
- @return_type = Types::UnionType.new
198
- @size = 0
199
-
200
- @so_filename = "" # [String] file name of shared library containing CUDA kernel
201
- end
202
-
203
- def result_size
204
- @size
205
- end
206
-
207
- # Compiles CUDA source code and generates a shared library.
208
- def compile
209
- # Prepare file replacements
210
- file_replacements = {} # [Hash{String => String}] contains strings that should be replaced when reading a file
211
- file_replacements["grid_dim[0]"] = "#{[size / 250, 1].max}"
212
- file_replacements["grid_dim[1]"] = "1"
213
- file_replacements["grid_dim[2]"] = "1"
214
- file_replacements["block_dim[0]"] = "#{size >= 250 ? 250 : size}"
215
- file_replacements["block_dim[1]"] = "1"
216
- file_replacements["block_dim[2]"] = "1"
217
- file_replacements["result_type"] = @return_type.singleton_type.to_c_type
218
- file_replacements["result_size"] = "#{result_size}"
219
- file_replacements["block_invocation"] = @invocation
220
- file_replacements["env_identifier"] = Constants::ENV_IDENTIFIER
221
- file_replacements["copy_env"] = @environment_builder.device_struct_allocation
222
- file_replacements["dev_env"] = Constants::ENV_DEVICE_IDENTIFIER
223
- file_replacements["host_env"] = Constants::ENV_HOST_IDENTIFIER
224
-
225
- # Generate source code
226
- source = Translator.read_file(file_name: "header.cpp", replacements: file_replacements) +
227
- @environment_builder.build_struct_definition +
228
- @generated_source +
229
- Translator.read_file(file_name: "kernel.cpp", replacements: file_replacements) +
230
- Translator.read_file(file_name: "kernel_launcher.cpp", replacements: file_replacements)
231
-
232
- line_no_digits = Math.log(source.lines.count, 10).ceil
233
- source_with_line_numbers = source.lines.each_with_index.map do |line, num|
234
- "[#{(num + 1).to_s.rjust(line_no_digits, "0")}] #{line}"
235
- end.join("")
236
-
237
- Log.info("Generated source code:\n#{source_with_line_numbers}")
238
-
239
- # Write source code to temporary file
240
- file = Tempfile.new(["ikra_kernel", ".cu"])
241
- file.write(source)
242
- file.close
243
-
244
- # Write to codegen_expect
245
- if Configuration.codegen_expect_file_name != nil
246
- expect_file = File.new(Configuration.codegen_expect_file_name, "w+")
247
- expect_file.write(source)
248
- expect_file.close
249
- end
250
-
251
- # Run compiler
252
- @so_filename = "#{file.path}.#{Configuration.so_suffix}"
253
- nvcc_command = Configuration.nvcc_invocation_string(file.path, @so_filename)
254
-
255
- Log.info("Compiling kernel: #{nvcc_command}")
256
- time_before = Time.now
257
- compile_status = %x(#{nvcc_command})
258
- Log.info("Done, took #{Time.now - time_before} s")
259
-
260
- if $? != 0
261
- raise "nvcc failed: #{compile_status}"
262
- end
263
- end
264
-
265
- # Attaches a the compiled shared library via Ruby FFI and invokes the kernel.
266
- def execute
267
- if !File.exist?(@so_filename)
268
- compile
269
- end
270
-
271
- time_before = Time.now
272
- ffi_interface = Module.new
273
- ffi_interface.extend(FFI::Library)
274
- ffi_interface.ffi_lib(@so_filename)
275
- ffi_interface.attach_function(:launch_kernel, [:pointer], :pointer)
276
- environment_object = @environment_builder.build_ffi_object
277
- Log.info("FFI transfer time: #{Time.now - time_before} s")
278
-
279
- time_before = Time.now
280
- result = ffi_interface.launch_kernel(environment_object)
281
- Log.info("Kernel time: #{Time.now - time_before} s")
282
-
283
- if return_type.singleton_type == Types::PrimitiveType::Int
284
- result.read_array_of_int(result_size)
285
- elsif return_type.singleton_type == Types::PrimitiveType::Float
286
- result.read_array_of_float(result_size)
287
- else
288
- raise NotImplementedError
289
- end
290
- end
291
- end
292
-
293
- # A visitor traversing the tree (currently list) of symbolic array commands. Every command is converted into a {CommandTranslationResult} and possibly merged with the result of dependent (previous) results. This is how kernel fusion is implemented.
294
- class ArrayCommandVisitor < Symbolic::Visitor
295
-
296
- def initialize(environment_builder)
297
- @environment_builder = environment_builder
298
- end
299
-
300
- def visit_array_new_command(command)
301
- # create brand new result
302
- command_translation_result = CommandTranslationResult.new(@environment_builder)
303
-
304
- block_translation_result = Translator.translate_block(
305
- ast: command.ast,
306
- # only one block parameter (int)
307
- block_parameter_types: {command.block_parameter_names.first => Types::UnionType.create_int},
308
- environment_builder: @environment_builder[command.unique_id],
309
- lexical_variables: command.lexical_externals,
310
- command_id: command.unique_id)
311
-
312
- command_translation_result.generated_source = block_translation_result.generated_source
313
-
314
- tid = "threadIdx.x + blockIdx.x * blockDim.x"
315
- command_translation_result.invocation = "#{block_translation_result.function_name}(#{Constants::ENV_IDENTIFIER}, #{tid})"
316
- command_translation_result.size = command.size
317
- command_translation_result.return_type = block_translation_result.result_type
318
-
319
- command_translation_result
320
- end
321
-
322
- def visit_array_identity_command(command)
323
- # create brand new result
324
- command_translation_result = CommandTranslationResult.new(@environment_builder)
325
-
326
- # no source code generation
327
-
328
- if Configuration::JOB_REORDERING
329
- reordering_array = command.target.each_with_index.sort do |a, b|
330
- a.first.class.object_id <=> b.first.class.object_id
331
- end.map(&:last)
332
-
333
- # Generate debug output
334
- dbg_elements = []
335
- dbg_last = command.target[reordering_array[0]].class
336
- dbg_counter = 1
337
-
338
- for idx in 1..(command.target.size - 1)
339
- dbg_next = command.target[reordering_array[idx]].class
340
-
341
- if dbg_next == dbg_last
342
- dbg_counter += 1
343
- else
344
- dbg_elements.push("#{dbg_last.to_s} (#{dbg_counter})")
345
- dbg_last = dbg_next
346
- dbg_counter = 1
347
- end
348
- end
349
- dbg_elements.push("#{dbg_last.to_s} (#{dbg_counter})")
350
-
351
- Log.info("Generated job reordering array, resulting in: [#{dbg_elements.join(", ")}]")
352
-
353
- reordering_array_name = @environment_builder.add_base_array("#{command.unique_id}j", reordering_array)
354
- command_translation_result.invocation = "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[#{Constants::ENV_IDENTIFIER}->#{reordering_array_name}[threadIdx.x + blockIdx.x * blockDim.x]]"
355
- else
356
- command_translation_result.invocation = "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[threadIdx.x + blockIdx.x * blockDim.x]"
357
- end
358
-
359
- command_translation_result.size = command.size
360
- command_translation_result.return_type = command.base_type
361
-
362
- command_translation_result
363
- end
364
-
365
- def visit_array_map_command(command)
366
- dependent_result = super # visit target (dependent) command
367
- command_translation_result = CommandTranslationResult.new(@environment_builder)
368
-
369
- block_translation_result = Translator.translate_block(
370
- ast: command.ast,
371
- block_parameter_types: {command.block_parameter_names.first => dependent_result.return_type},
372
- environment_builder: @environment_builder[command.unique_id],
373
- lexical_variables: command.lexical_externals,
374
- command_id: command.unique_id)
375
-
376
- command_translation_result.generated_source = dependent_result.generated_source + "\n\n" + block_translation_result.generated_source
377
-
378
- command_translation_result.invocation = "#{block_translation_result.function_name}(#{Constants::ENV_IDENTIFIER}, #{dependent_result.invocation})"
379
- command_translation_result.size = dependent_result.size
380
- command_translation_result.return_type = block_translation_result.result_type
381
-
382
- command_translation_result
383
- end
384
- end
385
-
386
- # Retrieves all base arrays and registers them with the {EnvironmentBuilder}. Yhis functionality is in a separate class to avoid scattering with object tracer calls.
387
- class BaseArrayRegistrator < Symbolic::Visitor
388
- def initialize(environment_builder, object_tracer)
389
- @environment_builder = environment_builder
390
- @object_tracer = object_tracer
391
- end
392
-
393
- def visit_array_identity_command(command)
394
- need_union_type = !command.base_type.is_singleton?
395
- transformed_base_array = @object_tracer.convert_base_array(command.target, need_union_type)
396
- @environment_builder.add_base_array(command.unique_id, transformed_base_array)
397
- end
398
- end
399
-
400
- class << self
401
- def translate_command(command)
402
- environment_builder = EnvironmentBuilder.new
403
-
404
- # Run type inference for objects/classes and trace objects
405
- object_tracer = TypeInference::ObjectTracer.new(command)
406
- all_objects = object_tracer.trace_all
407
-
408
- # Translate command
409
- command_translation_result = command.accept(ArrayCommandVisitor.new(environment_builder))
410
-
411
- # Add SoA arrays to environment
412
- object_tracer.register_soa_arrays(environment_builder)
413
-
414
- # Add base arrays to environment
415
- command.accept(BaseArrayRegistrator.new(environment_builder, object_tracer))
416
-
417
- command_translation_result
418
- end
419
- end
420
- end
421
- end
@@ -1,35 +0,0 @@
1
- require_relative "../ast/nodes"
2
- require_relative "../ast/visitor"
3
- require_relative "../types/type_inference"
4
-
5
- module Ikra
6
- module Translator
7
- class LocalVariablesEnumerator < AST::Visitor
8
- def initialize
9
- @vars = {}
10
- end
11
-
12
- def add_local_var(var, type)
13
- @vars[var] = type
14
- end
15
-
16
- def local_variables
17
- @vars
18
- end
19
-
20
- def visit_lvar_read_node(node)
21
- add_local_var(node.identifier, node.get_type)
22
- end
23
-
24
- def visit_lvar_write_node(node)
25
- add_local_var(node.identifier, node.get_type)
26
- super(node)
27
- end
28
-
29
- def visit_for_node(node)
30
- add_local_var(node.iterator_identifier, Types::UnionType.create_int)
31
- super(node)
32
- end
33
- end
34
- end
35
- end