ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -1,421 +0,0 @@
1
- require "tempfile"
2
- require "ffi"
3
- require_relative "translator"
4
- require_relative "block_translator"
5
- require_relative "../config/os_configuration"
6
- require_relative "../symbolic/symbolic"
7
- require_relative "../symbolic/visitor"
8
- require_relative "../types/object_tracer"
9
- require_relative "../config/configuration"
10
-
11
- module Ikra
12
- module Translator
13
-
14
- # Interface for transferring data to the CUDA side using FFI. Builds a struct containing all required objects (including lexical variables). Traces objects.
15
- class EnvironmentBuilder
16
-
17
- class UnionTypeStruct < FFI::Struct
18
- layout :class_id, :int32, :object_id, :int32
19
- end
20
-
21
- attr_accessor :objects
22
- attr_accessor :device_struct_allocation
23
-
24
- def initialize
25
- @objects = {}
26
- @device_struct_allocation = ""
27
- end
28
-
29
- # Adds an object as a lexical variable.
30
- def add_object(command_id, identifier, object)
31
- cuda_id = "l#{command_id}_#{identifier}"
32
- objects[cuda_id] = object
33
-
34
- update_dev_struct_allocation(cuda_id, object)
35
-
36
- cuda_id
37
- end
38
-
39
- # Adds an object as a base array
40
- def add_base_array(command_id, object)
41
- cuda_id = "b#{command_id}_base"
42
- objects[cuda_id] = object
43
-
44
- cuda_id_size = "b#{command_id}_size"
45
- if object.class == FFI::MemoryPointer
46
- objects[cuda_id_size] = object.size / UnionTypeStruct.size
47
- else
48
- objects[cuda_id_size] = object.size
49
- end
50
-
51
- # Generate code for copying data to global memory
52
- update_dev_struct_allocation(cuda_id, object)
53
-
54
- cuda_id
55
- end
56
-
57
- # Add an array for the Structure of Arrays object layout
58
- def add_soa_array(name, object)
59
- objects[name] = object
60
- objects["#{name}_size"] = object.size
61
-
62
- update_dev_struct_allocation(name, object)
63
- end
64
-
65
- def update_dev_struct_allocation(field, object)
66
- if object.class == Array
67
- # Allocate new array
68
- @device_struct_allocation += Translator.read_file(
69
- file_name: "env_builder_copy_array.cpp",
70
- replacements: {
71
- "field" => field,
72
- "host_env" => Constants::ENV_HOST_IDENTIFIER,
73
- "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
74
- "size_bytes" => (object.first.class.to_ikra_type.c_size * object.size).to_s})
75
- elsif object.class == FFI::MemoryPointer
76
- # This is an array of union type structs
77
- # Allocate new array
78
- @device_struct_allocation += Translator.read_file(
79
- file_name: "env_builder_copy_array.cpp",
80
- replacements: {
81
- "field" => field,
82
- "host_env" => Constants::ENV_HOST_IDENTIFIER,
83
- "dev_env" => Constants::ENV_DEVICE_IDENTIFIER,
84
- "size_bytes" => object.size.to_s})
85
- else
86
- # Nothing to do, this case is handled by mem-copying the struct
87
- end
88
- end
89
-
90
- # Returns the name of the field containing the base array for a certain identity command.
91
- def self.base_identifier(command_id)
92
- "b#{command_id}_base"
93
- end
94
-
95
- def build_struct_definition
96
- @objects.freeze
97
-
98
- struct_def = "struct environment_struct\n{\n"
99
- @objects.each do |key, value|
100
- if value.class == FFI::MemoryPointer
101
- # TODO: can this be an extension method of FFI::MemoryPointer?
102
- struct_def += " union_t * #{key};\n"
103
- else
104
- struct_def += " #{value.class.to_ikra_type_obj(value).to_c_type} #{key};\n"
105
- end
106
- end
107
- struct_def += "};\n"
108
-
109
- struct_def
110
- end
111
-
112
- def build_ffi_type
113
- struct_layout = []
114
- @objects.each do |key, value|
115
- if value.class == FFI::MemoryPointer
116
- # TODO: can this be an extension method of FFI::MemoryPointer?
117
- struct_layout += [key.to_sym, :pointer]
118
- else
119
- struct_layout += [key.to_sym, value.class.to_ikra_type_obj(value).to_ffi_type]
120
- end
121
- end
122
-
123
- struct_type = Class.new(FFI::Struct)
124
- struct_type.layout(*struct_layout)
125
-
126
- struct_type
127
- end
128
-
129
- def build_ffi_object
130
- struct_type = build_ffi_type
131
- struct = struct_type.new
132
-
133
- @objects.each do |key, value|
134
- # TODO: need proper Array handling
135
- if value.class == Array
136
- # Check first element to determine type of array
137
- # TODO: check for polymorphic
138
- inner_type = value.first.class.to_ikra_type
139
- array_ptr = FFI::MemoryPointer.new(value.size * inner_type.c_size)
140
-
141
- if inner_type == Types::PrimitiveType::Int
142
- array_ptr.put_array_of_int(0, value)
143
- elsif inner_type == Types::PrimitiveType::Float
144
- array_ptr.put_array_of_float(0, value)
145
- else
146
- raise NotImplementedError
147
- end
148
-
149
- struct[key.to_sym] = array_ptr
150
- else
151
- struct[key.to_sym] = value
152
- end
153
- end
154
-
155
- struct.to_ptr
156
- end
157
-
158
- def [](command_id)
159
- CurriedBuilder.new(self, command_id)
160
- end
161
-
162
- class CurriedBuilder
163
- def initialize(builder, command_id)
164
- @builder = builder
165
- @command_id = command_id
166
- end
167
-
168
- def add_object(identifier, object)
169
- @builder.add_object(@command_id, identifier, object)
170
- end
171
-
172
- def add_base_array(object)
173
- @builder.add_base_array(@command_id, object)
174
- end
175
- end
176
-
177
- def clone
178
- result = self.class.new
179
- result.objects = @objects.clone
180
- result.device_struct_allocation = @device_struct_allocation
181
- result
182
- end
183
- end
184
-
185
- # Result of translating a {Ikra::Symbolic::ArrayCommand}.
186
- class CommandTranslationResult
187
- attr_accessor :environment_builder # @return [EnvironmentBuilder] instance that generates the struct containing accessed lexical variables.
188
- attr_accessor :generated_source # @return [String] containing the currently generated source code.
189
- attr_accessor :invocation # @return [String] source code used for invoking the block function.
190
- attr_accessor :size # @return [Fixnum] number of elements in base array
191
- attr_accessor :return_type # @return [Types::UnionType] return type of the block.
192
-
193
- def initialize(environment_builder)
194
- @environment_builder = environment_builder
195
- @generated_source = ""
196
- @invocation = "NULL"
197
- @return_type = Types::UnionType.new
198
- @size = 0
199
-
200
- @so_filename = "" # [String] file name of shared library containing CUDA kernel
201
- end
202
-
203
- def result_size
204
- @size
205
- end
206
-
207
- # Compiles CUDA source code and generates a shared library.
208
- def compile
209
- # Prepare file replacements
210
- file_replacements = {} # [Hash{String => String}] contains strings that should be replaced when reading a file
211
- file_replacements["grid_dim[0]"] = "#{[size / 250, 1].max}"
212
- file_replacements["grid_dim[1]"] = "1"
213
- file_replacements["grid_dim[2]"] = "1"
214
- file_replacements["block_dim[0]"] = "#{size >= 250 ? 250 : size}"
215
- file_replacements["block_dim[1]"] = "1"
216
- file_replacements["block_dim[2]"] = "1"
217
- file_replacements["result_type"] = @return_type.singleton_type.to_c_type
218
- file_replacements["result_size"] = "#{result_size}"
219
- file_replacements["block_invocation"] = @invocation
220
- file_replacements["env_identifier"] = Constants::ENV_IDENTIFIER
221
- file_replacements["copy_env"] = @environment_builder.device_struct_allocation
222
- file_replacements["dev_env"] = Constants::ENV_DEVICE_IDENTIFIER
223
- file_replacements["host_env"] = Constants::ENV_HOST_IDENTIFIER
224
-
225
- # Generate source code
226
- source = Translator.read_file(file_name: "header.cpp", replacements: file_replacements) +
227
- @environment_builder.build_struct_definition +
228
- @generated_source +
229
- Translator.read_file(file_name: "kernel.cpp", replacements: file_replacements) +
230
- Translator.read_file(file_name: "kernel_launcher.cpp", replacements: file_replacements)
231
-
232
- line_no_digits = Math.log(source.lines.count, 10).ceil
233
- source_with_line_numbers = source.lines.each_with_index.map do |line, num|
234
- "[#{(num + 1).to_s.rjust(line_no_digits, "0")}] #{line}"
235
- end.join("")
236
-
237
- Log.info("Generated source code:\n#{source_with_line_numbers}")
238
-
239
- # Write source code to temporary file
240
- file = Tempfile.new(["ikra_kernel", ".cu"])
241
- file.write(source)
242
- file.close
243
-
244
- # Write to codegen_expect
245
- if Configuration.codegen_expect_file_name != nil
246
- expect_file = File.new(Configuration.codegen_expect_file_name, "w+")
247
- expect_file.write(source)
248
- expect_file.close
249
- end
250
-
251
- # Run compiler
252
- @so_filename = "#{file.path}.#{Configuration.so_suffix}"
253
- nvcc_command = Configuration.nvcc_invocation_string(file.path, @so_filename)
254
-
255
- Log.info("Compiling kernel: #{nvcc_command}")
256
- time_before = Time.now
257
- compile_status = %x(#{nvcc_command})
258
- Log.info("Done, took #{Time.now - time_before} s")
259
-
260
- if $? != 0
261
- raise "nvcc failed: #{compile_status}"
262
- end
263
- end
264
-
265
- # Attaches the compiled shared library via Ruby FFI and invokes the kernel.
266
- def execute
267
- if !File.exist?(@so_filename)
268
- compile
269
- end
270
-
271
- time_before = Time.now
272
- ffi_interface = Module.new
273
- ffi_interface.extend(FFI::Library)
274
- ffi_interface.ffi_lib(@so_filename)
275
- ffi_interface.attach_function(:launch_kernel, [:pointer], :pointer)
276
- environment_object = @environment_builder.build_ffi_object
277
- Log.info("FFI transfer time: #{Time.now - time_before} s")
278
-
279
- time_before = Time.now
280
- result = ffi_interface.launch_kernel(environment_object)
281
- Log.info("Kernel time: #{Time.now - time_before} s")
282
-
283
- if return_type.singleton_type == Types::PrimitiveType::Int
284
- result.read_array_of_int(result_size)
285
- elsif return_type.singleton_type == Types::PrimitiveType::Float
286
- result.read_array_of_float(result_size)
287
- else
288
- raise NotImplementedError
289
- end
290
- end
291
- end
292
-
293
- # A visitor traversing the tree (currently list) of symbolic array commands. Every command is converted into a {CommandTranslationResult} and possibly merged with the results of the (previous) commands it depends on. This is how kernel fusion is implemented.
294
- class ArrayCommandVisitor < Symbolic::Visitor
295
-
296
- def initialize(environment_builder)
297
- @environment_builder = environment_builder
298
- end
299
-
300
- def visit_array_new_command(command)
301
- # create brand new result
302
- command_translation_result = CommandTranslationResult.new(@environment_builder)
303
-
304
- block_translation_result = Translator.translate_block(
305
- ast: command.ast,
306
- # only one block parameter (int)
307
- block_parameter_types: {command.block_parameter_names.first => Types::UnionType.create_int},
308
- environment_builder: @environment_builder[command.unique_id],
309
- lexical_variables: command.lexical_externals,
310
- command_id: command.unique_id)
311
-
312
- command_translation_result.generated_source = block_translation_result.generated_source
313
-
314
- tid = "threadIdx.x + blockIdx.x * blockDim.x"
315
- command_translation_result.invocation = "#{block_translation_result.function_name}(#{Constants::ENV_IDENTIFIER}, #{tid})"
316
- command_translation_result.size = command.size
317
- command_translation_result.return_type = block_translation_result.result_type
318
-
319
- command_translation_result
320
- end
321
-
322
- def visit_array_identity_command(command)
323
- # create brand new result
324
- command_translation_result = CommandTranslationResult.new(@environment_builder)
325
-
326
- # no source code generation
327
-
328
- if Configuration::JOB_REORDERING
329
- reordering_array = command.target.each_with_index.sort do |a, b|
330
- a.first.class.object_id <=> b.first.class.object_id
331
- end.map(&:last)
332
-
333
- # Generate debug output
334
- dbg_elements = []
335
- dbg_last = command.target[reordering_array[0]].class
336
- dbg_counter = 1
337
-
338
- for idx in 1..(command.target.size - 1)
339
- dbg_next = command.target[reordering_array[idx]].class
340
-
341
- if dbg_next == dbg_last
342
- dbg_counter += 1
343
- else
344
- dbg_elements.push("#{dbg_last.to_s} (#{dbg_counter})")
345
- dbg_last = dbg_next
346
- dbg_counter = 1
347
- end
348
- end
349
- dbg_elements.push("#{dbg_last.to_s} (#{dbg_counter})")
350
-
351
- Log.info("Generated job reordering array, resulting in: [#{dbg_elements.join(", ")}]")
352
-
353
- reordering_array_name = @environment_builder.add_base_array("#{command.unique_id}j", reordering_array)
354
- command_translation_result.invocation = "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[#{Constants::ENV_IDENTIFIER}->#{reordering_array_name}[threadIdx.x + blockIdx.x * blockDim.x]]"
355
- else
356
- command_translation_result.invocation = "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[threadIdx.x + blockIdx.x * blockDim.x]"
357
- end
358
-
359
- command_translation_result.size = command.size
360
- command_translation_result.return_type = command.base_type
361
-
362
- command_translation_result
363
- end
364
-
365
- def visit_array_map_command(command)
366
- dependent_result = super # visit target (dependent) command
367
- command_translation_result = CommandTranslationResult.new(@environment_builder)
368
-
369
- block_translation_result = Translator.translate_block(
370
- ast: command.ast,
371
- block_parameter_types: {command.block_parameter_names.first => dependent_result.return_type},
372
- environment_builder: @environment_builder[command.unique_id],
373
- lexical_variables: command.lexical_externals,
374
- command_id: command.unique_id)
375
-
376
- command_translation_result.generated_source = dependent_result.generated_source + "\n\n" + block_translation_result.generated_source
377
-
378
- command_translation_result.invocation = "#{block_translation_result.function_name}(#{Constants::ENV_IDENTIFIER}, #{dependent_result.invocation})"
379
- command_translation_result.size = dependent_result.size
380
- command_translation_result.return_type = block_translation_result.result_type
381
-
382
- command_translation_result
383
- end
384
- end
385
-
386
- # Retrieves all base arrays and registers them with the {EnvironmentBuilder}. This functionality is in a separate class to avoid scattering object tracer calls throughout the code.
387
- class BaseArrayRegistrator < Symbolic::Visitor
388
- def initialize(environment_builder, object_tracer)
389
- @environment_builder = environment_builder
390
- @object_tracer = object_tracer
391
- end
392
-
393
- def visit_array_identity_command(command)
394
- need_union_type = !command.base_type.is_singleton?
395
- transformed_base_array = @object_tracer.convert_base_array(command.target, need_union_type)
396
- @environment_builder.add_base_array(command.unique_id, transformed_base_array)
397
- end
398
- end
399
-
400
- class << self
401
- def translate_command(command)
402
- environment_builder = EnvironmentBuilder.new
403
-
404
- # Run type inference for objects/classes and trace objects
405
- object_tracer = TypeInference::ObjectTracer.new(command)
406
- all_objects = object_tracer.trace_all
407
-
408
- # Translate command
409
- command_translation_result = command.accept(ArrayCommandVisitor.new(environment_builder))
410
-
411
- # Add SoA arrays to environment
412
- object_tracer.register_soa_arrays(environment_builder)
413
-
414
- # Add base arrays to environment
415
- command.accept(BaseArrayRegistrator.new(environment_builder, object_tracer))
416
-
417
- command_translation_result
418
- end
419
- end
420
- end
421
- end
@@ -1,35 +0,0 @@
1
- require_relative "../ast/nodes"
2
- require_relative "../ast/visitor"
3
- require_relative "../types/type_inference"
4
-
5
- module Ikra
6
- module Translator
7
- class LocalVariablesEnumerator < AST::Visitor
8
- def initialize
9
- @vars = {}
10
- end
11
-
12
- def add_local_var(var, type)
13
- @vars[var] = type
14
- end
15
-
16
- def local_variables
17
- @vars
18
- end
19
-
20
- def visit_lvar_read_node(node)
21
- add_local_var(node.identifier, node.get_type)
22
- end
23
-
24
- def visit_lvar_write_node(node)
25
- add_local_var(node.identifier, node.get_type)
26
- super(node)
27
- end
28
-
29
- def visit_for_node(node)
30
- add_local_var(node.iterator_identifier, Types::UnionType.create_int)
31
- super(node)
32
- end
33
- end
34
- end
35
- end