ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,273 @@
1
+ require "ffi"
2
+
3
+ module Ikra
4
+ module Translator
5
+ class CommandTranslator
6
+ class ProgramBuilder
7
+ class Launcher
8
# Visitor that calls `post_execute` on every array command it visits.
# Per the original author's note, this is how the responsible command node
# receives the address of the computed result that is kept on the GPU.
class CommandNotifier < Symbolic::Visitor
  # Object handed to each visited command's `post_execute`.
  attr_reader :environment

  # environment: value forwarded unchanged to every `post_execute` call.
  def initialize(environment)
    @environment = environment
  end

  # Runs the inherited visiting logic first, then notifies the command itself.
  def visit_array_command(command)
    super
    command.post_execute(environment)
  end
end
23
+
24
# FFI struct describing an array as a pointer plus an element count:
# `content` is a raw pointer, `size` a 32-bit signed integer.
class FixedSizeArrayStruct < FFI::Struct
  layout(
    :content, :pointer,
    :size, :int32
  )
end
28
+
29
# FFI union holding the payload of a union-typed value: either a 32-bit
# integer or a FixedSizeArrayStruct.
class UnionTypeValue < FFI::Union
  # There are some values missing here, but we don't need them at the moment
  layout(
    :int_, :int32,
    :variable_size_array, FixedSizeArrayStruct
  )
end
34
+
35
# FFI struct for a tagged value: `class_id` identifies the runtime type and
# `value` carries the payload as a UnionTypeValue.
class UnionTypeStruct < FFI::Struct
  layout(
    :class_id, :int32,
    :value, UnionTypeValue
  )
end
39
+
40
# FFI struct for a kernel launch result whose payload is a tagged union
# (UnionTypeStruct), followed by an error code and six 64-bit timing counters.
class KernelUnionResultStruct < FFI::Struct
  layout(
    :result, UnionTypeStruct,
    :error_code, :int32,
    :time_setup_cuda, :uint64,
    :time_prepare_env, :uint64,
    :time_kernel, :uint64,
    :time_free_memory, :uint64,
    :time_transfer_memory, :uint64,
    :time_allocate_memory, :uint64
  )
end
50
+
51
# FFI struct for a kernel launch result whose payload is a plain array
# (FixedSizeArrayStruct), followed by an error code and six 64-bit timing
# counters.
class KernelResultStruct < FFI::Struct
  layout(
    :result, FixedSizeArrayStruct,
    :error_code, :int32,
    :time_setup_cuda, :uint64,
    :time_prepare_env, :uint64,
    :time_kernel, :uint64,
    :time_free_memory, :uint64,
    :time_transfer_memory, :uint64,
    :time_allocate_memory, :uint64
  )
end
61
+
62
+ attr_reader :root_command
63
+ attr_reader :source
64
+ attr_reader :environment_builder
65
+ attr_reader :result_type
66
+
67
class << self
  # Class-level timing counters. Each one gets a plain writer and a reader
  # that reports 0 while the counter has never been assigned, so code that
  # accumulates with `+=` works even if `reset_time` was not called first
  # (a plain attr_accessor would return nil and `nil + x` raises
  # NoMethodError).
  %i[
    last_time_setup_cuda
    last_time_prepare_env
    last_time_kernel
    last_time_free_memory
    last_time_transfer_memory
    last_time_allocate_memory
    last_time_total_external
    last_time_compiler
    last_time_read_result_ffi
  ].each do |timer|
    attr_writer timer

    define_method(timer) do
      instance_variable_get("@#{timer}") || 0
    end
  end

  # Sets every timing counter back to 0.
  def reset_time
    @last_time_setup_cuda = 0
    @last_time_prepare_env = 0
    @last_time_kernel = 0
    @last_time_free_memory = 0
    @last_time_transfer_memory = 0
    @last_time_allocate_memory = 0
    @last_time_total_external = 0
    @last_time_compiler = 0
    @last_time_read_result_ffi = 0
  end
end
90
+
91
# Stores the generated source code together with the collaborators used when
# compiling and executing it. All four keyword arguments are required and are
# exposed through the readers of the same name.
def initialize(source:, environment_builder:, result_type:, root_command:)
  @result_type = result_type
  @environment_builder = environment_builder
  @source = source
  @root_command = root_command
end
97
+
98
# Writes the generated source to a temporary `.cu` file and compiles it into a
# shared library using the configured nvcc invocation string.
#
# Side effects: logs the line-numbered source, optionally writes the source to
# `Configuration.codegen_expect_file_name`, assigns `@so_filename` and stores
# the measured compiler time in `last_time_compiler` on the class.
#
# Raises RuntimeError if the compiler command does not exit successfully.
def compile
  # Generate debug output with line numbers. The padding width is the number
  # of decimal digits of the last line number; deriving it from a logarithm is
  # one digit short for 10, 100, ... lines and fails for empty source.
  source_lines = source.lines
  line_no_digits = source_lines.size.to_s.length
  source_with_line_numbers = source_lines.each_with_index.map do |line, num|
    "[#{(num + 1).to_s.rjust(line_no_digits, "0")}] #{line}"
  end.join

  Log.info("Generated source code:\n#{source_with_line_numbers}")

  # Write source code to temporary file
  file = Tempfile.new(["ikra_kernel", ".cu"])
  file.write(source)
  file.close

  # Write to codegen_expect (block form closes the file even if writing fails)
  unless Configuration.codegen_expect_file_name.nil?
    File.open(Configuration.codegen_expect_file_name, "w+") do |expect_file|
      expect_file.write(source)
    end
  end

  # Run compiler
  @so_filename = "#{file.path}.#{Configuration.so_suffix}"
  nvcc_command = Configuration.nvcc_invocation_string(file.path, @so_filename)
  Log.info("Compiling kernel: #{nvcc_command}")
  time_before = Time.now
  compile_status = %x(#{nvcc_command})
  # Measure once so the logged and the recorded duration agree.
  compile_time = Time.now - time_before
  Log.info("Done, took #{compile_time} s")
  self.class.last_time_compiler = compile_time

  if $?.success?
    Log.info("nvcc successful: #{compile_status}")
  else
    Log.fatal("nvcc failed: #{compile_status}")
    raise RuntimeError.new("nvcc failed: #{compile_status}")
  end
end
136
+
137
# Attaches the compiled shared library via Ruby FFI and invokes the kernel.
#
# Compiles the source first if no shared library is available. After the
# launch, the commands reachable from `root_command` are notified through
# CommandNotifier, the timing counters on the class are updated and the result
# is read back through FFI.
#
# Returns a Ruby Array containing the computation result.
# Raises through `Errors.raiseCudaError` when the kernel reports a nonzero
# error code, AssertionError when the result is not one of the expected array
# types, and NotImplementedError for element types that cannot be read yet.
def execute
  # `@so_filename` is only assigned by `compile`; `File.exist?(nil)` raises a
  # TypeError, so "never compiled" is handled like "library missing".
  if @so_filename.nil? || !File.exist?(@so_filename)
    compile
  end

  time_before = Time.now
  ffi_interface = Module.new
  ffi_interface.extend(FFI::Library)
  ffi_interface.ffi_lib(@so_filename)
  ffi_interface.attach_function(:launch_kernel, [:pointer], :pointer)
  environment_object = environment_builder.build_ffi_object
  Log.info("FFI transfer time: #{Time.now - time_before} s")

  time_before = Time.now
  kernel_result = ffi_interface.launch_kernel(environment_object)
  total_time_external = Time.now - time_before
  Log.info("Kernel time: #{total_time_external} s")

  # Update command
  root_command.accept(CommandNotifier.new(environment_builder.ffi_struct))

  # The returned pointer is interpreted with one of two layouts, depending on
  # whether the result type is a single type or a union of types.
  if result_type.is_singleton?
    result_t_struct = KernelResultStruct.new(kernel_result)
  else
    result_t_struct = KernelUnionResultStruct.new(kernel_result)
  end

  # Extract error code and return value
  error_code = result_t_struct[:error_code]

  # Extract time measurements. The native counters are scaled by 1e-6 before
  # being accumulated (presumably microseconds to seconds -- confirm against
  # the CUDA header).
  self.class.last_time_setup_cuda += result_t_struct[:time_setup_cuda] * 0.000001
  self.class.last_time_prepare_env += result_t_struct[:time_prepare_env] * 0.000001
  self.class.last_time_kernel += result_t_struct[:time_kernel] * 0.000001
  self.class.last_time_free_memory += result_t_struct[:time_free_memory] * 0.000001
  self.class.last_time_transfer_memory += result_t_struct[:time_transfer_memory] * 0.000001
  self.class.last_time_allocate_memory += result_t_struct[:time_allocate_memory] * 0.000001
  self.class.last_time_total_external += total_time_external

  if error_code != 0
    # Kernel failed
    Errors.raiseCudaError(error_code)
  end

  time_before = Time.now

  # Check type of result: It should be one of `result_type`
  if result_type.is_singleton?
    array_type = result_type.singleton_type
    result_array = result_t_struct[:result]
  else
    class_id = result_t_struct[:result][:class_id]
    array_type = result_type.find do |sing_type|
      sing_type.class_id == class_id
    end

    if array_type.nil?
      raise AssertionError.new("Unknown class_id: #{class_id}")
    end

    result_array = result_t_struct[:result][:value][:variable_size_array]
  end

  if !array_type.is_a?(Types::ArrayType)
    raise AssertionError.new(
      "ArrayType expected, but #{array_type} found")
  end

  result = result_array[:content]
  result_size = result_array[:size]
  inner_type = array_type.inner_type

  if inner_type.is_singleton?
    # Read in entire array
    element_type = inner_type.singleton_type

    if element_type == Types::PrimitiveType::Int
      computation_result = result.read_array_of_int(result_size)
    elsif element_type == Types::PrimitiveType::Float
      computation_result = result.read_array_of_float(result_size)
    elsif element_type == Types::PrimitiveType::Bool
      computation_result = result.read_array_of_uint8(result_size).map do |v|
        v == 1
      end
    elsif element_type == Types::PrimitiveType::Nil
      computation_result = [nil] * result_size
    elsif element_type.is_a?(Types::ZipStructType)
      result_struct_type = element_type.to_ruby_type
      computation_result = Array.new(result_size) do |index|
        result_struct_type.new(result + index * result_struct_type.size)
      end
    else
      raise NotImplementedError.new("Type not implemented")
    end

    self.class.last_time_read_result_ffi = Time.now - time_before
    return computation_result
  else
    # Read union type struct
    # Have to read one by one and assemble object. Element stride and value
    # offset come from Translator::Constants.
    s = Constants::UNION_TYPE_SIZE
    o = Constants::UNION_TYPE_VALUE_OFFSET

    result_values = Array.new(result_size) do |index|
      element = result + s * index
      next_type = element.read_int

      if next_type == Types::PrimitiveType::Int.class_id
        (element + o).read_int
      elsif next_type == Types::PrimitiveType::Float.class_id
        (element + o).read_float
      elsif next_type == Types::PrimitiveType::Bool.class_id
        (element + o).read_uint8 == 1
      elsif next_type == Types::PrimitiveType::Nil.class_id
        nil
      else
        raise NotImplementedError.new("Implement class objs for \##{index}: #{next_type}")
      end
    end

    self.class.last_time_read_result_ffi = Time.now - time_before
    return result_values
  end
end
268
+ end
269
+ end
270
+ end
271
+ end
272
+ end
273
+
@@ -0,0 +1,55 @@
1
module Ikra
  module Types
    class StructType
      # Placeholder that subclasses are expected to override; calling it on
      # this class always raises NotImplementedError.
      def generate_definition
        raise NotImplementedError, "ZipStructType is the only implementation"
      end
    end

    class ZipStructType < StructType
      # Generates a source code expression that creates and initializes an
      # instance of this struct. `input` holds one initializer per field; the
      # initializers are joined with ", ".
      def generate_inline_initialization(*input)
        "((#{to_c_type}) {#{input.join(", ")}})"
      end

      # Generates the struct definition by emitting one "<type> <name>;" line
      # per entry of @fields and filling them into the
      # "struct_definition.cpp" resource template.
      def generate_definition
        all_fields = @fields.map { |field_name, type| "#{type.to_c_type} #{field_name};" }.join("\n")

        Translator.read_file(
          file_name: "struct_definition.cpp",
          replacements: {"name" => to_c_type, "fields" => all_fields})
      end

      # Generates a source code expression that reads a field of this struct
      # by index.
      def generate_read(receiver, selector, index)
        # Type inference already ensured that there is exactly one parameter
        # which is an IntLiteral.
        "#{receiver}.field_#{index}"
      end

      # Generates a nested conditional expression that selects a field based
      # on the runtime value of `index_expression_identifier`.
      def generate_non_constant_read(receiver, selector, index_expression_identifier)
        field_count = @fields.size

        branches = (0...field_count).map do |index|
          "(#{index_expression_identifier} == #{index} ? #{receiver}.field_#{index} : "
        end

        # Out of bounds case should throw an exception; for now the innermost
        # alternative is NULL.
        branches.join + "NULL" + ")" * field_count
      end
    end
  end
end
@@ -1,18 +1,47 @@
1
+ require_relative "../config/configuration"
2
+
3
+ require_relative "ast_translator"
1
4
  require_relative "block_translator"
2
- require_relative "command_translator"
5
+ require_relative "cuda_errors"
6
+ require_relative "environment_builder"
7
+ require_relative "commands/command_translator"
3
8
  require_relative "last_returns_visitor"
4
- require_relative "local_variables_enumerator"
5
- require_relative "method_translator"
6
- require_relative "../config/configuration"
9
+ require_relative "struct_type"
10
+ require_relative "array_command_struct_builder"
7
11
 
8
12
  module Ikra
9
13
 
10
14
  # This module contains functionality for translating Ruby code to CUDA (C++) code.
11
15
  module Translator
12
16
  module Constants
17
+ ENV_TYPE = "environment_t"
13
18
  ENV_IDENTIFIER = "_env_"
14
19
  ENV_DEVICE_IDENTIFIER = "dev_env"
15
20
  ENV_HOST_IDENTIFIER = "host_env"
21
+ LEXICAL_VAR_PREFIX = "lex_"
22
+ RESULT_IDENTIFIER = "_result_"
23
+ NUM_THREADS_TYPE = "int"
24
+ NUM_THREADS_IDENTIFIER = "_num_threads_"
25
+ TEMP_RESULT_IDENTIFIER = "_temp_result_"
26
+ ODD_TYPE = "bool"
27
+ ODD_IDENTIFIER = "_odd_"
28
+ PROGRAM_RESULT_TYPE = "result_t"
29
+ PROGRAM_RESULT_IDENTIFIER = "program_result"
30
+ SELF_IDENTIFIER = "_self_"
31
+
32
+ # Keep these two constants in sync with the header declaration CPP file: UNION_TYPE_SIZE is the stride between consecutive union-typed elements and UNION_TYPE_VALUE_OFFSET is the offset of the value inside one element, as used when union-typed results are read back.
33
+ UNION_TYPE_SIZE = 24
34
+ UNION_TYPE_VALUE_OFFSET = 8
35
+ end
36
+
37
# Simple value holder pairing a variable's name with its type.
class Variable
  attr_reader :type, :name

  # name: identifier of the variable.
  # type: type associated with the variable.
  def initialize(name:, type:)
    @type = type
    @name = name
  end
end
17
46
 
18
47
  class << self
@@ -26,7 +55,7 @@ module Ikra
26
55
  def read_file(file_name:, replacements: {})
27
56
  full_name = Ikra::Configuration.resource_file_name(file_name)
28
57
  if !File.exist?(full_name)
29
- raise "File does not exist: #{full_name}"
58
+ raise AssertionError.new("File does not exist: #{full_name}")
30
59
  end
31
60
 
32
61
  contents = File.open(full_name, "rb").read
@@ -40,10 +69,4 @@ module Ikra
40
69
  end
41
70
  end
42
71
  end
43
-
44
- module AST
45
- module Constants
46
- SELF_IDENTIFIER = "_self_"
47
- end
48
- end
49
72
  end
@@ -0,0 +1,56 @@
1
+ require_relative "../ast/nodes"
2
+ require_relative "../ast/visitor"
3
+ require_relative "../types/types"
4
+
5
+ module Ikra
6
+ module AST
7
class LVarReadNode
  # Classification of the variable; compared against :lexical below.
  attr_accessor :variable_kind

  # Returns the identifier to use for this variable: lexical variables get
  # LEXICAL_VAR_PREFIX prepended (as a String); every other kind returns
  # `identifier` unchanged.
  def mangled_identifier
    return identifier unless variable_kind == :lexical

    Translator::Constants::LEXICAL_VAR_PREFIX + identifier.to_s
  end
end
18
+
19
class LVarWriteNode
  # Classification of the variable; compared against :lexical below.
  attr_accessor :variable_kind

  # Returns the identifier to use for this variable: lexical variables get
  # LEXICAL_VAR_PREFIX prepended (as a String); every other kind returns
  # `identifier` unchanged.
  def mangled_identifier
    return identifier unless variable_kind == :lexical

    Translator::Constants::LEXICAL_VAR_PREFIX + identifier.to_s
  end
end
30
+ end
31
+
32
+ module Translator
33
# AST visitor that tags local-variable read and write nodes with a
# `variable_kind` of :lexical or :local.
class VariableClassifier < AST::Visitor
  # lexical_variable_names: collection (anything responding to `include?`)
  # of identifiers that count as lexical variables.
  def initialize(lexical_variable_names:)
    @lexical_variable_names = lexical_variable_names
  end

  # Tags a read node; the inherited visit logic is not invoked here.
  def visit_lvar_read_node(node)
    node.variable_kind = var_type(node.identifier)
  end

  # Tags a write node, then continues with the inherited visit logic.
  def visit_lvar_write_node(node)
    node.variable_kind = var_type(node.identifier)
    super
  end

  # Returns :lexical when the identifier is one of the lexical variable
  # names, :local otherwise.
  def var_type(identifier)
    @lexical_variable_names.include?(identifier) ? :lexical : :local
  end
end
55
+ end
56
+ end