ikra 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -0,0 +1,129 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
# Translates a stencil command into per-thread CUDA source.
#
# The method translates the command's input and block, registers the block
# with the current kernel builder, and then assembles three source
# fragments that are substituted into the `stencil_body.cpp` template:
# the per-dimension index computation, the out-of-bounds check and the
# invocation of the translated block.
#
# Returns the value produced by `build_command_translation_result`.
def visit_array_stencil_command(command)
    Log.info("Translating ArrayStencilCommand [#{command.unique_id}]")

    super

    dimensions = command.dimensions
    dim_indices = (0...dimensions.size).to_a

    # Process dependent computation (receiver), returns [InputTranslationResult]
    input = translate_entire_input(command)

    # One block parameter is passed per stencil offset.
    offsets = command.offsets

    # All variables accessed by this block should be prefixed with the unique ID
    # of the command in the environment.
    env_builder = @environment_builder[command.unique_id]

    block_translation_result = Translator.translate_block(
        block_def_node: command.block_def_node,
        environment_builder: env_builder,
        lexical_variables: command.lexical_externals,
        command_id: command.unique_id,
        entire_input_translation: input)

    kernel_builder.add_methods(block_translation_result.aux_methods)
    kernel_builder.add_block(block_translation_result.block_source)

    # One C statement per dimension that derives the index in that dimension
    # from the flat thread index `_tid_`.
    compute_indices = dim_indices.map do |dim|
        stride = dimensions.drop(dim + 1).reduce(1, :*)

        index_expr = if dim > 0
            "(_tid_ / #{stride}) % #{dimensions[dim]}"
        else
            # No modulo required for first dimension
            "_tid_ / #{stride}"
        end

        "int temp_stencil_dim_#{dim} = #{index_expr};"
    end.join("\n")

    # Boolean expression that is true only if the smallest and the largest
    # offset stay inside the array in every dimension.
    out_of_bounds_check = dim_indices.map do |dim|
        lowest, highest = offsets.map { |offset| offset[dim] }.minmax
        size = dimensions[dim]

        if size.is_a?(String)
            # This is not a compile-time constant. Pass dimension size as argument
            # to the kernel.
            size_expr = "dim_size_#{dim}"
            kernel_builder.add_additional_parameters("int #{size_expr}")
            kernel_launcher.add_additional_arguments(size)
        else
            size_expr = size
        end

        index_var = "temp_stencil_dim_#{dim}"
        "#{index_var} + #{lowest} >= 0 && #{index_var} + #{highest} < #{size_expr}"
    end.join(" && ")

    # `previous_result` should be an expression returning the array containing the
    # result of the previous computation.
    previous_result = input.result(0)

    arguments = ["_env_"]

    # Pass values from previous computation that are required by this thread.
    # Reconstruct actual indices from indices for each dimension, walking from
    # the last dimension to the first while accumulating the multiplier.
    offsets.each do |offset|
        multiplier = 1

        terms = (dimensions.size - 1).downto(0).map do |dim|
            term = "(temp_stencil_dim_#{dim} + #{offset[dim]}) * #{multiplier}"
            size = dimensions[dim]

            if size.is_a?(String)
                # The multiplier is left unchanged for non-constant sizes.
                Log.warn("Cannot handle multi-dimensional stencil computations in host sections yet.")
            else
                multiplier = multiplier * size
            end

            term
        end

        arguments.push("#{previous_result}[#{terms.join(" + ")}]")
    end

    # Push additional arguments (e.g., index)
    arguments.push(*input.result(1..-1))
    argument_str = arguments.join(", ")
    stencil_computation = block_translation_result.function_name + "(#{argument_str})"

    temp_var_name = "temp_stencil_#{CommandTranslator.next_unique_id}"

    # The following template checks if there is at least one index out of bounds. If
    # so, the fallback value is used. Otherwise, the block is executed.
    replacements = {
        "execution" => input.execution,
        "temp_var" => temp_var_name,
        "result_type" => command.result_type.to_c_type,
        "compute_indices" => compute_indices,
        "out_of_bounds_check" => out_of_bounds_check,
        "out_of_bounds_fallback" => command.out_of_range_value.to_s,
        "stencil_computation" => stencil_computation}

    command_execution = Translator.read_file(
        file_name: "stencil_body.cpp", replacements: replacements)

    command_translation = build_command_translation_result(
        execution: command_execution,
        result: temp_var_name,
        command: command)

    Log.info("DONE translating ArrayStencilCommand [#{command.unique_id}]")

    command_translation
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
# Translates a zip command. The translated inputs are combined into an
# inline initialization of the command's struct type; no extra statements
# are generated besides those of the inputs.
#
# NOTE(review): the result is created directly with
# CommandTranslationResult.new instead of
# build_command_translation_result, so the `keep` handling of the latter
# is not applied here -- confirm that this is intended.
def visit_array_zip_command(command)
    command_id = command.unique_id
    Log.info("Translating ArrayZipCommand [#{command_id}]")

    super

    # Process dependent computation (receiver), returns [InputTranslationResult]
    translated_input = translate_entire_input(command)

    # Ikra struct type of a single zipped element
    struct_type = command.result_type.singleton_type

    # Register the struct type with the program builder, so that the source
    # code for its definition can be generated.
    program_builder.structs.add(struct_type)

    input_execution = translated_input.execution
    inline_struct = struct_type.generate_inline_initialization(translated_input.result)

    translation = CommandTranslationResult.new(
        execution: input_execution,
        result: inline_struct,
        command: command)

    Log.info("DONE translating ArrayZipCommand [#{command_id}]")

    translation
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,264 @@
|
|
1
|
+
require_relative "../translator"
|
2
|
+
require_relative "../../config/configuration"
|
3
|
+
require_relative "../../config/os_configuration"
|
4
|
+
require_relative "../../symbolic/symbolic"
|
5
|
+
require_relative "../../symbolic/visitor"
|
6
|
+
require_relative "../../types/types"
|
7
|
+
require_relative "../input_translator"
|
8
|
+
|
9
|
+
module Ikra
|
10
|
+
module Translator
|
11
|
+
class CommandTranslator < Symbolic::Visitor
|
12
|
+
@@unique_id = 0
|
13
|
+
|
14
|
+
# Increments the shared counter and returns its new value.
def self.next_unique_id
    @@unique_id += 1
end
|
18
|
+
|
19
|
+
# Value object holding the generated source for one translated command.
class CommandTranslationResult
    # execution: Source code that performs the computation of this command
    #            for one thread. May consist of multiple statements.
    #            Optional (defaults to the empty string).
    # result:    Source code that returns the result of the computation. If
    #            the computation can be expressed in a single expression,
    #            this string can contain the entire computation and
    #            `execution` should then be empty.
    # command:   The command object this translation belongs to.
    attr_reader :execution, :result, :command

    def initialize(execution: "", result:, command:)
        @execution, @command, @result = execution, command, result
    end

    # Result type of the translated command (delegates to the command).
    def result_type
        command.result_type
    end
end
|
41
|
+
|
42
|
+
# Entry point for translator. Creates a translator for `command`, runs the
# translation and returns the translator's [ProgramBuilder], which contains
# all required information for compiling and executing the CUDA program.
def self.translate_command(command)
    new(root_command: command).tap(&:start_translation).program_builder
end
|
49
|
+
|
50
|
+
# State of a translation run. `object_tracer` is only set once
# `start_translation` has been called.
attr_reader :environment_builder,
    :kernel_launcher_stack,
    :program_builder,
    :object_tracer,
    :root_command

# Sets up an empty kernel launcher stack, a fresh environment builder and a
# program builder for `root_command`.
def initialize(root_command:)
    @root_command = root_command
    @kernel_launcher_stack = []
    @environment_builder = EnvironmentBuilder.new

    # The program builder shares the environment builder of this translator.
    @program_builder = ProgramBuilder.new(
        environment_builder: environment_builder,
        root_command: root_command)
end
|
67
|
+
|
68
|
+
# Runs the complete translation of the root command: traces objects,
# translates the command inside a fresh kernel launcher, hands the launcher
# to the program builder and finally registers SoA arrays in the
# environment.
def start_translation
    Log.info("CommandTranslator: Starting translation...")

    # Trace all objects (the returned collection is not needed here)
    @object_tracer = TypeInference::ObjectTracer.new(root_command)
    object_tracer.trace_all

    # --- Translate ---

    # Create new kernel launcher
    push_kernel_launcher

    # Translate the command (might create additional kernels) and add the
    # finished kernel launcher to the ProgramBuilder
    root_translation = root_command.accept(self)
    pop_kernel_launcher(root_translation)

    # --- End of Translation ---

    # Add SoA arrays to environment
    object_tracer.register_soa_arrays(environment_builder)
end
|
93
|
+
|
94
|
+
# Kernel launcher on top of the stack, or nil if the stack is empty.
def kernel_launcher
    kernel_launcher_stack[-1]
end
|
97
|
+
|
98
|
+
# Kernel builder of the kernel launcher on top of the stack.
def kernel_builder
    current_launcher = kernel_launcher_stack[-1]
    current_launcher.kernel_builder
end
|
101
|
+
|
102
|
+
|
103
|
+
# --- Actual Visitor parts start here ---
|
104
|
+
|
105
|
+
# For commands whose result is kept and that have no previous result yet,
# reserves a slot for the result pointer on the GPU in the environment.
# Does nothing (and returns nil) for all other commands.
def visit_array_command(command)
    return if !command.keep || command.has_previous_result?

    environment_builder.allocate_previous_pointer(command.unique_id)
end
|
111
|
+
|
112
|
+
# Pushes a kernel launcher onto the kernel launcher stack.
#
# kernel_builder:  if given, a new KernelLauncher wrapping this builder is
#                  pushed.
# kernel_launcher: if given, this launcher is pushed as is.
#
# Without arguments, a new KernelLauncher with a new KernelBuilder is
# pushed. Raises ArgumentError if both arguments are given. Returns the
# kernel launcher stack.
def push_kernel_launcher(kernel_builder: nil, kernel_launcher: nil)
    # Validate first so that nothing is allocated for an invalid call.
    unless kernel_builder.nil? || kernel_launcher.nil?
        raise ArgumentError.new("kernel_builder and kernel_launcher given but only expected one")
    end

    launcher = if !kernel_launcher.nil?
        kernel_launcher
    elsif !kernel_builder.nil?
        KernelLauncher.new(kernel_builder)
    else
        # Default: add new kernel builder
        KernelLauncher.new(KernelBuilder.new)
    end

    @kernel_launcher_stack.push(launcher)
end
|
124
|
+
|
125
|
+
# Pops a kernel launcher from the kernel launcher stack. This method is
# called when all blocks (parallel sections) for that kernel have been
# translated, i.e., the kernel is fully built.
#
# The block invocation, execution source and result type of
# `command_translation_result` are stored in the launcher's kernel builder,
# and the launcher is added to the program builder.
#
# Raises AssertionError if the stack is empty. Returns the popped launcher.
def pop_kernel_launcher(command_translation_result)
    previous_launcher = kernel_launcher_stack.pop

    # Check before touching the launcher; otherwise an empty stack would
    # surface as a NoMethodError on nil instead of the intended error.
    if previous_launcher.nil?
        raise AssertionError.new("Attempt to pop kernel launcher, but stack is empty")
    end

    builder = previous_launcher.kernel_builder
    builder.block_invocation = command_translation_result.result
    builder.execution = command_translation_result.execution
    builder.result_type = command_translation_result.result_type

    program_builder.add_kernel_launcher(previous_launcher)

    return previous_launcher
end
|
144
|
+
|
145
|
+
# Translates every input of `command` and wraps the individual translations
# in an [EntireInputTranslationResult].
def translate_entire_input(command)
    translations = []

    command.input.each_with_index do |single_input, position|
        translations << single_input.translate_input(
            parent_command: command,
            command_translator: self,
            # Assuming that every input consumes exactly one parameter
            start_eat_params_offset: position)
    end

    EntireInputTranslationResult.new(translations)
end
|
156
|
+
|
157
|
+
# Processes a [Symbolic::Input] object, which contains a reference to a
# command object and information about how elements are accessed. If
# elements are only accessed according to the current thread ID (pattern
# `:tid`), the input is translated within the current kernel. For pattern
# `:entire`, the input is passed to the current kernel as a parameter,
# either computed by a newly built kernel or taken from a kept (cached)
# result.
#
# Returns a [CommandTranslationResult]. Raises NotImplementedError for any
# other pattern.
def translate_input(input)
    source_command = input.command
    pattern = input.pattern
    cached = source_command.has_previous_result?
    previous_result = ""

    if cached
        # Read previously computed (cached) value
        Log.info("Reusing kept result for command #{source_command.unique_id}: #{source_command.gpu_result_pointer}")

        environment_builder.add_previous_result(
            source_command.unique_id, source_command.gpu_result_pointer)
        environment_builder.add_previous_result_type(
            source_command.unique_id, source_command.result_type)

        # Only thread-wise access indexes into the cached array.
        cell_access = pattern == :tid ? "[_tid_]" : ""

        kernel_launcher.configure_grid(source_command.size)
        previous_result = CommandTranslationResult.new(
            execution: "",
            result: "((#{source_command.result_type.to_c_type} *)(_env_->prev_#{source_command.unique_id}))#{cell_access}",
            command: source_command)

        return previous_result if pattern == :tid
    end

    case pattern
    when :tid
        # Stay in current kernel
        return source_command.accept(self)
    when :entire
        if cached
            kernel_launcher.use_cached_result(
                source_command.unique_id, source_command.result_type)
            previous_result_kernel_var = "prev_" + source_command.unique_id.to_s
        else
            # Create new kernel
            push_kernel_launcher

            previous_result = source_command.accept(self)
            previous_result_kernel_var = kernel_launcher.kernel_result_var_name

            pop_kernel_launcher(previous_result)
        end

        # Add parameter for previous input to this kernel
        kernel_launcher.add_previous_kernel_parameter(Variable.new(
            name: previous_result_kernel_var,
            type: previous_result.result_type))

        # This is a root command for this kernel, determine grid/block dimensions
        kernel_launcher.configure_grid(
            source_command.size, block_size: source_command.block_size)

        return CommandTranslationResult.new(
            result: previous_result_kernel_var,
            command: source_command)
    else
        raise NotImplementedError.new("Unknown input pattern: #{pattern}")
    end
end
|
225
|
+
|
226
|
+
# Builds the [CommandTranslationResult] for `command`.
#
# If the command's result is kept, a statement storing `result` in a
# temporary variable is appended to `execution`, the cached result is
# registered with the kernel builder, the kernel launcher and the
# environment builder, and the temporary variable becomes the result
# expression. Otherwise `execution` and `result` are passed through
# unchanged.
def build_command_translation_result(
    execution: "", result:, command:)

    result_type = command.result_type
    unique_id = command.unique_id

    unless command.keep
        return CommandTranslationResult.new(
            execution: execution,
            result: result,
            command: command)
    end

    # Store result in global array
    # TODO: Remove DEBUG
    kept_result_var = Constants::TEMP_RESULT_IDENTIFIER + unique_id.to_s
    declaration = result_type.to_c_type + " " + kept_result_var + " = " + result + ";"
    kept_execution = execution + "\n " + declaration

    kernel_builder.add_cached_result(unique_id.to_s, result_type)
    kernel_launcher.add_cached_result(unique_id.to_s, result_type)
    environment_builder.add_previous_result_type(unique_id, result_type)

    CommandTranslationResult.new(
        execution: kept_execution,
        result: kept_result_var,
        command: command)
end
|
251
|
+
end
|
252
|
+
end
|
253
|
+
end
|
254
|
+
|
255
|
+
require_relative "array_combine_command"
|
256
|
+
require_relative "array_index_command"
|
257
|
+
require_relative "array_identity_command"
|
258
|
+
require_relative "array_reduce_command"
|
259
|
+
require_relative "array_stencil_command"
|
260
|
+
require_relative "array_zip_command"
|
261
|
+
require_relative "../host_section/array_host_section_command"
|
262
|
+
|
263
|
+
require_relative "../program_builder"
|
264
|
+
require_relative "../kernel_launcher/kernel_launcher"
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Ikra
    module Errors
        # Base class of all CUDA related errors.
        #
        # NOTE(review): this derives from Exception, not StandardError, so a
        # bare `rescue` does not catch these errors -- confirm that this is
        # intended before changing it.
        class CudaError < Exception; end

        # Raised by `raiseCudaError` for error code 77.
        class CudaErrorIllegalAddress < CudaError; end

        # Raised for every error code without a dedicated error class. The
        # numeric code is available as `error_code`.
        class CudaUnknownError < CudaError
            attr_reader :error_code

            def initialize(error_code)
                @error_code = error_code
            end

            def to_s
                "CudaUnknownError (#{error_code})"
            end
        end

        class << self
            # Raises the error class that corresponds to `error_code`.
            # This method never returns normally.
            def raiseCudaError(error_code)
                raise CudaErrorIllegalAddress.new if 77 == error_code

                raise CudaUnknownError.new(error_code)
            end
        end
    end
end
|