ikra 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -1,12 +1,9 @@
|
|
1
1
|
require_relative "../ast/nodes.rb"
|
2
2
|
require_relative "../ast/builder.rb"
|
3
|
-
require_relative "../
|
4
|
-
require_relative "../types/type_inference"
|
5
|
-
require_relative "../types/primitive_type"
|
3
|
+
require_relative "../types/types"
|
6
4
|
require_relative "../parsing"
|
7
|
-
require_relative "../scope"
|
8
5
|
require_relative "../ast/printer"
|
9
|
-
require_relative "
|
6
|
+
require_relative "variable_classifier_visitor"
|
10
7
|
|
11
8
|
module Ikra
|
12
9
|
module Translator
|
@@ -20,10 +17,11 @@ module Ikra
|
|
20
17
|
# @return [UnionType] Return value type of method/block
|
21
18
|
attr_accessor :result_type
|
22
19
|
|
23
|
-
# @return [String] Name of function in CUDA source code
|
20
|
+
# @return [String] Name of function of block in CUDA source code
|
24
21
|
attr_accessor :function_name
|
25
22
|
|
26
|
-
# @return [
|
23
|
+
# @return [Array{String}] Auxiliary methods that are called by this block
|
24
|
+
# (including transitive method calls)
|
27
25
|
attr_accessor :aux_methods
|
28
26
|
|
29
27
|
def initialize(c_source:, result_type:, function_name:, aux_methods: [])
|
@@ -32,87 +30,145 @@ module Ikra
|
|
32
30
|
@function_name = function_name
|
33
31
|
@aux_methods = aux_methods
|
34
32
|
end
|
35
|
-
|
36
|
-
def generated_source
|
37
|
-
@aux_methods.map do |meth|
|
38
|
-
meth.to_c_source
|
39
|
-
end.join("\n\n") + @block_source
|
40
|
-
end
|
41
33
|
end
|
42
34
|
|
43
35
|
BlockSelectorDummy = :"<BLOCK>"
|
44
36
|
|
45
37
|
class << self
|
46
38
|
# Translates a Ruby block to CUDA source code.
|
47
|
-
# @param [AST::
|
48
|
-
# @param [EnvironmentBuilder] environment_builder environment builder instance
|
49
|
-
#
|
50
|
-
# @param [
|
39
|
+
# @param [AST::BlockDefNode] block_def_node AST (abstract syntax tree) of the block
|
40
|
+
# @param [EnvironmentBuilder] environment_builder environment builder instance
|
41
|
+
# collecting information about lexical variables (environment)
|
42
|
+
# @param [Array{Variable}] block_parameters types and names of parameters
|
43
|
+
# to the block
|
44
|
+
# @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are
|
45
|
+
# accessed within the block
|
51
46
|
# @param [Fixnum] command_id a unique identifier of the block
|
47
|
+
# @param [String] pre_execution source code that should be run before executing the
|
48
|
+
# block
|
49
|
+
# @param [Array{Variable}] override_block_parameters overrides the declaration of
|
50
|
+
# parameters that this block accepts.
|
51
|
+
# @param [EntireInputTranslationResult] entire_input_translation The result of
|
52
|
+
# `translate_entire_input`
|
52
53
|
# @return [BlockTranslationResult]
|
53
|
-
def translate_block(
|
54
|
+
def translate_block(
|
55
|
+
block_def_node:,
|
56
|
+
environment_builder:,
|
57
|
+
command_id:,
|
58
|
+
lexical_variables: {},
|
59
|
+
|
60
|
+
# Only one of the two following parameter configurations is valid:
|
61
|
+
# a) Either this parameter is given:
|
62
|
+
entire_input_translation: nil,
|
63
|
+
|
64
|
+
# b) or these parameters are given (some are optional):
|
65
|
+
pre_execution: nil,
|
66
|
+
override_block_parameters: nil,
|
67
|
+
block_parameters: nil)
|
68
|
+
|
69
|
+
# Check and prepare arguments
|
70
|
+
if pre_execution != nil and entire_input_translation != nil
|
71
|
+
raise ArgumentError.new("pre_execution and entire_input_translation given")
|
72
|
+
elsif entire_input_translation != nil
|
73
|
+
pre_execution = entire_input_translation.pre_execution
|
74
|
+
elsif pre_execution == nil
|
75
|
+
pre_execution = ""
|
76
|
+
end
|
77
|
+
|
78
|
+
if block_parameters != nil and entire_input_translation != nil
|
79
|
+
raise ArgumentError.new("block_parameters and entire_input_translation given")
|
80
|
+
elsif entire_input_translation != nil
|
81
|
+
block_parameters = entire_input_translation.block_parameters
|
82
|
+
elsif block_parameters == nil
|
83
|
+
block_parameters = []
|
84
|
+
end
|
85
|
+
|
86
|
+
if override_block_parameters != nil and entire_input_translation != nil
|
87
|
+
raise ArgumentError.new("override_block_parameters and entire_input_translation given")
|
88
|
+
elsif entire_input_translation != nil
|
89
|
+
override_block_parameters = entire_input_translation.override_block_parameters
|
90
|
+
elsif override_block_parameters == nil
|
91
|
+
override_block_parameters = block_parameters
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Build hash of parameter name -> type mappings
|
96
|
+
block_parameter_types = {}
|
97
|
+
for variable in block_parameters
|
98
|
+
block_parameter_types[variable.name] = variable.type
|
99
|
+
end
|
100
|
+
|
54
101
|
parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
|
55
102
|
Log.info("Translating block with input types #{parameter_types_string}")
|
56
103
|
|
57
|
-
#
|
58
|
-
|
59
|
-
type: Types::UnionType.new, # TODO: what to pass in here?
|
60
|
-
selector: BlockSelectorDummy,
|
61
|
-
parameter_variables: block_parameter_types,
|
62
|
-
return_type: Types::UnionType.new,
|
63
|
-
ast: ast)
|
104
|
+
# Add information to block_def_node
|
105
|
+
block_def_node.parameters_names_and_types = block_parameter_types
|
64
106
|
|
65
107
|
# Lexical variables
|
66
108
|
lexical_variables.each do |name, value|
|
67
|
-
|
109
|
+
block_def_node.lexical_variables_names_and_types[name] = value.ikra_type.to_union_type
|
68
110
|
end
|
69
111
|
|
70
112
|
# Type inference
|
71
113
|
type_inference_visitor = TypeInference::Visitor.new
|
72
|
-
return_type = type_inference_visitor.
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
114
|
+
return_type = type_inference_visitor.process_block(block_def_node)
|
115
|
+
|
116
|
+
# Translation to source code
|
117
|
+
ast_translator = ASTTranslator.new
|
118
|
+
|
119
|
+
# Auxiliary methods are instance methods that are called by the block
|
120
|
+
aux_methods = type_inference_visitor.all_methods.map do |method|
|
121
|
+
ast_translator.translate_method(method)
|
122
|
+
end
|
123
|
+
|
124
|
+
# Generate method predeclarations
|
125
|
+
aux_methods_predecl = type_inference_visitor.all_methods.map do |method|
|
126
|
+
ast_translator.translate_method_predecl(method)
|
127
|
+
end
|
128
|
+
|
129
|
+
# Start with predeclarations
|
130
|
+
aux_methods = aux_methods_predecl + aux_methods
|
131
|
+
|
132
|
+
# Classify variables (lexical or local)
|
133
|
+
block_def_node.accept(VariableClassifier.new(
|
134
|
+
lexical_variable_names: lexical_variables.keys))
|
77
135
|
|
78
136
|
# Translate to CUDA/C++ code
|
79
|
-
translation_result =
|
137
|
+
translation_result = ast_translator.translate_block(block_def_node)
|
80
138
|
|
81
139
|
# Load environment variables
|
82
140
|
lexical_variables.each do |name, value|
|
83
|
-
type = value.
|
141
|
+
type = value.ikra_type
|
84
142
|
mangled_name = environment_builder.add_object(name, value)
|
85
|
-
translation_result.prepend("#{type.to_c_type} #{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
|
143
|
+
translation_result.prepend("#{type.to_c_type} #{Constants::LEXICAL_VAR_PREFIX}#{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
|
86
144
|
end
|
87
145
|
|
88
146
|
# Declare local variables
|
89
|
-
|
90
|
-
translation_result.prepend("#{
|
147
|
+
block_def_node.local_variables_names_and_types.each do |name, type|
|
148
|
+
translation_result.prepend("#{type.to_c_type} #{name};\n")
|
91
149
|
end
|
92
150
|
|
93
151
|
# Function signature
|
94
152
|
mangled_name = "_block_k_#{command_id}_"
|
95
153
|
|
96
|
-
if not return_type.is_singleton?
|
97
|
-
raise "Cannot handle polymorphic return types yet"
|
98
|
-
end
|
99
|
-
|
100
154
|
function_parameters = ["environment_t *#{Constants::ENV_IDENTIFIER}"]
|
101
|
-
|
102
|
-
|
155
|
+
|
156
|
+
parameter_decls = override_block_parameters.map do |variable|
|
157
|
+
"#{variable.type.to_c_type} #{variable.name}"
|
103
158
|
end
|
104
159
|
|
105
|
-
|
160
|
+
function_parameters.push(*parameter_decls)
|
161
|
+
|
162
|
+
translation_result = Translator.read_file(
|
106
163
|
file_name: "block_function_head.cpp",
|
107
164
|
replacements: {
|
108
165
|
"name" => mangled_name,
|
109
|
-
"
|
110
|
-
"parameters" => function_parameters.join(", ")
|
111
|
-
|
112
|
-
translation_result = function_head + wrap_in_c_block(translation_result)
|
166
|
+
"result_type" => return_type.to_c_type,
|
167
|
+
"parameters" => function_parameters.join(", "),
|
168
|
+
"body" => wrap_in_c_block(pre_execution + "\n" + translation_result)})
|
113
169
|
|
114
170
|
# TODO: handle more than one result type
|
115
|
-
BlockTranslationResult.new(
|
171
|
+
return BlockTranslationResult.new(
|
116
172
|
c_source: translation_result,
|
117
173
|
result_type: return_type,
|
118
174
|
function_name: mangled_name,
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
def visit_array_combine_command(command)
|
5
|
+
Log.info("Translating ArrayCombineCommand [#{command.unique_id}]")
|
6
|
+
|
7
|
+
super
|
8
|
+
|
9
|
+
# Process dependent computation (receiver), returns [InputTranslationResult]
|
10
|
+
input = translate_entire_input(command)
|
11
|
+
|
12
|
+
# All variables accessed by this block should be prefixed with the unique ID
|
13
|
+
# of the command in the environment.
|
14
|
+
env_builder = @environment_builder[command.unique_id]
|
15
|
+
|
16
|
+
block_translation_result = Translator.translate_block(
|
17
|
+
block_def_node: command.block_def_node,
|
18
|
+
environment_builder: env_builder,
|
19
|
+
lexical_variables: command.lexical_externals,
|
20
|
+
command_id: command.unique_id,
|
21
|
+
entire_input_translation: input)
|
22
|
+
|
23
|
+
kernel_builder.add_methods(block_translation_result.aux_methods)
|
24
|
+
kernel_builder.add_block(block_translation_result.block_source)
|
25
|
+
|
26
|
+
# Build command invocation string
|
27
|
+
result = block_translation_result.function_name + "(" +
|
28
|
+
(["_env_"] + input.result).join(", ") + ")"
|
29
|
+
|
30
|
+
command_translation = build_command_translation_result(
|
31
|
+
execution: input.execution,
|
32
|
+
result: result,
|
33
|
+
command: command)
|
34
|
+
|
35
|
+
Log.info("DONE translating ArrayCombineCommand [#{command.unique_id}]")
|
36
|
+
|
37
|
+
return command_translation
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
def visit_array_identity_command(command)
|
5
|
+
Log.info("Translating ArrayIdentityCommand [#{command.unique_id}]")
|
6
|
+
|
7
|
+
super
|
8
|
+
|
9
|
+
# This is a root command, determine grid/block dimensions
|
10
|
+
kernel_launcher.configure_grid(command.size, block_size: command.block_size)
|
11
|
+
|
12
|
+
# Add base array to environment
|
13
|
+
need_union_type = !command.base_type.is_singleton?
|
14
|
+
transformed_base_array = object_tracer.convert_base_array(
|
15
|
+
command.input.first.command, need_union_type)
|
16
|
+
environment_builder.add_base_array(command.unique_id, transformed_base_array)
|
17
|
+
|
18
|
+
command_translation = build_command_translation_result(
|
19
|
+
result: "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[_tid_]",
|
20
|
+
command: command)
|
21
|
+
|
22
|
+
Log.info("DONE translating ArrayIdentityCommand [#{command.unique_id}]")
|
23
|
+
|
24
|
+
return command_translation
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
# Translate the block of an `Array.pnew` section.
|
5
|
+
def visit_array_index_command(command)
|
6
|
+
Log.info("Translating ArrayIndexCommand [#{command.unique_id}]")
|
7
|
+
|
8
|
+
super
|
9
|
+
|
10
|
+
# This is a root command, determine grid/block dimensions
|
11
|
+
kernel_launcher.configure_grid(command.size, block_size: command.block_size)
|
12
|
+
|
13
|
+
num_dims = command.dimensions.size
|
14
|
+
|
15
|
+
# This is a root command, determine grid/block dimensions
|
16
|
+
kernel_launcher.configure_grid(command.size, block_size: command.block_size)
|
17
|
+
|
18
|
+
index_generators = (0...num_dims).map do |dim_index|
|
19
|
+
index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
|
20
|
+
index_mod = command.dimensions[dim_index]
|
21
|
+
|
22
|
+
if dim_index > 0
|
23
|
+
"(_tid_ / #{index_div}) % #{index_mod}"
|
24
|
+
else
|
25
|
+
# No modulo required for first dimension
|
26
|
+
"_tid_ / #{index_div}"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
if num_dims > 1
|
31
|
+
# Retrieve type that was generated earlier
|
32
|
+
zipped_type_singleton = command.result_type.singleton_type
|
33
|
+
result = zipped_type_singleton.generate_inline_initialization(index_generators)
|
34
|
+
|
35
|
+
# Add struct type to program builder, so that we can generate the source code
|
36
|
+
# for its definition.
|
37
|
+
program_builder.structs.add(zipped_type_singleton)
|
38
|
+
else
|
39
|
+
result = "_tid_"
|
40
|
+
end
|
41
|
+
|
42
|
+
command_translation = CommandTranslationResult.new(
|
43
|
+
result: result,
|
44
|
+
command: command)
|
45
|
+
|
46
|
+
Log.info("DONE translating ArrayIndexCommand [#{command.unique_id}]")
|
47
|
+
|
48
|
+
return command_translation
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module Ikra
|
2
|
+
module Translator
|
3
|
+
class CommandTranslator < Symbolic::Visitor
|
4
|
+
def visit_array_reduce_command(command)
|
5
|
+
Log.info("Translating ArrayReduceCommand [#{command.unique_id}]")
|
6
|
+
|
7
|
+
super
|
8
|
+
|
9
|
+
if command.input.size != 1
|
10
|
+
raise AssertionError.new("Expected exactly one input for ArrayReduceCommand")
|
11
|
+
end
|
12
|
+
|
13
|
+
# Process dependent computation (receiver)
|
14
|
+
input = translate_entire_input(command)
|
15
|
+
|
16
|
+
block_size = command.block_size
|
17
|
+
|
18
|
+
# All variables accessed by this block should be prefixed with the unique ID
|
19
|
+
# of the command in the environment.
|
20
|
+
env_builder = @environment_builder[command.unique_id]
|
21
|
+
|
22
|
+
block_translation_result = Translator.translate_block(
|
23
|
+
block_def_node: command.block_def_node,
|
24
|
+
environment_builder: env_builder,
|
25
|
+
lexical_variables: command.lexical_externals,
|
26
|
+
command_id: command.unique_id,
|
27
|
+
entire_input_translation: input)
|
28
|
+
|
29
|
+
kernel_builder.add_methods(block_translation_result.aux_methods)
|
30
|
+
kernel_builder.add_block(block_translation_result.block_source)
|
31
|
+
|
32
|
+
# Add "odd" parameter to the kernel which is needed for reduction
|
33
|
+
kernel_builder.add_additional_parameters(Constants::ODD_TYPE + " " + Constants::ODD_IDENTIFIER)
|
34
|
+
|
35
|
+
# Number of elements that will be reduced
|
36
|
+
num_threads = command.input_size
|
37
|
+
|
38
|
+
if num_threads.is_a?(Fixnum)
|
39
|
+
# Easy case: Number of required reductions known statically
|
40
|
+
|
41
|
+
odd = (num_threads % 2 == 1).to_s
|
42
|
+
|
43
|
+
# Number of threads needed for reduction
|
44
|
+
num_threads = num_threads.fdiv(2).ceil
|
45
|
+
|
46
|
+
previous_result_kernel_var = input.result.first
|
47
|
+
first_launch = true
|
48
|
+
|
49
|
+
# While more than one kernel launch is needed to finish the reduction
|
50
|
+
while num_threads >= block_size + 1
|
51
|
+
# Launch new kernel (with same kernel builder)
|
52
|
+
push_kernel_launcher(kernel_builder: kernel_builder)
|
53
|
+
# Configure kernel with correct arguments and grid
|
54
|
+
kernel_launcher.add_additional_arguments(odd)
|
55
|
+
kernel_launcher.configure_grid(num_threads, block_size: block_size)
|
56
|
+
|
57
|
+
# First launch of kernel is supposed to allocate new memory, so only reuse memory after first launch
|
58
|
+
if first_launch
|
59
|
+
first_launch = false
|
60
|
+
else
|
61
|
+
kernel_launcher.reuse_memory!(previous_result_kernel_var)
|
62
|
+
end
|
63
|
+
|
64
|
+
previous_result_kernel_var = kernel_launcher.kernel_result_var_name
|
65
|
+
|
66
|
+
pop_kernel_launcher(input.command_translation_result(0))
|
67
|
+
|
68
|
+
# Update number of threads needed
|
69
|
+
num_threads = num_threads.fdiv(block_size).ceil
|
70
|
+
odd = (num_threads % 2 == 1).to_s
|
71
|
+
num_threads = num_threads.fdiv(2).ceil
|
72
|
+
end
|
73
|
+
|
74
|
+
# Configuration for last launch of kernel
|
75
|
+
kernel_launcher.add_additional_arguments(odd)
|
76
|
+
kernel_launcher.configure_grid(num_threads, block_size: block_size)
|
77
|
+
else
|
78
|
+
# More difficult case: Have to generate loop for reductions
|
79
|
+
|
80
|
+
# Add one regular kernel launcher for setting up the memory etc.
|
81
|
+
odd_first = "(#{num_threads} % 2 == 1)"
|
82
|
+
num_threads_first = "((int) ceil(#{num_threads} / 2.0))"
|
83
|
+
push_kernel_launcher(kernel_builder: kernel_builder)
|
84
|
+
kernel_launcher.add_additional_arguments(odd_first)
|
85
|
+
kernel_launcher.configure_grid(num_threads_first, block_size: block_size)
|
86
|
+
previous_result_kernel_var = kernel_launcher.kernel_result_var_name
|
87
|
+
pop_kernel_launcher(input.command_translation_result(0))
|
88
|
+
|
89
|
+
# Add loop
|
90
|
+
# Set up state (variables that are updated inside the loop)
|
91
|
+
# 1. Calculate number of elements from previous computation
|
92
|
+
# 2. Check if odd number
|
93
|
+
# 3. Calculate number of threads that we need
|
94
|
+
loop_setup = "int _num_elements = ceil(#{num_threads_first} / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 1;\nint _next_threads = ceil(_num_elements / 2.0);\n"
|
95
|
+
|
96
|
+
# Update loop state after iteration
|
97
|
+
update_loop = "_num_elements = ceil(_next_threads / (double) #{block_size});\n_next_odd = _num_elements % 2 == 1;\n_next_threads = ceil(_num_elements / 2.0);\n" # assigns the _next_odd declared in loop_setup (no redeclaration); odd means remainder 1, as in loop_setup
|
98
|
+
|
99
|
+
push_kernel_launcher(kernel_launcher: WhileLoopKernelLauncher.new(
|
100
|
+
kernel_builder: kernel_builder,
|
101
|
+
condition: "_num_elements > 1",
|
102
|
+
before_loop: loop_setup,
|
103
|
+
post_iteration: update_loop))
|
104
|
+
|
105
|
+
kernel_launcher.add_additional_arguments("_next_odd")
|
106
|
+
kernel_launcher.configure_grid("_next_threads", block_size: block_size)
|
107
|
+
#pop_kernel_launcher(input.command_translation_result(0))
|
108
|
+
end
|
109
|
+
|
110
|
+
if !first_launch
|
111
|
+
kernel_launcher.reuse_memory!(previous_result_kernel_var)
|
112
|
+
end
|
113
|
+
|
114
|
+
command_execution = Translator.read_file(file_name: "reduce_body.cpp", replacements: {
|
115
|
+
"previous_result" => input.result.first,
|
116
|
+
"block_name" => block_translation_result.function_name,
|
117
|
+
"arguments" => Constants::ENV_IDENTIFIER,
|
118
|
+
"block_size" => block_size.to_s,
|
119
|
+
"temp_result" => Constants::TEMP_RESULT_IDENTIFIER,
|
120
|
+
"odd" => Constants::ODD_IDENTIFIER,
|
121
|
+
"type" => command.result_type.to_c_type,
|
122
|
+
"num_threads" => Constants::NUM_THREADS_IDENTIFIER})
|
123
|
+
|
124
|
+
command_translation = CommandTranslationResult.new(
|
125
|
+
execution: command_execution,
|
126
|
+
result: Constants::TEMP_RESULT_IDENTIFIER,
|
127
|
+
command: command)
|
128
|
+
|
129
|
+
Log.info("DONE translating ArrayReduceCommand [#{command.unique_id}]")
|
130
|
+
|
131
|
+
return command_translation
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|