ikra 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ast/builder.rb +225 -77
- data/lib/ast/host_section_builder.rb +38 -0
- data/lib/ast/interpreter.rb +67 -0
- data/lib/ast/lexical_variables_enumerator.rb +3 -2
- data/lib/ast/nodes.rb +521 -31
- data/lib/ast/printer.rb +116 -18
- data/lib/ast/ssa_generator.rb +192 -0
- data/lib/ast/visitor.rb +235 -21
- data/lib/config/configuration.rb +28 -3
- data/lib/config/os_configuration.rb +62 -9
- data/lib/cpu/cpu_implementation.rb +39 -0
- data/lib/ikra.rb +13 -3
- data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
- data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
- data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
- data/lib/resources/cuda/ast/assignment.cpp +1 -0
- data/lib/resources/cuda/block_function_head.cpp +7 -1
- data/lib/resources/cuda/entry_point.cpp +47 -0
- data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
- data/lib/resources/cuda/free_device_memory.cpp +3 -0
- data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
- data/lib/resources/cuda/header.cpp +23 -9
- data/lib/resources/cuda/header_structs.cpp +92 -0
- data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
- data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
- data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
- data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
- data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
- data/lib/resources/cuda/kernel.cpp +9 -2
- data/lib/resources/cuda/launch_kernel.cpp +5 -0
- data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
- data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
- data/lib/resources/cuda/reduce_body.cpp +88 -0
- data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
- data/lib/resources/cuda/stencil_body.cpp +16 -0
- data/lib/resources/cuda/struct_definition.cpp +4 -0
- data/lib/ruby_core/array.rb +34 -0
- data/lib/ruby_core/array_command.rb +313 -0
- data/lib/ruby_core/core.rb +103 -0
- data/lib/ruby_core/interpreter.rb +16 -0
- data/lib/ruby_core/math.rb +32 -0
- data/lib/ruby_core/ruby_integration.rb +256 -0
- data/lib/symbolic/host_section.rb +115 -0
- data/lib/symbolic/input.rb +87 -0
- data/lib/symbolic/input_visitor.rb +68 -0
- data/lib/symbolic/symbolic.rb +793 -117
- data/lib/symbolic/visitor.rb +70 -8
- data/lib/translator/array_command_struct_builder.rb +163 -0
- data/lib/translator/ast_translator.rb +572 -0
- data/lib/translator/block_translator.rb +104 -48
- data/lib/translator/commands/array_combine_command.rb +41 -0
- data/lib/translator/commands/array_identity_command.rb +28 -0
- data/lib/translator/commands/array_index_command.rb +52 -0
- data/lib/translator/commands/array_reduce_command.rb +135 -0
- data/lib/translator/commands/array_stencil_command.rb +129 -0
- data/lib/translator/commands/array_zip_command.rb +30 -0
- data/lib/translator/commands/command_translator.rb +264 -0
- data/lib/translator/cuda_errors.rb +32 -0
- data/lib/translator/environment_builder.rb +263 -0
- data/lib/translator/host_section/array_host_section_command.rb +150 -0
- data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
- data/lib/translator/host_section/ast_translator.rb +14 -0
- data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
- data/lib/translator/host_section/program_builder.rb +89 -0
- data/lib/translator/input_translator.rb +226 -0
- data/lib/translator/kernel_builder.rb +137 -0
- data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
- data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
- data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
- data/lib/translator/last_returns_visitor.rb +19 -10
- data/lib/translator/program_builder.rb +197 -0
- data/lib/translator/program_launcher.rb +273 -0
- data/lib/translator/struct_type.rb +55 -0
- data/lib/translator/translator.rb +34 -11
- data/lib/translator/variable_classifier_visitor.rb +56 -0
- data/lib/types/inference/ast_inference.rb +586 -0
- data/lib/types/inference/clear_types_visitor.rb +11 -0
- data/lib/types/inference/command_inference.rb +101 -0
- data/lib/types/inference/input_inference.rb +62 -0
- data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
- data/lib/types/inference/ruby_extension.rb +35 -0
- data/lib/types/inference/symbol_table.rb +131 -0
- data/lib/types/types.rb +14 -0
- data/lib/types/types/array_command_type.rb +123 -0
- data/lib/types/types/array_type.rb +137 -0
- data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
- data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
- data/lib/types/types/ruby_type.rb +88 -0
- data/lib/types/types/struct_type.rb +179 -0
- data/lib/types/types/union_type.rb +239 -0
- metadata +160 -18
- data/lib/ast/method_definition.rb +0 -37
- data/lib/ast/translator.rb +0 -264
- data/lib/resources/cuda/kernel_launcher.cpp +0 -28
- data/lib/scope.rb +0 -166
- data/lib/translator/command_translator.rb +0 -421
- data/lib/translator/local_variables_enumerator.rb +0 -35
- data/lib/translator/method_translator.rb +0 -24
- data/lib/types/array_type.rb +0 -51
- data/lib/types/ruby_extension.rb +0 -67
- data/lib/types/ruby_type.rb +0 -45
- data/lib/types/type_inference.rb +0 -382
- data/lib/types/union_type.rb +0 -155
@@ -1,12 +1,9 @@
|
|
1
1
|
require_relative "../ast/nodes.rb"
|
2
2
|
require_relative "../ast/builder.rb"
|
3
|
-
require_relative "../
|
4
|
-
require_relative "../types/type_inference"
|
5
|
-
require_relative "../types/primitive_type"
|
3
|
+
require_relative "../types/types"
|
6
4
|
require_relative "../parsing"
|
7
|
-
require_relative "../scope"
|
8
5
|
require_relative "../ast/printer"
|
9
|
-
require_relative "
|
6
|
+
require_relative "variable_classifier_visitor"
|
10
7
|
|
11
8
|
module Ikra
|
12
9
|
module Translator
|
@@ -20,10 +17,11 @@ module Ikra
|
|
20
17
|
# @return [UnionType] Return value type of method/block
|
21
18
|
attr_accessor :result_type
|
22
19
|
|
23
|
-
# @return [String] Name of function in CUDA source code
|
20
|
+
# @return [String] Name of function of block in CUDA source code
|
24
21
|
attr_accessor :function_name
|
25
22
|
|
26
|
-
# @return [
|
23
|
+
# @return [Array<String>] Auxiliary methods that are called by this block
|
24
|
+
# (including transitive method calls)
|
27
25
|
attr_accessor :aux_methods
|
28
26
|
|
29
27
|
def initialize(c_source:, result_type:, function_name:, aux_methods: [])
|
@@ -32,87 +30,145 @@ module Ikra
|
|
32
30
|
@function_name = function_name
|
33
31
|
@aux_methods = aux_methods
|
34
32
|
end
|
35
|
-
|
36
|
-
def generated_source
|
37
|
-
@aux_methods.map do |meth|
|
38
|
-
meth.to_c_source
|
39
|
-
end.join("\n\n") + @block_source
|
40
|
-
end
|
41
33
|
end
|
42
34
|
|
43
35
|
BlockSelectorDummy = :"<BLOCK>"
|
44
36
|
|
45
37
|
class << self
|
46
38
|
# Translates a Ruby block to CUDA source code.
|
47
|
-
# @param [AST::
|
48
|
-
# @param [EnvironmentBuilder] environment_builder environment builder instance
|
49
|
-
#
|
50
|
-
# @param [
|
39
|
+
# @param [AST::BlockDefNode] block_def_node AST (abstract syntax tree) of the block
|
40
|
+
# @param [EnvironmentBuilder] environment_builder environment builder instance
|
41
|
+
# collecting information about lexical variables (environment)
|
42
|
+
# @param [Array{Variable}] block_parameters types and names of parameters
|
43
|
+
# to the block
|
44
|
+
# @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are
|
45
|
+
# accessed within the block
|
51
46
|
# @param [Fixnum] command_id a unique identifier of the block
|
47
|
+
# @param [String] pre_execution source code that should be run before executing the
|
48
|
+
# block
|
49
|
+
# @param [Array{Variable}] override_block_parameters overrides the declaration of
|
50
|
+
# parameters that this block accepts.
|
51
|
+
# @param [EntireInputTranslationResult] entire_input_translation The result of
|
52
|
+
# `translate_entire_input`
|
52
53
|
# @return [BlockTranslationResult]
|
53
|
-
def translate_block(
|
54
|
+
def translate_block(
|
55
|
+
block_def_node:,
|
56
|
+
environment_builder:,
|
57
|
+
command_id:,
|
58
|
+
lexical_variables: {},
|
59
|
+
|
60
|
+
# Only one of the two following parameter configurations is valid:
|
61
|
+
# a) Either this parameter is given:
|
62
|
+
entire_input_translation: nil,
|
63
|
+
|
64
|
+
# b) or these parameters are given (some are optional):
|
65
|
+
pre_execution: nil,
|
66
|
+
override_block_parameters: nil,
|
67
|
+
block_parameters: nil)
|
68
|
+
|
69
|
+
# Check and prepare arguments
|
70
|
+
if pre_execution != nil and entire_input_translation != nil
|
71
|
+
raise ArgumentError.new("pre_execution and entire_input_translation given")
|
72
|
+
elsif entire_input_translation != nil
|
73
|
+
pre_execution = entire_input_translation.pre_execution
|
74
|
+
elsif pre_execution == nil
|
75
|
+
pre_execution = ""
|
76
|
+
end
|
77
|
+
|
78
|
+
if block_parameters != nil and entire_input_translation != nil
|
79
|
+
raise ArgumentError.new("block_parameters and entire_input_translation given")
|
80
|
+
elsif entire_input_translation != nil
|
81
|
+
block_parameters = entire_input_translation.block_parameters
|
82
|
+
elsif block_parameters == nil
|
83
|
+
block_parameters = []
|
84
|
+
end
|
85
|
+
|
86
|
+
if override_block_parameters != nil and entire_input_translation != nil
|
87
|
+
raise ArgumentError.new("override_block_parameters and entire_input_translation given")
|
88
|
+
elsif entire_input_translation != nil
|
89
|
+
override_block_parameters = entire_input_translation.override_block_parameters
|
90
|
+
elsif override_block_parameters == nil
|
91
|
+
override_block_parameters = block_parameters
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Build hash of parameter name -> type mappings
|
96
|
+
block_parameter_types = {}
|
97
|
+
for variable in block_parameters
|
98
|
+
block_parameter_types[variable.name] = variable.type
|
99
|
+
end
|
100
|
+
|
54
101
|
parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
|
55
102
|
Log.info("Translating block with input types #{parameter_types_string}")
|
56
103
|
|
57
|
-
#
|
58
|
-
|
59
|
-
type: Types::UnionType.new, # TODO: what to pass in here?
|
60
|
-
selector: BlockSelectorDummy,
|
61
|
-
parameter_variables: block_parameter_types,
|
62
|
-
return_type: Types::UnionType.new,
|
63
|
-
ast: ast)
|
104
|
+
# Add information to block_def_node
|
105
|
+
block_def_node.parameters_names_and_types = block_parameter_types
|
64
106
|
|
65
107
|
# Lexical variables
|
66
108
|
lexical_variables.each do |name, value|
|
67
|
-
|
109
|
+
block_def_node.lexical_variables_names_and_types[name] = value.ikra_type.to_union_type
|
68
110
|
end
|
69
111
|
|
70
112
|
# Type inference
|
71
113
|
type_inference_visitor = TypeInference::Visitor.new
|
72
|
-
return_type = type_inference_visitor.
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
114
|
+
return_type = type_inference_visitor.process_block(block_def_node)
|
115
|
+
|
116
|
+
# Translation to source code
|
117
|
+
ast_translator = ASTTranslator.new
|
118
|
+
|
119
|
+
# Auxiliary methods are instance methods that are called by the block
|
120
|
+
aux_methods = type_inference_visitor.all_methods.map do |method|
|
121
|
+
ast_translator.translate_method(method)
|
122
|
+
end
|
123
|
+
|
124
|
+
# Generate method predeclarations
|
125
|
+
aux_methods_predecl = type_inference_visitor.all_methods.map do |method|
|
126
|
+
ast_translator.translate_method_predecl(method)
|
127
|
+
end
|
128
|
+
|
129
|
+
# Start with predeclarations
|
130
|
+
aux_methods = aux_methods_predecl + aux_methods
|
131
|
+
|
132
|
+
# Classify variables (lexical or local)
|
133
|
+
block_def_node.accept(VariableClassifier.new(
|
134
|
+
lexical_variable_names: lexical_variables.keys))
|
77
135
|
|
78
136
|
# Translate to CUDA/C++ code
|
79
|
-
translation_result =
|
137
|
+
translation_result = ast_translator.translate_block(block_def_node)
|
80
138
|
|
81
139
|
# Load environment variables
|
82
140
|
lexical_variables.each do |name, value|
|
83
|
-
type = value.
|
141
|
+
type = value.ikra_type
|
84
142
|
mangled_name = environment_builder.add_object(name, value)
|
85
|
-
translation_result.prepend("#{type.to_c_type} #{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
|
143
|
+
translation_result.prepend("#{type.to_c_type} #{Constants::LEXICAL_VAR_PREFIX}#{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
|
86
144
|
end
|
87
145
|
|
88
146
|
# Declare local variables
|
89
|
-
|
90
|
-
translation_result.prepend("#{
|
147
|
+
block_def_node.local_variables_names_and_types.each do |name, type|
|
148
|
+
translation_result.prepend("#{type.to_c_type} #{name};\n")
|
91
149
|
end
|
92
150
|
|
93
151
|
# Function signature
|
94
152
|
mangled_name = "_block_k_#{command_id}_"
|
95
153
|
|
96
|
-
if not return_type.is_singleton?
|
97
|
-
raise "Cannot handle polymorphic return types yet"
|
98
|
-
end
|
99
|
-
|
100
154
|
function_parameters = ["environment_t *#{Constants::ENV_IDENTIFIER}"]
|
101
|
-
|
102
|
-
|
155
|
+
|
156
|
+
parameter_decls = override_block_parameters.map do |variable|
|
157
|
+
"#{variable.type.to_c_type} #{variable.name}"
|
103
158
|
end
|
104
159
|
|
105
|
-
|
160
|
+
function_parameters.push(*parameter_decls)
|
161
|
+
|
162
|
+
translation_result = Translator.read_file(
|
106
163
|
file_name: "block_function_head.cpp",
|
107
164
|
replacements: {
|
108
165
|
"name" => mangled_name,
|
109
|
-
"
|
110
|
-
"parameters" => function_parameters.join(", ")
|
111
|
-
|
112
|
-
translation_result = function_head + wrap_in_c_block(translation_result)
|
166
|
+
"result_type" => return_type.to_c_type,
|
167
|
+
"parameters" => function_parameters.join(", "),
|
168
|
+
"body" => wrap_in_c_block(pre_execution + "\n" + translation_result)})
|
113
169
|
|
114
170
|
# TODO: handle more than one result type
|
115
|
-
BlockTranslationResult.new(
|
171
|
+
return BlockTranslationResult.new(
|
116
172
|
c_source: translation_result,
|
117
173
|
result_type: return_type,
|
118
174
|
function_name: mangled_name,
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translates an `ArrayCombineCommand`.
      #
      # The inputs of the command are translated first, then the block of the
      # command is translated into a function, and finally a string invoking
      # that function with the translated inputs is built.
      #
      # @param command the combine command that is being visited
      # @return the value produced by `build_command_translation_result`
      def visit_array_combine_command(command)
        Log.info("Translating ArrayCombineCommand [#{command.unique_id}]")

        super

        # Translate the dependent computation (receiver) of this command;
        # returns [InputTranslationResult]
        translated_input = translate_entire_input(command)

        # Variables accessed by the block are stored in the environment under
        # the unique ID of this command, hence the per-command builder.
        scoped_env_builder = @environment_builder[command.unique_id]

        block_result = Translator.translate_block(
          block_def_node: command.block_def_node,
          environment_builder: scoped_env_builder,
          lexical_variables: command.lexical_externals,
          command_id: command.unique_id,
          entire_input_translation: translated_input
        )

        # Register the generated sources with the kernel builder
        kernel_builder.add_methods(block_result.aux_methods)
        kernel_builder.add_block(block_result.block_source)

        # Invocation string: block function applied to the environment
        # followed by the results of all translated inputs
        argument_list = (["_env_"] + translated_input.result).join(", ")
        invocation = block_result.function_name + "(" + argument_list + ")"

        translation = build_command_translation_result(
          execution: translated_input.execution,
          result: invocation,
          command: command
        )

        Log.info("DONE translating ArrayCombineCommand [#{command.unique_id}]")

        translation
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translates an `ArrayIdentityCommand`.
      #
      # Configures the grid of the current kernel launcher, converts the base
      # array of the command and adds it to the environment, and returns a
      # translation whose result expression indexes that base array with
      # `_tid_`.
      #
      # @param command the identity command that is being visited
      # @return the value produced by `build_command_translation_result`
      def visit_array_identity_command(command)
        Log.info("Translating ArrayIdentityCommand [#{command.unique_id}]")

        super

        # Root command: it determines the grid/block dimensions
        kernel_launcher.configure_grid(command.size, block_size: command.block_size)

        # Convert the base array and register it in the environment. A union
        # type representation is requested unless the base type is a singleton.
        requires_union = !command.base_type.is_singleton?
        base_array = object_tracer.convert_base_array(
          command.input.first.command, requires_union
        )
        environment_builder.add_base_array(command.unique_id, base_array)

        # Result expression: element of the base array at the thread index
        translation = build_command_translation_result(
          result: "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[_tid_]",
          command: command
        )

        Log.info("DONE translating ArrayIdentityCommand [#{command.unique_id}]")

        translation
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translate the block of an `Array.pnew` section.
      #
      # For a one-dimensional command the result expression is simply
      # `_tid_`. For more dimensions, one index expression per dimension is
      # derived from `_tid_` and packed into an inline initialization of the
      # command's (singleton) result type.
      #
      # @param command the index command that is being visited
      # @return [CommandTranslationResult]
      def visit_array_index_command(command)
        Log.info("Translating ArrayIndexCommand [#{command.unique_id}]")

        super

        # This is a root command, determine grid/block dimensions. (The grid
        # is configured exactly once; an identical second call was redundant.)
        kernel_launcher.configure_grid(command.size, block_size: command.block_size)

        num_dims = command.dimensions.size

        # One C expression per dimension that recovers the index within that
        # dimension from the flat thread index `_tid_`.
        index_generators = (0...num_dims).map do |dim_index|
          # Product of the sizes of all dimensions after this one
          index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
          index_mod = command.dimensions[dim_index]

          if dim_index > 0
            "(_tid_ / #{index_div}) % #{index_mod}"
          else
            # No modulo required for first dimension
            "_tid_ / #{index_div}"
          end
        end

        if num_dims > 1
          # Retrieve type that was generated earlier
          zipped_type_singleton = command.result_type.singleton_type
          result = zipped_type_singleton.generate_inline_initialization(index_generators)

          # Add struct type to program builder, so that we can generate the
          # source code for its definition.
          program_builder.structs.add(zipped_type_singleton)
        else
          result = "_tid_"
        end

        command_translation = CommandTranslationResult.new(
          result: result,
          command: command
        )

        Log.info("DONE translating ArrayIndexCommand [#{command.unique_id}]")

        command_translation
      end
    end
  end
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module Ikra
  module Translator
    class CommandTranslator < Symbolic::Visitor
      # Translates an `ArrayReduceCommand`.
      #
      # The block of the command is translated into a function and the body of
      # the reduction is generated from the `reduce_body.cpp` template. A
      # reduction may need several kernel launches:
      #
      # * If the input size is known statically (an Integer), the required
      #   launches are configured here, one kernel launcher per launch.
      # * Otherwise one regular launch is configured, followed by a
      #   `WhileLoopKernelLauncher` whose loop state is computed by the
      #   generated code.
      #
      # @param command the reduce command that is being visited
      # @return [CommandTranslationResult]
      # @raise [AssertionError] if the command does not have exactly one input
      def visit_array_reduce_command(command)
        Log.info("Translating ArrayReduceCommand [#{command.unique_id}]")

        super

        if command.input.size != 1
          raise AssertionError.new("Expected exactly one input for ArrayReduceCommand")
        end

        # Process dependent computation (receiver)
        input = translate_entire_input(command)

        block_size = command.block_size

        # All variables accessed by this block should be prefixed with the
        # unique ID of the command in the environment.
        env_builder = @environment_builder[command.unique_id]

        block_translation_result = Translator.translate_block(
          block_def_node: command.block_def_node,
          environment_builder: env_builder,
          lexical_variables: command.lexical_externals,
          command_id: command.unique_id,
          entire_input_translation: input
        )

        kernel_builder.add_methods(block_translation_result.aux_methods)
        kernel_builder.add_block(block_translation_result.block_source)

        # Add "odd" parameter to the kernel which is needed for reduction
        kernel_builder.add_additional_parameters(Constants::ODD_TYPE + " " + Constants::ODD_IDENTIFIER)

        # Number of elements that will be reduced
        num_threads = command.input_size

        # `Integer` instead of the removed `Fixnum` constant, so that this
        # check works on current Ruby versions as well.
        if num_threads.is_a?(Integer)
          # Easy case: Number of required reductions known statically

          odd = (num_threads % 2 == 1).to_s

          # Number of threads needed for reduction
          num_threads = num_threads.fdiv(2).ceil

          previous_result_kernel_var = input.result.first
          first_launch = true

          # While more kernel launches than one are needed to finish reduction
          while num_threads >= block_size + 1
            # Launch new kernel (with same kernel builder)
            push_kernel_launcher(kernel_builder: kernel_builder)
            # Configure kernel with correct arguments and grid
            kernel_launcher.add_additional_arguments(odd)
            kernel_launcher.configure_grid(num_threads, block_size: block_size)

            # First launch of kernel is supposed to allocate new memory, so
            # only reuse memory after first launch
            if first_launch
              first_launch = false
            else
              kernel_launcher.reuse_memory!(previous_result_kernel_var)
            end

            previous_result_kernel_var = kernel_launcher.kernel_result_var_name

            pop_kernel_launcher(input.command_translation_result(0))

            # Update number of threads needed
            num_threads = num_threads.fdiv(block_size).ceil
            odd = (num_threads % 2 == 1).to_s
            num_threads = num_threads.fdiv(2).ceil
          end

          # Configuration for last launch of kernel
          kernel_launcher.add_additional_arguments(odd)
          kernel_launcher.configure_grid(num_threads, block_size: block_size)
        else
          # More difficult case: Have to generate loop for reductions

          # Add one regular kernel launcher for setting up the memory etc.
          odd_first = "(#{num_threads} % 2 == 1)"
          num_threads_first = "((int) ceil(#{num_threads} / 2.0))"
          push_kernel_launcher(kernel_builder: kernel_builder)
          kernel_launcher.add_additional_arguments(odd_first)
          kernel_launcher.configure_grid(num_threads_first, block_size: block_size)
          previous_result_kernel_var = kernel_launcher.kernel_result_var_name
          pop_kernel_launcher(input.command_translation_result(0))

          # The regular launch above already happened, so the loop launcher
          # below always reuses memory. (Previously this variable was
          # implicitly nil on this path, with the same effect.)
          first_launch = false

          # Add loop
          # Set up state (variables that are updated inside the loop)
          # 1. Calculate number of elements from previous computation
          # 2. Check if odd number
          # 3. Calculate number of threads that we need
          loop_setup = "int _num_elements = ceil(#{num_threads_first} / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 1;\nint _next_threads = ceil(_num_elements / 2.0);\n"

          # Update loop state after iteration. `_next_odd` is assigned (not
          # declared again) so that the variable set up before the loop is
          # updated, and it uses the same parity test as the loop setup.
          update_loop = "_num_elements = ceil(_next_threads / (double) #{block_size});\n_next_odd = _num_elements % 2 == 1;\n_next_threads = ceil(_num_elements / 2.0);\n"

          push_kernel_launcher(kernel_launcher: WhileLoopKernelLauncher.new(
            kernel_builder: kernel_builder,
            condition: "_num_elements > 1",
            before_loop: loop_setup,
            post_iteration: update_loop
          ))

          kernel_launcher.add_additional_arguments("_next_odd")
          kernel_launcher.configure_grid("_next_threads", block_size: block_size)
          #pop_kernel_launcher(input.command_translation_result(0))
        end

        # Every launch except the very first one works on the memory of the
        # previous launch.
        if !first_launch
          kernel_launcher.reuse_memory!(previous_result_kernel_var)
        end

        command_execution = Translator.read_file(file_name: "reduce_body.cpp", replacements: {
          "previous_result" => input.result.first,
          "block_name" => block_translation_result.function_name,
          "arguments" => Constants::ENV_IDENTIFIER,
          "block_size" => block_size.to_s,
          "temp_result" => Constants::TEMP_RESULT_IDENTIFIER,
          "odd" => Constants::ODD_IDENTIFIER,
          "type" => command.result_type.to_c_type,
          "num_threads" => Constants::NUM_THREADS_IDENTIFIER
        })

        command_translation = CommandTranslationResult.new(
          execution: command_execution,
          result: Constants::TEMP_RESULT_IDENTIFIER,
          command: command
        )

        Log.info("DONE translating ArrayReduceCommand [#{command.unique_id}]")

        command_translation
      end
    end
  end
end
|