ikra 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -1,12 +1,9 @@
1
1
  require_relative "../ast/nodes.rb"
2
2
  require_relative "../ast/builder.rb"
3
- require_relative "../ast/translator.rb"
4
- require_relative "../types/type_inference"
5
- require_relative "../types/primitive_type"
3
+ require_relative "../types/types"
6
4
  require_relative "../parsing"
7
- require_relative "../scope"
8
5
  require_relative "../ast/printer"
9
- require_relative "../ast/method_definition"
6
+ require_relative "variable_classifier_visitor"
10
7
 
11
8
  module Ikra
12
9
  module Translator
@@ -20,10 +17,11 @@ module Ikra
20
17
  # @return [UnionType] Return value type of method/block
21
18
  attr_accessor :result_type
22
19
 
23
- # @return [String] Name of function in CUDA source code
20
+ # @return [String] Name of function of block in CUDA source code
24
21
  attr_accessor :function_name
25
22
 
26
- # @return [Array<Ikra::AST::MethodDefinition>] Auxiliary methods that are called by this block (including transitive method calls)
23
+ # @return [String] Auxiliary methods that are called by this block
24
+ # (including transitive method calls)
27
25
  attr_accessor :aux_methods
28
26
 
29
27
  def initialize(c_source:, result_type:, function_name:, aux_methods: [])
@@ -32,87 +30,145 @@ module Ikra
32
30
  @function_name = function_name
33
31
  @aux_methods = aux_methods
34
32
  end
35
-
36
- def generated_source
37
- @aux_methods.map do |meth|
38
- meth.to_c_source
39
- end.join("\n\n") + @block_source
40
- end
41
33
  end
42
34
 
43
35
  BlockSelectorDummy = :"<BLOCK>"
44
36
 
45
37
  class << self
46
38
  # Translates a Ruby block to CUDA source code.
47
- # @param [AST::Node] ast abstract syntax tree of the block
48
- # @param [EnvironmentBuilder] environment_builder environment builder instance collecting information about lexical variables (environment)
49
- # @param [Hash{Symbol => UnionType}] block_parameter_types types of arguments passed to the block
50
- # @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are accessed within the block
39
+ # @param [AST::BlockDefNode] block_def_node AST (abstract syntax tree) of the block
40
+ # @param [EnvironmentBuilder] environment_builder environment builder instance
41
+ # collecting information about lexical variables (environment)
42
+ # @param [Array{Variable}] block_parameters types and names of parameters
43
+ # to the block
44
+ # @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are
45
+ # accessed within the block
51
46
  # @param [Fixnum] command_id a unique identifier of the block
47
+ # @param [String] pre_execution source code that should be run before executing the
48
+ # block
49
+ # @param [Array{Variable}] override_block_parameters overrides the declaration of
50
+ # parameters that this block accepts.
51
+ # @param [EntireInputTranslationResult] entire_input_translation The result of
52
+ # `translate_entire_input`
52
53
  # @return [BlockTranslationResult]
53
- def translate_block(ast:, environment_builder:, command_id:, block_parameter_types: {}, lexical_variables: {})
54
+ def translate_block(
55
+ block_def_node:,
56
+ environment_builder:,
57
+ command_id:,
58
+ lexical_variables: {},
59
+
60
+ # Only one of the two following parameter configurations is valid:
61
+ # a) Either this parameter is given:
62
+ entire_input_translation: nil,
63
+
64
+ # b) or these parameters are given (some are optional):
65
+ pre_execution: nil,
66
+ override_block_parameters: nil,
67
+ block_parameters: nil)
68
+
69
+ # Check and prepare arguments
70
+ if pre_execution != nil and entire_input_translation != nil
71
+ raise ArgumentError.new("pre_execution and entire_input_translation given")
72
+ elsif entire_input_translation != nil
73
+ pre_execution = entire_input_translation.pre_execution
74
+ elsif pre_execution == nil
75
+ pre_execution = ""
76
+ end
77
+
78
+ if block_parameters != nil and entire_input_translation != nil
79
+ raise ArgumentError.new("block_parameters and entire_input_translation given")
80
+ elsif entire_input_translation != nil
81
+ block_parameters = entire_input_translation.block_parameters
82
+ elsif block_parameters == nil
83
+ block_parameters = []
84
+ end
85
+
86
+ if override_block_parameters != nil and entire_input_translation != nil
87
+ raise ArgumentError.new("override_block_parameters and entire_input_translation given")
88
+ elsif entire_input_translation != nil
89
+ override_block_parameters = entire_input_translation.override_block_parameters
90
+ elsif override_block_parameters == nil
91
+ override_block_parameters = block_parameters
92
+ end
93
+
94
+
95
+ # Build hash of parameter name -> type mappings
96
+ block_parameter_types = {}
97
+ for variable in block_parameters
98
+ block_parameter_types[variable.name] = variable.type
99
+ end
100
+
54
101
  parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
55
102
  Log.info("Translating block with input types #{parameter_types_string}")
56
103
 
57
- # Define MethodDefinition for block
58
- block_def = AST::MethodDefinition.new(
59
- type: Types::UnionType.new, # TODO: what to pass in here?
60
- selector: BlockSelectorDummy,
61
- parameter_variables: block_parameter_types,
62
- return_type: Types::UnionType.new,
63
- ast: ast)
104
+ # Add information to block_def_node
105
+ block_def_node.parameters_names_and_types = block_parameter_types
64
106
 
65
107
  # Lexical variables
66
108
  lexical_variables.each do |name, value|
67
- block_def.lexical_variables[name] = Types::UnionType.new(value.class.to_ikra_type)
109
+ block_def_node.lexical_variables_names_and_types[name] = value.ikra_type.to_union_type
68
110
  end
69
111
 
70
112
  # Type inference
71
113
  type_inference_visitor = TypeInference::Visitor.new
72
- return_type = type_inference_visitor.process_method(block_def)
73
- # The following method returns nested dictionaries, but we only need the values
74
- aux_methods = type_inference_visitor.methods.values.map do |hash|
75
- hash.values
76
- end.flatten
114
+ return_type = type_inference_visitor.process_block(block_def_node)
115
+
116
+ # Translation to source code
117
+ ast_translator = ASTTranslator.new
118
+
119
+ # Auxiliary methods are instance methods that are called by the block
120
+ aux_methods = type_inference_visitor.all_methods.map do |method|
121
+ ast_translator.translate_method(method)
122
+ end
123
+
124
+ # Generate method predeclarations
125
+ aux_methods_predecl = type_inference_visitor.all_methods.map do |method|
126
+ ast_translator.translate_method_predecl(method)
127
+ end
128
+
129
+ # Start with predeclarations
130
+ aux_methods = aux_methods_predecl + aux_methods
131
+
132
+ # Classify variables (lexical or local)
133
+ block_def_node.accept(VariableClassifier.new(
134
+ lexical_variable_names: lexical_variables.keys))
77
135
 
78
136
  # Translate to CUDA/C++ code
79
- translation_result = ast.translate_statement
137
+ translation_result = ast_translator.translate_block(block_def_node)
80
138
 
81
139
  # Load environment variables
82
140
  lexical_variables.each do |name, value|
83
- type = value.class.to_ikra_type
141
+ type = value.ikra_type
84
142
  mangled_name = environment_builder.add_object(name, value)
85
- translation_result.prepend("#{type.to_c_type} #{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
143
+ translation_result.prepend("#{type.to_c_type} #{Constants::LEXICAL_VAR_PREFIX}#{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
86
144
  end
87
145
 
88
146
  # Declare local variables
89
- block_def.local_variables.each do |name, types|
90
- translation_result.prepend("#{types.singleton_type.to_c_type} #{name};\n")
147
+ block_def_node.local_variables_names_and_types.each do |name, type|
148
+ translation_result.prepend("#{type.to_c_type} #{name};\n")
91
149
  end
92
150
 
93
151
  # Function signature
94
152
  mangled_name = "_block_k_#{command_id}_"
95
153
 
96
- if not return_type.is_singleton?
97
- raise "Cannot handle polymorphic return types yet"
98
- end
99
-
100
154
  function_parameters = ["environment_t *#{Constants::ENV_IDENTIFIER}"]
101
- block_parameter_types.each do |param|
102
- function_parameters.push("#{param[1].to_c_type} #{param[0].to_s}")
155
+
156
+ parameter_decls = override_block_parameters.map do |variable|
157
+ "#{variable.type.to_c_type} #{variable.name}"
103
158
  end
104
159
 
105
- function_head = Translator.read_file(
160
+ function_parameters.push(*parameter_decls)
161
+
162
+ translation_result = Translator.read_file(
106
163
  file_name: "block_function_head.cpp",
107
164
  replacements: {
108
165
  "name" => mangled_name,
109
- "return_type" => return_type.singleton_type.to_c_type,
110
- "parameters" => function_parameters.join(", ")})
111
-
112
- translation_result = function_head + wrap_in_c_block(translation_result)
166
+ "result_type" => return_type.to_c_type,
167
+ "parameters" => function_parameters.join(", "),
168
+ "body" => wrap_in_c_block(pre_execution + "\n" + translation_result)})
113
169
 
114
170
  # TODO: handle more than one result type
115
- BlockTranslationResult.new(
171
+ return BlockTranslationResult.new(
116
172
  c_source: translation_result,
117
173
  result_type: return_type,
118
174
  function_name: mangled_name,
@@ -0,0 +1,41 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_combine_command(command)
5
+ Log.info("Translating ArrayCombineCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ # Process dependent computation (receiver), returns [InputTranslationResult]
10
+ input = translate_entire_input(command)
11
+
12
+ # All variables accessed by this block should be prefixed with the unique ID
13
+ # of the command in the environment.
14
+ env_builder = @environment_builder[command.unique_id]
15
+
16
+ block_translation_result = Translator.translate_block(
17
+ block_def_node: command.block_def_node,
18
+ environment_builder: env_builder,
19
+ lexical_variables: command.lexical_externals,
20
+ command_id: command.unique_id,
21
+ entire_input_translation: input)
22
+
23
+ kernel_builder.add_methods(block_translation_result.aux_methods)
24
+ kernel_builder.add_block(block_translation_result.block_source)
25
+
26
+ # Build command invocation string
27
+ result = block_translation_result.function_name + "(" +
28
+ (["_env_"] + input.result).join(", ") + ")"
29
+
30
+ command_translation = build_command_translation_result(
31
+ execution: input.execution,
32
+ result: result,
33
+ command: command)
34
+
35
+ Log.info("DONE translating ArrayCombineCommand [#{command.unique_id}]")
36
+
37
+ return command_translation
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,28 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_identity_command(command)
5
+ Log.info("Translating ArrayIdentityCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ # This is a root command, determine grid/block dimensions
10
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
11
+
12
+ # Add base array to environment
13
+ need_union_type = !command.base_type.is_singleton?
14
+ transformed_base_array = object_tracer.convert_base_array(
15
+ command.input.first.command, need_union_type)
16
+ environment_builder.add_base_array(command.unique_id, transformed_base_array)
17
+
18
+ command_translation = build_command_translation_result(
19
+ result: "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[_tid_]",
20
+ command: command)
21
+
22
+ Log.info("DONE translating ArrayIdentityCommand [#{command.unique_id}]")
23
+
24
+ return command_translation
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,52 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ # Translate the block of an `Array.pnew` section.
5
+ def visit_array_index_command(command)
6
+ Log.info("Translating ArrayIndexCommand [#{command.unique_id}]")
7
+
8
+ super
9
+
10
+ # This is a root command, determine grid/block dimensions
11
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
12
+
13
+ num_dims = command.dimensions.size
14
+
15
+ # This is a root command, determine grid/block dimensions
16
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
17
+
18
+ index_generators = (0...num_dims).map do |dim_index|
19
+ index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
20
+ index_mod = command.dimensions[dim_index]
21
+
22
+ if dim_index > 0
23
+ "(_tid_ / #{index_div}) % #{index_mod}"
24
+ else
25
+ # No modulo required for first dimension
26
+ "_tid_ / #{index_div}"
27
+ end
28
+ end
29
+
30
+ if num_dims > 1
31
+ # Retrieve type that was generated earlier
32
+ zipped_type_singleton = command.result_type.singleton_type
33
+ result = zipped_type_singleton.generate_inline_initialization(index_generators)
34
+
35
+ # Add struct type to program builder, so that we can generate the source code
36
+ # for its definition.
37
+ program_builder.structs.add(zipped_type_singleton)
38
+ else
39
+ result = "_tid_"
40
+ end
41
+
42
+ command_translation = CommandTranslationResult.new(
43
+ result: result,
44
+ command: command)
45
+
46
+ Log.info("DONE translating ArrayIndexCommand [#{command.unique_id}]")
47
+
48
+ return command_translation
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,135 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_reduce_command(command)
5
+ Log.info("Translating ArrayReduceCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ if command.input.size != 1
10
+ raise AssertionError.new("Expected exactly one input for ArrayReduceCommand")
11
+ end
12
+
13
+ # Process dependent computation (receiver)
14
+ input = translate_entire_input(command)
15
+
16
+ block_size = command.block_size
17
+
18
+ # All variables accessed by this block should be prefixed with the unique ID
19
+ # of the command in the environment.
20
+ env_builder = @environment_builder[command.unique_id]
21
+
22
+ block_translation_result = Translator.translate_block(
23
+ block_def_node: command.block_def_node,
24
+ environment_builder: env_builder,
25
+ lexical_variables: command.lexical_externals,
26
+ command_id: command.unique_id,
27
+ entire_input_translation: input)
28
+
29
+ kernel_builder.add_methods(block_translation_result.aux_methods)
30
+ kernel_builder.add_block(block_translation_result.block_source)
31
+
32
+ # Add "odd" parameter to the kernel which is needed for reduction
33
+ kernel_builder.add_additional_parameters(Constants::ODD_TYPE + " " + Constants::ODD_IDENTIFIER)
34
+
35
+ # Number of elements that will be reduced
36
+ num_threads = command.input_size
37
+
38
+ if num_threads.is_a?(Fixnum)
39
+ # Easy case: Number of required reductions known statically
40
+
41
+ odd = (num_threads % 2 == 1).to_s
42
+
43
+ # Number of threads needed for reduction
44
+ num_threads = num_threads.fdiv(2).ceil
45
+
46
+ previous_result_kernel_var = input.result.first
47
+ first_launch = true
48
+
49
+ # While more kernel launches than one are needed to finish reduction
50
+ while num_threads >= block_size + 1
51
+ # Launch new kernel (with same kernel builder)
52
+ push_kernel_launcher(kernel_builder: kernel_builder)
53
+ # Configure kernel with correct arguments and grid
54
+ kernel_launcher.add_additional_arguments(odd)
55
+ kernel_launcher.configure_grid(num_threads, block_size: block_size)
56
+
57
+ # First launch of kernel is supposed to allocate new memory, so only reuse memory after first launch
58
+ if first_launch
59
+ first_launch = false
60
+ else
61
+ kernel_launcher.reuse_memory!(previous_result_kernel_var)
62
+ end
63
+
64
+ previous_result_kernel_var = kernel_launcher.kernel_result_var_name
65
+
66
+ pop_kernel_launcher(input.command_translation_result(0))
67
+
68
+ # Update number of threads needed
69
+ num_threads = num_threads.fdiv(block_size).ceil
70
+ odd = (num_threads % 2 == 1).to_s
71
+ num_threads = num_threads.fdiv(2).ceil
72
+ end
73
+
74
+ # Configuration for last launch of kernel
75
+ kernel_launcher.add_additional_arguments(odd)
76
+ kernel_launcher.configure_grid(num_threads, block_size: block_size)
77
+ else
78
+ # More difficult case: Have to generate loop for reductions
79
+
80
+ # Add one regular kernel launcher for setting up the memory etc.
81
+ odd_first = "(#{num_threads} % 2 == 1)"
82
+ num_threads_first = "((int) ceil(#{num_threads} / 2.0))"
83
+ push_kernel_launcher(kernel_builder: kernel_builder)
84
+ kernel_launcher.add_additional_arguments(odd_first)
85
+ kernel_launcher.configure_grid(num_threads_first, block_size: block_size)
86
+ previous_result_kernel_var = kernel_launcher.kernel_result_var_name
87
+ pop_kernel_launcher(input.command_translation_result(0))
88
+
89
+ # Add loop
90
+ # Set up state (variables that are updated inside the loop)
91
+ # 1. Calculate number of elements from previous computation
92
+ # 2. Check if odd number
93
+ # 3. Calculate number of threads that we need
94
+ loop_setup = "int _num_elements = ceil(#{num_threads_first} / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 1;\nint _next_threads = ceil(_num_elements / 2.0);\n"
95
+
96
+ # Update loop state after iteration
97
+ update_loop = "_num_elements = ceil(_next_threads / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 0;\n_next_threads = ceil(_num_elements / 2.0);\n"
98
+
99
+ push_kernel_launcher(kernel_launcher: WhileLoopKernelLauncher.new(
100
+ kernel_builder: kernel_builder,
101
+ condition: "_num_elements > 1",
102
+ before_loop: loop_setup,
103
+ post_iteration: update_loop))
104
+
105
+ kernel_launcher.add_additional_arguments("_next_odd")
106
+ kernel_launcher.configure_grid("_next_threads", block_size: block_size)
107
+ #pop_kernel_launcher(input.command_translation_result(0))
108
+ end
109
+
110
+ if !first_launch
111
+ kernel_launcher.reuse_memory!(previous_result_kernel_var)
112
+ end
113
+
114
+ command_execution = Translator.read_file(file_name: "reduce_body.cpp", replacements: {
115
+ "previous_result" => input.result.first,
116
+ "block_name" => block_translation_result.function_name,
117
+ "arguments" => Constants::ENV_IDENTIFIER,
118
+ "block_size" => block_size.to_s,
119
+ "temp_result" => Constants::TEMP_RESULT_IDENTIFIER,
120
+ "odd" => Constants::ODD_IDENTIFIER,
121
+ "type" => command.result_type.to_c_type,
122
+ "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
123
+
124
+ command_translation = CommandTranslationResult.new(
125
+ execution: command_execution,
126
+ result: Constants::TEMP_RESULT_IDENTIFIER,
127
+ command: command)
128
+
129
+ Log.info("DONE translating ArrayReduceCommand [#{command.unique_id}]")
130
+
131
+ return command_translation
132
+ end
133
+ end
134
+ end
135
+ end