ikra 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ast/builder.rb +225 -77
  3. data/lib/ast/host_section_builder.rb +38 -0
  4. data/lib/ast/interpreter.rb +67 -0
  5. data/lib/ast/lexical_variables_enumerator.rb +3 -2
  6. data/lib/ast/nodes.rb +521 -31
  7. data/lib/ast/printer.rb +116 -18
  8. data/lib/ast/ssa_generator.rb +192 -0
  9. data/lib/ast/visitor.rb +235 -21
  10. data/lib/config/configuration.rb +28 -3
  11. data/lib/config/os_configuration.rb +62 -9
  12. data/lib/cpu/cpu_implementation.rb +39 -0
  13. data/lib/ikra.rb +13 -3
  14. data/lib/resources/cuda/allocate_device_memory.cpp +5 -0
  15. data/lib/resources/cuda/allocate_host_memory.cpp +1 -0
  16. data/lib/resources/cuda/allocate_memcpy_environment_to_device.cpp +11 -0
  17. data/lib/resources/cuda/ast/assignment.cpp +1 -0
  18. data/lib/resources/cuda/block_function_head.cpp +7 -1
  19. data/lib/resources/cuda/entry_point.cpp +47 -0
  20. data/lib/resources/cuda/env_builder_copy_array.cpp +8 -2
  21. data/lib/resources/cuda/free_device_memory.cpp +3 -0
  22. data/lib/resources/cuda/free_memory_for_command.cpp +24 -0
  23. data/lib/resources/cuda/header.cpp +23 -9
  24. data/lib/resources/cuda/header_structs.cpp +92 -0
  25. data/lib/resources/cuda/host_section_block_function_head.cpp +12 -0
  26. data/lib/resources/cuda/host_section_entry_point.cpp +55 -0
  27. data/lib/resources/cuda/host_section_free_device_memory.cpp +18 -0
  28. data/lib/resources/cuda/host_section_launch_parallel_section.cpp +14 -0
  29. data/lib/resources/cuda/host_section_malloc_memcpy_device_to_host.cpp +10 -0
  30. data/lib/resources/cuda/kernel.cpp +9 -2
  31. data/lib/resources/cuda/launch_kernel.cpp +5 -0
  32. data/lib/resources/cuda/memcpy_device_to_host.cpp +3 -0
  33. data/lib/resources/cuda/memcpy_device_to_host_expr.cpp +10 -0
  34. data/lib/resources/cuda/reduce_body.cpp +88 -0
  35. data/lib/resources/cuda/stencil_array_reconstruction.cpp +2 -0
  36. data/lib/resources/cuda/stencil_body.cpp +16 -0
  37. data/lib/resources/cuda/struct_definition.cpp +4 -0
  38. data/lib/ruby_core/array.rb +34 -0
  39. data/lib/ruby_core/array_command.rb +313 -0
  40. data/lib/ruby_core/core.rb +103 -0
  41. data/lib/ruby_core/interpreter.rb +16 -0
  42. data/lib/ruby_core/math.rb +32 -0
  43. data/lib/ruby_core/ruby_integration.rb +256 -0
  44. data/lib/symbolic/host_section.rb +115 -0
  45. data/lib/symbolic/input.rb +87 -0
  46. data/lib/symbolic/input_visitor.rb +68 -0
  47. data/lib/symbolic/symbolic.rb +793 -117
  48. data/lib/symbolic/visitor.rb +70 -8
  49. data/lib/translator/array_command_struct_builder.rb +163 -0
  50. data/lib/translator/ast_translator.rb +572 -0
  51. data/lib/translator/block_translator.rb +104 -48
  52. data/lib/translator/commands/array_combine_command.rb +41 -0
  53. data/lib/translator/commands/array_identity_command.rb +28 -0
  54. data/lib/translator/commands/array_index_command.rb +52 -0
  55. data/lib/translator/commands/array_reduce_command.rb +135 -0
  56. data/lib/translator/commands/array_stencil_command.rb +129 -0
  57. data/lib/translator/commands/array_zip_command.rb +30 -0
  58. data/lib/translator/commands/command_translator.rb +264 -0
  59. data/lib/translator/cuda_errors.rb +32 -0
  60. data/lib/translator/environment_builder.rb +263 -0
  61. data/lib/translator/host_section/array_host_section_command.rb +150 -0
  62. data/lib/translator/host_section/array_in_host_section_command.rb +41 -0
  63. data/lib/translator/host_section/ast_translator.rb +14 -0
  64. data/lib/translator/host_section/parallel_section_invocation_visitor.rb +20 -0
  65. data/lib/translator/host_section/program_builder.rb +89 -0
  66. data/lib/translator/input_translator.rb +226 -0
  67. data/lib/translator/kernel_builder.rb +137 -0
  68. data/lib/translator/kernel_launcher/for_loop_kernel_launcher.rb +40 -0
  69. data/lib/translator/kernel_launcher/kernel_launcher.rb +259 -0
  70. data/lib/translator/kernel_launcher/while_loop_kernel_launcher.rb +38 -0
  71. data/lib/translator/last_returns_visitor.rb +19 -10
  72. data/lib/translator/program_builder.rb +197 -0
  73. data/lib/translator/program_launcher.rb +273 -0
  74. data/lib/translator/struct_type.rb +55 -0
  75. data/lib/translator/translator.rb +34 -11
  76. data/lib/translator/variable_classifier_visitor.rb +56 -0
  77. data/lib/types/inference/ast_inference.rb +586 -0
  78. data/lib/types/inference/clear_types_visitor.rb +11 -0
  79. data/lib/types/inference/command_inference.rb +101 -0
  80. data/lib/types/inference/input_inference.rb +62 -0
  81. data/lib/types/{object_tracer.rb → inference/object_tracer.rb} +5 -6
  82. data/lib/types/inference/ruby_extension.rb +35 -0
  83. data/lib/types/inference/symbol_table.rb +131 -0
  84. data/lib/types/types.rb +14 -0
  85. data/lib/types/types/array_command_type.rb +123 -0
  86. data/lib/types/types/array_type.rb +137 -0
  87. data/lib/types/{class_type.rb → types/class_type.rb} +42 -18
  88. data/lib/types/{primitive_type.rb → types/primitive_type.rb} +20 -7
  89. data/lib/types/types/ruby_type.rb +88 -0
  90. data/lib/types/types/struct_type.rb +179 -0
  91. data/lib/types/types/union_type.rb +239 -0
  92. metadata +160 -18
  93. data/lib/ast/method_definition.rb +0 -37
  94. data/lib/ast/translator.rb +0 -264
  95. data/lib/resources/cuda/kernel_launcher.cpp +0 -28
  96. data/lib/scope.rb +0 -166
  97. data/lib/translator/command_translator.rb +0 -421
  98. data/lib/translator/local_variables_enumerator.rb +0 -35
  99. data/lib/translator/method_translator.rb +0 -24
  100. data/lib/types/array_type.rb +0 -51
  101. data/lib/types/ruby_extension.rb +0 -67
  102. data/lib/types/ruby_type.rb +0 -45
  103. data/lib/types/type_inference.rb +0 -382
  104. data/lib/types/union_type.rb +0 -155
@@ -1,12 +1,9 @@
1
1
  require_relative "../ast/nodes.rb"
2
2
  require_relative "../ast/builder.rb"
3
- require_relative "../ast/translator.rb"
4
- require_relative "../types/type_inference"
5
- require_relative "../types/primitive_type"
3
+ require_relative "../types/types"
6
4
  require_relative "../parsing"
7
- require_relative "../scope"
8
5
  require_relative "../ast/printer"
9
- require_relative "../ast/method_definition"
6
+ require_relative "variable_classifier_visitor"
10
7
 
11
8
  module Ikra
12
9
  module Translator
@@ -20,10 +17,11 @@ module Ikra
20
17
  # @return [UnionType] Return value type of method/block
21
18
  attr_accessor :result_type
22
19
 
23
- # @return [String] Name of function in CUDA source code
20
+ # @return [String] Name of function of block in CUDA source code
24
21
  attr_accessor :function_name
25
22
 
26
- # @return [Array<Ikra::AST::MethodDefinition>] Auxiliary methods that are called by this block (including transitive method calls)
23
+ # @return [Array<String>] Auxiliary methods that are called by this block
24
+ # (including transitive method calls)
27
25
  attr_accessor :aux_methods
28
26
 
29
27
  def initialize(c_source:, result_type:, function_name:, aux_methods: [])
@@ -32,87 +30,145 @@ module Ikra
32
30
  @function_name = function_name
33
31
  @aux_methods = aux_methods
34
32
  end
35
-
36
- def generated_source
37
- @aux_methods.map do |meth|
38
- meth.to_c_source
39
- end.join("\n\n") + @block_source
40
- end
41
33
  end
42
34
 
43
35
  BlockSelectorDummy = :"<BLOCK>"
44
36
 
45
37
  class << self
46
38
  # Translates a Ruby block to CUDA source code.
47
- # @param [AST::Node] ast abstract syntax tree of the block
48
- # @param [EnvironmentBuilder] environment_builder environment builder instance collecting information about lexical variables (environment)
49
- # @param [Hash{Symbol => UnionType}] block_parameter_types types of arguments passed to the block
50
- # @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are accessed within the block
39
+ # @param [AST::BlockDefNode] block_def_node AST (abstract syntax tree) of the block
40
+ # @param [EnvironmentBuilder] environment_builder environment builder instance
41
+ # collecting information about lexical variables (environment)
42
+ # @param [Array{Variable}] block_parameters types and names of parameters
43
+ # to the block
44
+ # @param [Hash{Symbol => Object}] lexical_variables all lexical variables that are
45
+ # accessed within the block
51
46
  # @param [Fixnum] command_id a unique identifier of the block
47
+ # @param [String] pre_execution source code that should be run before executing the
48
+ # block
49
+ # @param [Array{Variable}] override_block_parameters overrides the declaration of
50
+ # parameters that this block accepts.
51
+ # @param [EntireInputTranslationResult] entire_input_translation The result of
52
+ # `translate_entire_input`
52
53
  # @return [BlockTranslationResult]
53
- def translate_block(ast:, environment_builder:, command_id:, block_parameter_types: {}, lexical_variables: {})
54
+ def translate_block(
55
+ block_def_node:,
56
+ environment_builder:,
57
+ command_id:,
58
+ lexical_variables: {},
59
+
60
+ # Only one of the two following parameter configurations is valid:
61
+ # a) Either this parameter is given:
62
+ entire_input_translation: nil,
63
+
64
+ # b) or these parameters are given (some are optional):
65
+ pre_execution: nil,
66
+ override_block_parameters: nil,
67
+ block_parameters: nil)
68
+
69
+ # Check and prepare arguments
70
+ if pre_execution != nil and entire_input_translation != nil
71
+ raise ArgumentError.new("pre_execution and entire_input_translation given")
72
+ elsif entire_input_translation != nil
73
+ pre_execution = entire_input_translation.pre_execution
74
+ elsif pre_execution == nil
75
+ pre_execution = ""
76
+ end
77
+
78
+ if block_parameters != nil and entire_input_translation != nil
79
+ raise ArgumentError.new("block_parameters and entire_input_translation given")
80
+ elsif entire_input_translation != nil
81
+ block_parameters = entire_input_translation.block_parameters
82
+ elsif block_parameters == nil
83
+ block_parameters = []
84
+ end
85
+
86
+ if override_block_parameters != nil and entire_input_translation != nil
87
+ raise ArgumentError.new("override_block_parameters and entire_input_translation given")
88
+ elsif entire_input_translation != nil
89
+ override_block_parameters = entire_input_translation.override_block_parameters
90
+ elsif override_block_parameters == nil
91
+ override_block_parameters = block_parameters
92
+ end
93
+
94
+
95
+ # Build hash of parameter name -> type mappings
96
+ block_parameter_types = {}
97
+ for variable in block_parameters
98
+ block_parameter_types[variable.name] = variable.type
99
+ end
100
+
54
101
  parameter_types_string = "[" + block_parameter_types.map do |id, type| "#{id}: #{type}" end.join(", ") + "]"
55
102
  Log.info("Translating block with input types #{parameter_types_string}")
56
103
 
57
- # Define MethodDefinition for block
58
- block_def = AST::MethodDefinition.new(
59
- type: Types::UnionType.new, # TODO: what to pass in here?
60
- selector: BlockSelectorDummy,
61
- parameter_variables: block_parameter_types,
62
- return_type: Types::UnionType.new,
63
- ast: ast)
104
+ # Add information to block_def_node
105
+ block_def_node.parameters_names_and_types = block_parameter_types
64
106
 
65
107
  # Lexical variables
66
108
  lexical_variables.each do |name, value|
67
- block_def.lexical_variables[name] = Types::UnionType.new(value.class.to_ikra_type)
109
+ block_def_node.lexical_variables_names_and_types[name] = value.ikra_type.to_union_type
68
110
  end
69
111
 
70
112
  # Type inference
71
113
  type_inference_visitor = TypeInference::Visitor.new
72
- return_type = type_inference_visitor.process_method(block_def)
73
- # The following method returns nested dictionaries, but we only need the values
74
- aux_methods = type_inference_visitor.methods.values.map do |hash|
75
- hash.values
76
- end.flatten
114
+ return_type = type_inference_visitor.process_block(block_def_node)
115
+
116
+ # Translation to source code
117
+ ast_translator = ASTTranslator.new
118
+
119
+ # Auxiliary methods are instance methods that are called by the block
120
+ aux_methods = type_inference_visitor.all_methods.map do |method|
121
+ ast_translator.translate_method(method)
122
+ end
123
+
124
+ # Generate method predeclarations
125
+ aux_methods_predecl = type_inference_visitor.all_methods.map do |method|
126
+ ast_translator.translate_method_predecl(method)
127
+ end
128
+
129
+ # Start with predeclarations
130
+ aux_methods = aux_methods_predecl + aux_methods
131
+
132
+ # Classify variables (lexical or local)
133
+ block_def_node.accept(VariableClassifier.new(
134
+ lexical_variable_names: lexical_variables.keys))
77
135
 
78
136
  # Translate to CUDA/C++ code
79
- translation_result = ast.translate_statement
137
+ translation_result = ast_translator.translate_block(block_def_node)
80
138
 
81
139
  # Load environment variables
82
140
  lexical_variables.each do |name, value|
83
- type = value.class.to_ikra_type
141
+ type = value.ikra_type
84
142
  mangled_name = environment_builder.add_object(name, value)
85
- translation_result.prepend("#{type.to_c_type} #{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
143
+ translation_result.prepend("#{type.to_c_type} #{Constants::LEXICAL_VAR_PREFIX}#{name} = #{Constants::ENV_IDENTIFIER}->#{mangled_name};\n")
86
144
  end
87
145
 
88
146
  # Declare local variables
89
- block_def.local_variables.each do |name, types|
90
- translation_result.prepend("#{types.singleton_type.to_c_type} #{name};\n")
147
+ block_def_node.local_variables_names_and_types.each do |name, type|
148
+ translation_result.prepend("#{type.to_c_type} #{name};\n")
91
149
  end
92
150
 
93
151
  # Function signature
94
152
  mangled_name = "_block_k_#{command_id}_"
95
153
 
96
- if not return_type.is_singleton?
97
- raise "Cannot handle polymorphic return types yet"
98
- end
99
-
100
154
  function_parameters = ["environment_t *#{Constants::ENV_IDENTIFIER}"]
101
- block_parameter_types.each do |param|
102
- function_parameters.push("#{param[1].to_c_type} #{param[0].to_s}")
155
+
156
+ parameter_decls = override_block_parameters.map do |variable|
157
+ "#{variable.type.to_c_type} #{variable.name}"
103
158
  end
104
159
 
105
- function_head = Translator.read_file(
160
+ function_parameters.push(*parameter_decls)
161
+
162
+ translation_result = Translator.read_file(
106
163
  file_name: "block_function_head.cpp",
107
164
  replacements: {
108
165
  "name" => mangled_name,
109
- "return_type" => return_type.singleton_type.to_c_type,
110
- "parameters" => function_parameters.join(", ")})
111
-
112
- translation_result = function_head + wrap_in_c_block(translation_result)
166
+ "result_type" => return_type.to_c_type,
167
+ "parameters" => function_parameters.join(", "),
168
+ "body" => wrap_in_c_block(pre_execution + "\n" + translation_result)})
113
169
 
114
170
  # TODO: handle more than one result type
115
- BlockTranslationResult.new(
171
+ return BlockTranslationResult.new(
116
172
  c_source: translation_result,
117
173
  result_type: return_type,
118
174
  function_name: mangled_name,
@@ -0,0 +1,41 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_combine_command(command)
5
+ Log.info("Translating ArrayCombineCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ # Process dependent computation (receiver), returns [EntireInputTranslationResult]
10
+ input = translate_entire_input(command)
11
+
12
+ # All variables accessed by this block should be prefixed with the unique ID
13
+ # of the command in the environment.
14
+ env_builder = @environment_builder[command.unique_id]
15
+
16
+ block_translation_result = Translator.translate_block(
17
+ block_def_node: command.block_def_node,
18
+ environment_builder: env_builder,
19
+ lexical_variables: command.lexical_externals,
20
+ command_id: command.unique_id,
21
+ entire_input_translation: input)
22
+
23
+ kernel_builder.add_methods(block_translation_result.aux_methods)
24
+ kernel_builder.add_block(block_translation_result.block_source)
25
+
26
+ # Build command invocation string
27
+ result = block_translation_result.function_name + "(" +
28
+ (["_env_"] + input.result).join(", ") + ")"
29
+
30
+ command_translation = build_command_translation_result(
31
+ execution: input.execution,
32
+ result: result,
33
+ command: command)
34
+
35
+ Log.info("DONE translating ArrayCombineCommand [#{command.unique_id}]")
36
+
37
+ return command_translation
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,28 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_identity_command(command)
5
+ Log.info("Translating ArrayIdentityCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ # This is a root command, determine grid/block dimensions
10
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
11
+
12
+ # Add base array to environment
13
+ need_union_type = !command.base_type.is_singleton?
14
+ transformed_base_array = object_tracer.convert_base_array(
15
+ command.input.first.command, need_union_type)
16
+ environment_builder.add_base_array(command.unique_id, transformed_base_array)
17
+
18
+ command_translation = build_command_translation_result(
19
+ result: "#{Constants::ENV_IDENTIFIER}->#{EnvironmentBuilder.base_identifier(command.unique_id)}[_tid_]",
20
+ command: command)
21
+
22
+ Log.info("DONE translating ArrayIdentityCommand [#{command.unique_id}]")
23
+
24
+ return command_translation
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,52 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ # Translate the block of an `Array.pnew` section.
5
+ def visit_array_index_command(command)
6
+ Log.info("Translating ArrayIndexCommand [#{command.unique_id}]")
7
+
8
+ super
9
+
10
+ # This is a root command, determine grid/block dimensions
11
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
12
+
13
+ num_dims = command.dimensions.size
14
+
15
+ # This is a root command, determine grid/block dimensions
16
+ kernel_launcher.configure_grid(command.size, block_size: command.block_size)
17
+
18
+ index_generators = (0...num_dims).map do |dim_index|
19
+ index_div = command.dimensions.drop(dim_index + 1).reduce(1, :*)
20
+ index_mod = command.dimensions[dim_index]
21
+
22
+ if dim_index > 0
23
+ "(_tid_ / #{index_div}) % #{index_mod}"
24
+ else
25
+ # No modulo required for first dimension
26
+ "_tid_ / #{index_div}"
27
+ end
28
+ end
29
+
30
+ if num_dims > 1
31
+ # Retrieve type that was generated earlier
32
+ zipped_type_singleton = command.result_type.singleton_type
33
+ result = zipped_type_singleton.generate_inline_initialization(index_generators)
34
+
35
+ # Add struct type to program builder, so that we can generate the source code
36
+ # for its definition.
37
+ program_builder.structs.add(zipped_type_singleton)
38
+ else
39
+ result = "_tid_"
40
+ end
41
+
42
+ command_translation = CommandTranslationResult.new(
43
+ result: result,
44
+ command: command)
45
+
46
+ Log.info("DONE translating ArrayIndexCommand [#{command.unique_id}]")
47
+
48
+ return command_translation
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,135 @@
1
+ module Ikra
2
+ module Translator
3
+ class CommandTranslator < Symbolic::Visitor
4
+ def visit_array_reduce_command(command)
5
+ Log.info("Translating ArrayReduceCommand [#{command.unique_id}]")
6
+
7
+ super
8
+
9
+ if command.input.size != 1
10
+ raise AssertionError.new("Expected exactly one input for ArrayReduceCommand")
11
+ end
12
+
13
+ # Process dependent computation (receiver)
14
+ input = translate_entire_input(command)
15
+
16
+ block_size = command.block_size
17
+
18
+ # All variables accessed by this block should be prefixed with the unique ID
19
+ # of the command in the environment.
20
+ env_builder = @environment_builder[command.unique_id]
21
+
22
+ block_translation_result = Translator.translate_block(
23
+ block_def_node: command.block_def_node,
24
+ environment_builder: env_builder,
25
+ lexical_variables: command.lexical_externals,
26
+ command_id: command.unique_id,
27
+ entire_input_translation: input)
28
+
29
+ kernel_builder.add_methods(block_translation_result.aux_methods)
30
+ kernel_builder.add_block(block_translation_result.block_source)
31
+
32
+ # Add "odd" parameter to the kernel which is needed for reduction
33
+ kernel_builder.add_additional_parameters(Constants::ODD_TYPE + " " + Constants::ODD_IDENTIFIER)
34
+
35
+ # Number of elements that will be reduced
36
+ num_threads = command.input_size
37
+
38
+ if num_threads.is_a?(Fixnum)
39
+ # Easy case: Number of required reductions known statically
40
+
41
+ odd = (num_threads % 2 == 1).to_s
42
+
43
+ # Number of threads needed for reduction
44
+ num_threads = num_threads.fdiv(2).ceil
45
+
46
+ previous_result_kernel_var = input.result.first
47
+ first_launch = true
48
+
49
+ # While more than one kernel launch is needed to finish the reduction
50
+ while num_threads >= block_size + 1
51
+ # Launch new kernel (with same kernel builder)
52
+ push_kernel_launcher(kernel_builder: kernel_builder)
53
+ # Configure kernel with correct arguments and grid
54
+ kernel_launcher.add_additional_arguments(odd)
55
+ kernel_launcher.configure_grid(num_threads, block_size: block_size)
56
+
57
+ # First launch of kernel is supposed to allocate new memory, so only reuse memory after first launch
58
+ if first_launch
59
+ first_launch = false
60
+ else
61
+ kernel_launcher.reuse_memory!(previous_result_kernel_var)
62
+ end
63
+
64
+ previous_result_kernel_var = kernel_launcher.kernel_result_var_name
65
+
66
+ pop_kernel_launcher(input.command_translation_result(0))
67
+
68
+ # Update number of threads needed
69
+ num_threads = num_threads.fdiv(block_size).ceil
70
+ odd = (num_threads % 2 == 1).to_s
71
+ num_threads = num_threads.fdiv(2).ceil
72
+ end
73
+
74
+ # Configuration for last launch of kernel
75
+ kernel_launcher.add_additional_arguments(odd)
76
+ kernel_launcher.configure_grid(num_threads, block_size: block_size)
77
+ else
78
+ # More difficult case: Have to generate loop for reductions
79
+
80
+ # Add one regular kernel launcher for setting up the memory etc.
81
+ odd_first = "(#{num_threads} % 2 == 1)"
82
+ num_threads_first = "((int) ceil(#{num_threads} / 2.0))"
83
+ push_kernel_launcher(kernel_builder: kernel_builder)
84
+ kernel_launcher.add_additional_arguments(odd_first)
85
+ kernel_launcher.configure_grid(num_threads_first, block_size: block_size)
86
+ previous_result_kernel_var = kernel_launcher.kernel_result_var_name
87
+ pop_kernel_launcher(input.command_translation_result(0))
88
+
89
+ # Add loop
90
+ # Set up state (variables that are updated inside the loop)
91
+ # 1. Calculate number of elements from previous computation
92
+ # 2. Check if odd number
93
+ # 3. Calculate number of threads that we need
94
+ loop_setup = "int _num_elements = ceil(#{num_threads_first} / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 1;\nint _next_threads = ceil(_num_elements / 2.0);\n"
95
+
96
+ # Update loop state after iteration
97
+ update_loop = "_num_elements = ceil(_next_threads / (double) #{block_size});\nbool _next_odd = _num_elements % 2 == 0;\n_next_threads = ceil(_num_elements / 2.0);\n"
98
+
99
+ push_kernel_launcher(kernel_launcher: WhileLoopKernelLauncher.new(
100
+ kernel_builder: kernel_builder,
101
+ condition: "_num_elements > 1",
102
+ before_loop: loop_setup,
103
+ post_iteration: update_loop))
104
+
105
+ kernel_launcher.add_additional_arguments("_next_odd")
106
+ kernel_launcher.configure_grid("_next_threads", block_size: block_size)
107
+ #pop_kernel_launcher(input.command_translation_result(0))
108
+ end
109
+
110
+ if !first_launch
111
+ kernel_launcher.reuse_memory!(previous_result_kernel_var)
112
+ end
113
+
114
+ command_execution = Translator.read_file(file_name: "reduce_body.cpp", replacements: {
115
+ "previous_result" => input.result.first,
116
+ "block_name" => block_translation_result.function_name,
117
+ "arguments" => Constants::ENV_IDENTIFIER,
118
+ "block_size" => block_size.to_s,
119
+ "temp_result" => Constants::TEMP_RESULT_IDENTIFIER,
120
+ "odd" => Constants::ODD_IDENTIFIER,
121
+ "type" => command.result_type.to_c_type,
122
+ "num_threads" => Constants::NUM_THREADS_IDENTIFIER})
123
+
124
+ command_translation = CommandTranslationResult.new(
125
+ execution: command_execution,
126
+ result: Constants::TEMP_RESULT_IDENTIFIER,
127
+ command: command)
128
+
129
+ Log.info("DONE translating ArrayReduceCommand [#{command.unique_id}]")
130
+
131
+ return command_translation
132
+ end
133
+ end
134
+ end
135
+ end