kumi 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +2 -2
- data/golden/array_element/expected/schema_ruby.rb +1 -1
- data/golden/array_index/expected/schema_ruby.rb +1 -1
- data/golden/array_operations/expected/schema_ruby.rb +1 -1
- data/golden/cascade_logic/expected/lir_02_inlined.txt +8 -8
- data/golden/cascade_logic/expected/schema_ruby.rb +1 -1
- data/golden/chained_fusion/expected/lir_02_inlined.txt +36 -36
- data/golden/chained_fusion/expected/lir_03_cse.txt +23 -23
- data/golden/chained_fusion/expected/lir_04_1_loop_fusion.txt +25 -25
- data/golden/chained_fusion/expected/lir_04_loop_invcm.txt +23 -23
- data/golden/chained_fusion/expected/lir_06_const_prop.txt +23 -23
- data/golden/chained_fusion/expected/schema_javascript.mjs +23 -23
- data/golden/chained_fusion/expected/schema_ruby.rb +28 -28
- data/golden/element_arrays/expected/schema_ruby.rb +1 -1
- data/golden/empty_and_null_inputs/expected/lir_02_inlined.txt +18 -18
- data/golden/empty_and_null_inputs/expected/lir_03_cse.txt +17 -17
- data/golden/empty_and_null_inputs/expected/lir_04_1_loop_fusion.txt +17 -17
- data/golden/empty_and_null_inputs/expected/lir_04_loop_invcm.txt +17 -17
- data/golden/empty_and_null_inputs/expected/lir_06_const_prop.txt +17 -17
- data/golden/empty_and_null_inputs/expected/schema_javascript.mjs +13 -13
- data/golden/empty_and_null_inputs/expected/schema_ruby.rb +18 -18
- data/golden/game_of_life/expected/lir_00_unoptimized.txt +33 -33
- data/golden/game_of_life/expected/lir_01_hoist_scalar_references.txt +33 -33
- data/golden/game_of_life/expected/lir_02_inlined.txt +1370 -1370
- data/golden/game_of_life/expected/lir_03_cse.txt +448 -448
- data/golden/game_of_life/expected/lir_04_1_loop_fusion.txt +448 -448
- data/golden/game_of_life/expected/lir_04_loop_invcm.txt +448 -448
- data/golden/game_of_life/expected/lir_06_const_prop.txt +448 -448
- data/golden/game_of_life/expected/schema_javascript.mjs +85 -85
- data/golden/game_of_life/expected/schema_ruby.rb +86 -86
- data/golden/hash_keys/expected/schema_ruby.rb +1 -1
- data/golden/hash_keys/schema.kumi +4 -5
- data/golden/hash_value/expected/schema_ruby.rb +1 -1
- data/golden/hierarchical_complex/expected/lir_02_inlined.txt +15 -15
- data/golden/hierarchical_complex/expected/lir_03_cse.txt +1 -1
- data/golden/hierarchical_complex/expected/lir_04_1_loop_fusion.txt +1 -1
- data/golden/hierarchical_complex/expected/lir_04_loop_invcm.txt +1 -1
- data/golden/hierarchical_complex/expected/lir_06_const_prop.txt +1 -1
- data/golden/hierarchical_complex/expected/schema_javascript.mjs +1 -1
- data/golden/hierarchical_complex/expected/schema_ruby.rb +2 -2
- data/golden/inline_rename_scope_leak/expected/ast.txt +48 -0
- data/golden/inline_rename_scope_leak/expected/input_plan.txt +10 -0
- data/golden/inline_rename_scope_leak/expected/lir_00_unoptimized.txt +35 -0
- data/golden/inline_rename_scope_leak/expected/lir_01_hoist_scalar_references.txt +35 -0
- data/golden/inline_rename_scope_leak/expected/lir_02_inlined.txt +49 -0
- data/golden/inline_rename_scope_leak/expected/lir_03_cse.txt +49 -0
- data/golden/inline_rename_scope_leak/expected/lir_04_1_loop_fusion.txt +49 -0
- data/golden/inline_rename_scope_leak/expected/lir_04_loop_invcm.txt +49 -0
- data/golden/inline_rename_scope_leak/expected/lir_06_const_prop.txt +49 -0
- data/golden/inline_rename_scope_leak/expected/nast.txt +31 -0
- data/golden/inline_rename_scope_leak/expected/schema_javascript.mjs +51 -0
- data/golden/inline_rename_scope_leak/expected/schema_ruby.rb +82 -0
- data/golden/inline_rename_scope_leak/expected/snast.txt +31 -0
- data/golden/inline_rename_scope_leak/expected.json +7 -0
- data/golden/inline_rename_scope_leak/input.json +4 -0
- data/golden/inline_rename_scope_leak/schema.kumi +24 -0
- data/golden/input_reference/expected/schema_ruby.rb +1 -1
- data/golden/interleaved_fusion/expected/lir_02_inlined.txt +35 -35
- data/golden/interleaved_fusion/expected/lir_03_cse.txt +26 -26
- data/golden/interleaved_fusion/expected/lir_04_1_loop_fusion.txt +27 -26
- data/golden/interleaved_fusion/expected/lir_04_loop_invcm.txt +26 -26
- data/golden/interleaved_fusion/expected/lir_06_const_prop.txt +26 -26
- data/golden/interleaved_fusion/expected/schema_javascript.mjs +23 -23
- data/golden/interleaved_fusion/expected/schema_ruby.rb +29 -29
- data/golden/let_inline/expected/schema_ruby.rb +1 -1
- data/golden/loop_fusion/expected/lir_02_inlined.txt +17 -17
- data/golden/loop_fusion/expected/lir_03_cse.txt +14 -14
- data/golden/loop_fusion/expected/lir_04_1_loop_fusion.txt +14 -14
- data/golden/loop_fusion/expected/lir_04_loop_invcm.txt +14 -14
- data/golden/loop_fusion/expected/lir_06_const_prop.txt +14 -14
- data/golden/loop_fusion/expected/schema_javascript.mjs +12 -12
- data/golden/loop_fusion/expected/schema_ruby.rb +16 -16
- data/golden/min_reduce_scope/expected/schema_ruby.rb +1 -1
- data/golden/mixed_dimensions/expected/lir_02_inlined.txt +5 -5
- data/golden/mixed_dimensions/expected/lir_03_cse.txt +5 -5
- data/golden/mixed_dimensions/expected/lir_04_1_loop_fusion.txt +5 -5
- data/golden/mixed_dimensions/expected/lir_04_loop_invcm.txt +5 -5
- data/golden/mixed_dimensions/expected/lir_06_const_prop.txt +5 -5
- data/golden/mixed_dimensions/expected/schema_javascript.mjs +3 -3
- data/golden/mixed_dimensions/expected/schema_ruby.rb +6 -6
- data/golden/multirank_hoisting/expected/lir_02_inlined.txt +48 -48
- data/golden/multirank_hoisting/expected/lir_03_cse.txt +35 -35
- data/golden/multirank_hoisting/expected/lir_04_1_loop_fusion.txt +35 -35
- data/golden/multirank_hoisting/expected/lir_04_loop_invcm.txt +35 -35
- data/golden/multirank_hoisting/expected/lir_06_const_prop.txt +35 -35
- data/golden/multirank_hoisting/expected/schema_javascript.mjs +34 -34
- data/golden/multirank_hoisting/expected/schema_ruby.rb +36 -36
- data/golden/nested_hash/expected/schema_ruby.rb +1 -1
- data/golden/reduction_broadcast/expected/lir_02_inlined.txt +30 -30
- data/golden/reduction_broadcast/expected/lir_03_cse.txt +22 -22
- data/golden/reduction_broadcast/expected/lir_04_1_loop_fusion.txt +22 -22
- data/golden/reduction_broadcast/expected/lir_04_loop_invcm.txt +22 -22
- data/golden/reduction_broadcast/expected/lir_06_const_prop.txt +22 -22
- data/golden/reduction_broadcast/expected/schema_javascript.mjs +18 -18
- data/golden/reduction_broadcast/expected/schema_ruby.rb +23 -23
- data/golden/roll/expected/lir_00_unoptimized.txt +8 -8
- data/golden/roll/expected/lir_01_hoist_scalar_references.txt +8 -8
- data/golden/roll/expected/lir_02_inlined.txt +8 -8
- data/golden/roll/expected/lir_03_cse.txt +8 -8
- data/golden/roll/expected/lir_04_1_loop_fusion.txt +8 -8
- data/golden/roll/expected/lir_04_loop_invcm.txt +8 -8
- data/golden/roll/expected/lir_06_const_prop.txt +8 -8
- data/golden/roll/expected/schema_ruby.rb +1 -1
- data/golden/shift/expected/lir_00_unoptimized.txt +12 -12
- data/golden/shift/expected/lir_01_hoist_scalar_references.txt +12 -12
- data/golden/shift/expected/lir_02_inlined.txt +12 -12
- data/golden/shift/expected/lir_03_cse.txt +12 -12
- data/golden/shift/expected/lir_04_1_loop_fusion.txt +12 -12
- data/golden/shift/expected/lir_04_loop_invcm.txt +12 -12
- data/golden/shift/expected/lir_06_const_prop.txt +12 -12
- data/golden/shift/expected/schema_ruby.rb +1 -1
- data/golden/shift_2d/expected/lir_00_unoptimized.txt +48 -48
- data/golden/shift_2d/expected/lir_01_hoist_scalar_references.txt +48 -48
- data/golden/shift_2d/expected/lir_02_inlined.txt +48 -48
- data/golden/shift_2d/expected/lir_03_cse.txt +48 -48
- data/golden/shift_2d/expected/lir_04_1_loop_fusion.txt +48 -48
- data/golden/shift_2d/expected/lir_04_loop_invcm.txt +48 -48
- data/golden/shift_2d/expected/lir_06_const_prop.txt +48 -48
- data/golden/shift_2d/expected/schema_ruby.rb +1 -1
- data/golden/simple_math/expected/schema_ruby.rb +1 -1
- data/golden/streaming_basics/expected/lir_02_inlined.txt +25 -25
- data/golden/streaming_basics/expected/lir_03_cse.txt +13 -13
- data/golden/streaming_basics/expected/lir_04_1_loop_fusion.txt +13 -13
- data/golden/streaming_basics/expected/lir_04_loop_invcm.txt +13 -13
- data/golden/streaming_basics/expected/lir_06_const_prop.txt +13 -13
- data/golden/streaming_basics/expected/schema_javascript.mjs +13 -13
- data/golden/streaming_basics/expected/schema_ruby.rb +14 -14
- data/golden/tuples/expected/lir_00_unoptimized.txt +4 -4
- data/golden/tuples/expected/lir_01_hoist_scalar_references.txt +4 -4
- data/golden/tuples/expected/lir_02_inlined.txt +4 -4
- data/golden/tuples/expected/lir_03_cse.txt +4 -4
- data/golden/tuples/expected/lir_04_1_loop_fusion.txt +4 -4
- data/golden/tuples/expected/lir_04_loop_invcm.txt +4 -4
- data/golden/tuples/expected/lir_06_const_prop.txt +4 -4
- data/golden/tuples/expected/schema_ruby.rb +1 -1
- data/golden/tuples_and_arrays/expected/lir_00_unoptimized.txt +1 -1
- data/golden/tuples_and_arrays/expected/lir_01_hoist_scalar_references.txt +1 -1
- data/golden/tuples_and_arrays/expected/lir_02_inlined.txt +17 -17
- data/golden/tuples_and_arrays/expected/lir_03_cse.txt +14 -14
- data/golden/tuples_and_arrays/expected/lir_04_1_loop_fusion.txt +14 -14
- data/golden/tuples_and_arrays/expected/lir_04_loop_invcm.txt +14 -14
- data/golden/tuples_and_arrays/expected/lir_06_const_prop.txt +14 -14
- data/golden/tuples_and_arrays/expected/schema_javascript.mjs +13 -13
- data/golden/tuples_and_arrays/expected/schema_ruby.rb +14 -14
- data/golden/us_tax_2024/expected/ast.txt +865 -0
- data/golden/us_tax_2024/expected/input_plan.txt +61 -0
- data/golden/us_tax_2024/expected/lir_00_unoptimized.txt +901 -0
- data/golden/us_tax_2024/expected/lir_01_hoist_scalar_references.txt +901 -0
- data/golden/us_tax_2024/expected/lir_02_inlined.txt +5178 -0
- data/golden/us_tax_2024/expected/lir_03_cse.txt +2499 -0
- data/golden/us_tax_2024/expected/lir_04_1_loop_fusion.txt +2519 -0
- data/golden/us_tax_2024/expected/lir_04_loop_invcm.txt +2499 -0
- data/golden/us_tax_2024/expected/lir_06_const_prop.txt +2499 -0
- data/golden/us_tax_2024/expected/nast.txt +976 -0
- data/golden/us_tax_2024/expected/schema_javascript.mjs +584 -0
- data/golden/us_tax_2024/expected/schema_ruby.rb +639 -0
- data/golden/us_tax_2024/expected/snast.txt +976 -0
- data/golden/us_tax_2024/expected.json +1 -0
- data/golden/us_tax_2024/input.json +168 -0
- data/golden/us_tax_2024/schema.kumi +203 -0
- data/golden/with_constants/expected/schema_ruby.rb +1 -1
- data/lib/kumi/core/analyzer/passes/lir/inline_declarations_pass.rb +227 -107
- data/lib/kumi/version.rb +1 -1
- metadata +33 -1
@@ -15,20 +15,18 @@ module Kumi
|
|
15
15
|
|
16
16
|
MAX_PASSES.times do
|
17
17
|
new_ops, changed = run_one_pass(current_ops)
|
18
|
-
|
19
18
|
unless changed
|
20
19
|
new_ops.freeze
|
21
|
-
return state.with(:lir_module, new_ops)
|
20
|
+
return state.with(:lir_module, new_ops)
|
21
|
+
.with(:lir_02_inlined_ops_by_decl, new_ops)
|
22
22
|
end
|
23
23
|
current_ops = new_ops
|
24
24
|
end
|
25
|
-
|
26
25
|
raise "LIR inlining did not converge after #{MAX_PASSES} passes."
|
27
26
|
end
|
28
27
|
|
29
28
|
private
|
30
29
|
|
31
|
-
# --- UNCHANGED: Top-level pass logic ---
|
32
30
|
def run_one_pass(ops_by_decl)
|
33
31
|
@ops_by_decl = ops_by_decl
|
34
32
|
@gamma = detect_all_gammas(@ops_by_decl)
|
@@ -36,102 +34,187 @@ module Kumi
|
|
36
34
|
fused = {}
|
37
35
|
@ops_by_decl.each do |name, payload|
|
38
36
|
original_ops = Array(payload[:operations])
|
39
|
-
|
40
|
-
|
41
|
-
inlined_ops = inline_top_level_decl(original_ops)
|
42
|
-
|
43
|
-
fused[name] = { operations: inlined_ops }
|
37
|
+
inlined_ops = inline_top_level_decl(original_ops)
|
38
|
+
fused[name] = { operations: inlined_ops }
|
44
39
|
changed ||= (inlined_ops != original_ops)
|
45
40
|
end
|
46
41
|
[fused, changed]
|
47
42
|
end
|
48
43
|
|
49
|
-
#
|
44
|
+
# ---------------- core ----------------
|
45
|
+
|
46
|
+
Hoist = Struct.new(:ops, :target_depth, keyword_init: true)
|
47
|
+
|
50
48
|
def inline_top_level_decl(ops)
|
51
|
-
env
|
52
|
-
reg_map
|
49
|
+
env = Env.new
|
50
|
+
reg_map = {}
|
53
51
|
rename_map = {}
|
54
|
-
|
52
|
+
processed, hoist_pkgs = process_and_hoist_block(ops, env, reg_map, rename_map)
|
55
53
|
|
56
|
-
|
57
|
-
raise "Orphaned code
|
54
|
+
top_emit, bubble = hoist_pkgs.partition { |p| p.target_depth == 0 }
|
55
|
+
raise "Orphaned code hoist with target depth(s): #{bubble.map(&:target_depth).uniq.inspect}" unless bubble.empty?
|
58
56
|
|
59
|
-
|
57
|
+
top_emit.flat_map(&:ops) + processed
|
60
58
|
end
|
61
59
|
|
62
|
-
#
|
63
|
-
#
|
60
|
+
# walks block_ops in order: recurses into loop bodies, inlines LoadDeclaration ops, rewrites everything else
|
61
|
+
# returns [processed_ops, hoist_pkgs]
|
64
62
|
def process_and_hoist_block(block_ops, env, reg_map, rename_map)
|
65
|
-
|
66
|
-
|
63
|
+
out = []
|
64
|
+
hoisted_pkgs = []
|
67
65
|
i = 0
|
68
|
-
|
69
66
|
while i < block_ops.length
|
70
67
|
ins = block_ops[i]
|
71
68
|
case ins.opcode
|
72
69
|
when :LoopStart
|
73
|
-
end_idx
|
70
|
+
end_idx = find_matching_loop_end(block_ops, i)
|
74
71
|
loop_body = block_ops[(i + 1)...end_idx]
|
75
72
|
|
76
73
|
env.push(ins)
|
77
|
-
|
74
|
+
child_rename = {}
|
75
|
+
processed_body, child_hoists =
|
76
|
+
process_and_hoist_block(loop_body, env, reg_map, child_rename)
|
77
|
+
|
78
|
+
depth_here = env.axes.length
|
79
|
+
child_el = ins.attributes[:as_element]
|
80
|
+
child_ix = ins.attributes[:as_index]
|
81
|
+
|
82
|
+
# Partition hoists: those that belong *inside* this loop vs bubble upward
|
83
|
+
inside_pkgs, bubble_pkgs = child_hoists.partition { |p| p.target_depth == depth_here }
|
84
|
+
|
85
|
+
# Renames: never let aliases to this loop's el/idx escape upward
|
86
|
+
safe_pairs = child_rename.reject { |_, v| v == child_el || v == child_ix }
|
78
87
|
env.pop
|
79
88
|
|
80
|
-
#
|
81
|
-
|
82
|
-
|
89
|
+
# Merge only safe renames into outer scope
|
90
|
+
rename_map.merge!(safe_pairs)
|
91
|
+
|
92
|
+
# Emit loop shell
|
93
|
+
out << rewrite(ins, reg_map, rename_map)
|
94
|
+
|
95
|
+
# Local view inside loop: apply both local and outer renames; on a key conflict the outer rename_map entry wins (Hash#merge keeps the argument's value)
|
96
|
+
local_map = child_rename.merge(rename_map)
|
97
|
+
|
98
|
+
# Emit hoists that belong at this depth *inside* the loop, before the body
|
99
|
+
inside_ops = inside_pkgs.flat_map(&:ops)
|
100
|
+
out.concat(rewrite_block(inside_ops, local_map))
|
101
|
+
|
102
|
+
# Emit rewritten body
|
103
|
+
out.concat(rewrite_block(processed_body, local_map))
|
104
|
+
|
105
|
+
# Close loop
|
106
|
+
out << rewrite(block_ops[end_idx], reg_map, rename_map)
|
83
107
|
|
84
|
-
#
|
85
|
-
|
86
|
-
out_ops.concat(processed_body)
|
87
|
-
out_ops << rewrite(block_ops[end_idx], reg_map, rename_map)
|
108
|
+
# Bubble remaining hoists to outer scopes
|
109
|
+
hoisted_pkgs.concat(bubble_pkgs)
|
88
110
|
|
89
|
-
|
111
|
+
# Extra safety: ensure no aliases to this loop's el/idx remain in outer map
|
112
|
+
rename_map.delete_if { |_, v| v == child_el || v == child_ix }
|
113
|
+
|
114
|
+
i = end_idx
|
90
115
|
|
91
116
|
when :LoadDeclaration
|
92
|
-
|
93
|
-
inline_ops
|
94
|
-
|
95
|
-
hoisted_out_ops.concat(hoist_ops)
|
117
|
+
inline_ops, new_pkgs = handle_load_declaration(ins, env, reg_map, rename_map)
|
118
|
+
out.concat(inline_ops)
|
119
|
+
hoisted_pkgs.concat(new_pkgs)
|
96
120
|
|
97
121
|
else
|
98
|
-
|
122
|
+
out << rewrite(ins, reg_map, rename_map)
|
99
123
|
end
|
100
124
|
i += 1
|
101
125
|
end
|
102
|
-
|
103
|
-
[out_ops, hoisted_out_ops]
|
126
|
+
[out, hoisted_pkgs]
|
104
127
|
end
|
105
128
|
|
106
|
-
#
|
107
|
-
|
108
|
-
def handle_load_declaration(ins, env, reg_map, rename_map)
|
129
|
+
# returns [inline_ops, hoist_pkgs]
|
130
|
+
def handle_load_declaration(ins, env, _reg_map, rename_map)
|
109
131
|
callee = ins.immediates.first.value.to_sym
|
110
|
-
|
111
|
-
|
132
|
+
|
133
|
+
# axes presence and agreement with callee gamma
|
134
|
+
decl_axes = ins.attributes.fetch(:axes) { raise "LoadDeclaration missing :axes for #{callee}" }
|
135
|
+
gamma_axes = @gamma.fetch(callee).axes
|
136
|
+
raise "axes mismatch for #{callee}: decl=#{decl_axes.inspect} gamma=#{gamma_axes.inspect}" unless decl_axes == gamma_axes
|
112
137
|
|
113
138
|
body, yield_reg, callee_regs = inline_callee_core(callee)
|
114
139
|
remap = remap_axes(callee_regs, env)
|
115
|
-
_acc, fresh_ops = freshen(body, reg_map, pre_map: remap)
|
116
140
|
|
117
|
-
|
141
|
+
# per-callsite freshening
|
142
|
+
local_reg_map = {}
|
143
|
+
_acc, fresh_ops = freshen(body, local_reg_map, pre_map: remap)
|
144
|
+
|
145
|
+
# recursively process nested calls
|
146
|
+
processed_inline, nested_pkgs = process_and_hoist_block(fresh_ops, env, {}, rename_map)
|
147
|
+
|
148
|
+
# compute yielded register mapping, then resolve through any renames created by nested inlines
|
149
|
+
mapped_yield =
|
150
|
+
local_reg_map[yield_reg] || remap[yield_reg] ||
|
151
|
+
(raise "inliner: yielded reg #{yield_reg} not produced in inlined body for #{callee}")
|
152
|
+
resolved_yield = resolve_rename(mapped_yield, rename_map)
|
153
|
+
|
154
|
+
# sanity: resolved_yield must be defined by the emitted ops (inline or nested hoists) or be an ambient loop register
|
155
|
+
emitted_defs = processed_inline.map(&:result_register).compact +
|
156
|
+
nested_pkgs.flat_map { |p| p.ops }.map(&:result_register).compact
|
157
|
+
unless emitted_defs.include?(resolved_yield) || env.ambient_regs.include?(resolved_yield)
|
158
|
+
raise "inliner: mapped yield #{resolved_yield} has no def in emitted ops for #{callee}\n" \
|
159
|
+
"original yield: #{yield_reg}\n" \
|
160
|
+
"inline defs size: #{processed_inline.count { |x| x.result_register }}\n" \
|
161
|
+
"nested hoist defs size: #{nested_pkgs.flat_map { |p| p.ops }.count { |x| x.result_register }}"
|
162
|
+
end
|
163
|
+
|
164
|
+
# final rename for call site result uses the resolved register
|
165
|
+
rename_map[ins.result_register] = resolved_yield
|
166
|
+
|
167
|
+
# decide placement by depth
|
168
|
+
site_depth = env.axes.length
|
169
|
+
callee_depth = decl_axes.length
|
170
|
+
|
171
|
+
if callee_depth < site_depth
|
172
|
+
forb = forbidden_ambient_after(callee_depth, env)
|
173
|
+
used = uses_of(processed_inline)
|
174
|
+
bad = used & forb
|
175
|
+
unless bad.empty?
|
176
|
+
raise "scope error: would hoist ops using deeper-axis regs #{bad.inspect} " \
|
177
|
+
"(callee_depth=#{callee_depth}, site_depth=#{site_depth})"
|
178
|
+
end
|
179
|
+
pkgs = nested_pkgs + [Hoist.new(ops: processed_inline, target_depth: callee_depth)]
|
180
|
+
[[], pkgs]
|
181
|
+
|
182
|
+
elsif callee_depth == site_depth
|
183
|
+
emit, bubble = nested_pkgs.partition { |p| p.target_depth == site_depth }
|
184
|
+
[(emit.flat_map(&:ops) + processed_inline), bubble]
|
118
185
|
|
119
|
-
# Case 1: Hoisting
|
120
|
-
if prefix?(decl_axes, site_axes) && decl_axes.length < site_axes.length
|
121
|
-
[[], fresh_ops] # Return ops in the 'hoist' bucket
|
122
|
-
# Case 2: In-place
|
123
|
-
elsif decl_axes == site_axes
|
124
|
-
[fresh_ops, []] # Return ops in the 'inline' bucket
|
125
|
-
# Case 3: Cannot inline
|
126
186
|
else
|
127
|
-
[[rewrite(ins,
|
187
|
+
[[rewrite(ins, {}, rename_map)], []]
|
128
188
|
end
|
129
189
|
end
|
130
190
|
|
131
|
-
#
|
191
|
+
# ---------------- helpers ----------------
|
192
|
+
def rewrite_block(ops, rename)
|
193
|
+
# Ensure late-added renames apply to a block we built earlier.
|
194
|
+
ops.map { |ins| rewrite(ins, {}, rename) }
|
195
|
+
end
|
196
|
+
|
197
|
+
def resolve_rename(reg, rename)
|
198
|
+
seen = {}
|
199
|
+
cur = reg
|
200
|
+
while (n = rename[cur]) && !seen[n]
|
201
|
+
seen[cur] = true
|
202
|
+
cur = n
|
203
|
+
end
|
204
|
+
cur
|
205
|
+
end
|
206
|
+
|
207
|
+
def uses_of(ops)
|
208
|
+
ops.flat_map { |x| Array(x.inputs) }.compact
|
209
|
+
end
|
210
|
+
|
211
|
+
def forbidden_ambient_after(depth, env)
|
212
|
+
env.frames_after(depth).flat_map { |f| [f[:el], f[:idx]] }
|
213
|
+
end
|
132
214
|
|
133
215
|
def find_matching_loop_end(ops, start_index)
|
134
|
-
depth = 1
|
216
|
+
depth = 1
|
217
|
+
(start_index + 1...ops.length).each do |i|
|
135
218
|
op = ops[i].opcode
|
136
219
|
depth += 1 if op == :LoopStart
|
137
220
|
depth -= 1 if op == :LoopEnd
|
@@ -143,47 +226,57 @@ module Kumi
|
|
143
226
|
def remap_axes(callee_axis_regs, env)
|
144
227
|
callee_axis_regs.each_with_object({}) do |r, h|
|
145
228
|
caller = env.reg_for_axis(r[:axis])
|
146
|
-
h[r[:el]]
|
229
|
+
h[r[:el]] = caller[:el]
|
147
230
|
h[r[:idx]] = caller[:idx]
|
148
231
|
end
|
149
232
|
end
|
150
233
|
|
151
|
-
def rename_yielded_register(ins, yielded_reg, reg_map, axis_remap, rename)
|
152
|
-
return unless ins.result_register && yielded_reg
|
153
|
-
|
154
|
-
mapped = reg_map.fetch(yielded_reg, axis_remap.fetch(yielded_reg, yielded_reg))
|
155
|
-
rename[ins.result_register] = mapped
|
156
|
-
end
|
157
|
-
|
158
|
-
# (Your original `detect_all_gammas`, `detect_gamma`, `inline_callee_core`,
|
159
|
-
# `Env`, `freshen`, `rewrite`, and `prefix?` methods go here, unchanged)
|
160
234
|
class Env
|
161
235
|
def initialize = @frames = []
|
162
236
|
def axes = @frames.map { _1[:axis] }
|
237
|
+
def ambient_regs = @frames.flat_map { |f| [f[:el], f[:idx]] }
|
163
238
|
|
164
239
|
def push(loop_ins)
|
165
|
-
@frames << {
|
240
|
+
@frames << {
|
241
|
+
axis: loop_ins.attributes[:axis],
|
242
|
+
el: loop_ins.attributes[:as_element],
|
243
|
+
idx: loop_ins.attributes[:as_index]
|
244
|
+
}
|
166
245
|
end
|
167
246
|
|
168
247
|
def pop = @frames.pop
|
169
|
-
|
248
|
+
|
249
|
+
def reg_for_axis(axis)
|
250
|
+
@frames.reverse.find { _1[:axis] == axis } ||
|
251
|
+
raise("no element for axis=#{axis.inspect}")
|
252
|
+
end
|
253
|
+
|
254
|
+
def frames_after(depth)
|
255
|
+
@frames[depth..] || []
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
def detect_all_gammas(ops_by_decl)
|
260
|
+
ops_by_decl.transform_values { |p| detect_gamma(Array(p[:operations])) }
|
170
261
|
end
|
171
262
|
|
172
|
-
def detect_all_gammas(ops_by_decl) = ops_by_decl.transform_values { |p| detect_gamma(Array(p[:operations])) }
|
173
263
|
GammaInfo = Struct.new(:start_idx, :axes, :axis_regs, keyword_init: true)
|
264
|
+
|
174
265
|
def detect_gamma(ops)
|
175
266
|
frames = []
|
176
267
|
ops.each do |ins|
|
177
268
|
case ins.opcode
|
178
269
|
when :LoopStart
|
179
|
-
frames << {
|
270
|
+
frames << {
|
271
|
+
axis: ins.attributes[:axis],
|
272
|
+
el: ins.attributes[:as_element],
|
273
|
+
idx: ins.attributes[:as_index]
|
274
|
+
}
|
180
275
|
when :LoopEnd
|
181
276
|
frames.pop
|
182
277
|
when :Yield
|
183
278
|
axes = frames.map { _1[:axis] }
|
184
|
-
axis_regs = frames.map
|
185
|
-
{ axis: f[:axis], el: f[:el], idx: f[:idx] }
|
186
|
-
end
|
279
|
+
axis_regs = frames.map { |f| { axis: f[:axis], el: f[:el], idx: f[:idx] } }
|
187
280
|
return GammaInfo.new(start_idx: nil, axes: axes, axis_regs: axis_regs)
|
188
281
|
end
|
189
282
|
end
|
@@ -191,33 +284,34 @@ module Kumi
|
|
191
284
|
end
|
192
285
|
|
193
286
|
def inline_callee_core(callee_name)
|
194
|
-
ops
|
287
|
+
ops = Array(@ops_by_decl.fetch(callee_name)[:operations])
|
195
288
|
info = @gamma.fetch(callee_name)
|
196
289
|
axes = info.axes
|
197
|
-
k
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
290
|
+
k = axes.length
|
291
|
+
|
292
|
+
yi = ops.rindex { |x| x.opcode == :Yield } or raise "callee #{callee_name} has no Yield"
|
293
|
+
yielded_reg = Array(ops[yi].inputs).first
|
294
|
+
|
295
|
+
first_loop = ops.index { |x| x.opcode == :LoopStart }
|
296
|
+
return [ops[0...yi], yielded_reg, info.axis_regs] unless first_loop
|
297
|
+
|
298
|
+
prologue = ops[0...first_loop]
|
299
|
+
main = ops[first_loop...yi]
|
205
300
|
inner_body = []
|
206
301
|
open_gamma = 0
|
207
|
-
|
208
|
-
|
302
|
+
stack = []
|
303
|
+
main.each do |ins|
|
209
304
|
case ins.opcode
|
210
305
|
when :LoopStart
|
211
306
|
if open_gamma < k && ins.attributes[:axis] == axes[open_gamma]
|
212
|
-
|
307
|
+
stack << :gamma
|
213
308
|
open_gamma += 1
|
214
309
|
else
|
215
|
-
|
310
|
+
stack << :inner
|
216
311
|
inner_body << ins
|
217
312
|
end
|
218
313
|
when :LoopEnd
|
219
|
-
|
220
|
-
inner_body << ins if kind == :inner
|
314
|
+
inner_body << ins if stack.pop == :inner
|
221
315
|
else
|
222
316
|
inner_body << ins
|
223
317
|
end
|
@@ -228,44 +322,70 @@ module Kumi
|
|
228
322
|
def freshen(block_ops, reg_map, pre_map: {})
|
229
323
|
acc_map = {}
|
230
324
|
new_ops = block_ops.map do |ins|
|
325
|
+
attrs = (ins.attributes || {}).dup
|
326
|
+
|
327
|
+
if ins.opcode == :LoopStart
|
328
|
+
attrs[:id] = @ids.generate_loop_id
|
329
|
+
new_el = @ids.generate_temp
|
330
|
+
new_idx = @ids.generate_temp
|
331
|
+
reg_map[attrs[:as_element]] = new_el
|
332
|
+
reg_map[attrs[:as_index]] = new_idx
|
333
|
+
attrs[:as_element] = new_el
|
334
|
+
attrs[:as_index] = new_idx
|
335
|
+
end
|
336
|
+
|
231
337
|
res = ins.result_register
|
232
338
|
reg_map[res] ||= @ids.generate_temp if res
|
339
|
+
|
233
340
|
new_inputs = Array(ins.inputs).map do |r|
|
234
341
|
r1 = pre_map.fetch(r, r)
|
235
342
|
reg_map.fetch(r1, r1)
|
236
343
|
end
|
237
|
-
|
344
|
+
|
238
345
|
case ins.opcode
|
239
346
|
when :DeclareAccumulator
|
240
|
-
|
241
|
-
acc_map[
|
242
|
-
res = acc_map[
|
347
|
+
orig = ins.result_register
|
348
|
+
acc_map[orig] ||= @ids.generate_acc
|
349
|
+
res = acc_map[orig]
|
243
350
|
when :Accumulate
|
244
|
-
|
245
|
-
acc_map[
|
246
|
-
res = acc_map[
|
351
|
+
orig = ins.result_register
|
352
|
+
acc_map[orig] ||= @ids.generate_acc
|
353
|
+
res = acc_map[orig]
|
247
354
|
when :LoadAccumulator
|
248
|
-
|
249
|
-
acc_map[
|
250
|
-
new_inputs[0] = acc_map[
|
251
|
-
when :LoopStart
|
252
|
-
attrs[:id] = @ids.generate_loop_id
|
355
|
+
orig = ins.inputs.first
|
356
|
+
acc_map[orig] ||= @ids.generate_acc
|
357
|
+
new_inputs[0] = acc_map[orig]
|
253
358
|
end
|
254
|
-
|
255
|
-
|
359
|
+
|
360
|
+
LIR::Instruction.new(
|
361
|
+
opcode: ins.opcode,
|
362
|
+
result_register: res ? reg_map.fetch(res, res) : nil,
|
363
|
+
stamp: ins.stamp,
|
364
|
+
inputs: new_inputs,
|
365
|
+
immediates: ins.immediates,
|
366
|
+
attributes: attrs,
|
367
|
+
location: ins.location
|
368
|
+
)
|
256
369
|
end
|
257
370
|
[acc_map, new_ops]
|
258
371
|
end
|
259
372
|
|
260
373
|
def rewrite(ins, _reg_map, rename)
|
261
|
-
new_inputs = Array(ins.inputs).map
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
374
|
+
new_inputs = Array(ins.inputs).map { |r| rename.fetch(r, r) }
|
375
|
+
LIR::Instruction.new(
|
376
|
+
opcode: ins.opcode,
|
377
|
+
result_register: ins.result_register,
|
378
|
+
stamp: ins.stamp,
|
379
|
+
inputs: new_inputs,
|
380
|
+
immediates: ins.immediates,
|
381
|
+
attributes: ins.attributes,
|
382
|
+
location: ins.location
|
383
|
+
)
|
266
384
|
end
|
267
385
|
|
268
|
-
def prefix?(pre, full)
|
386
|
+
def prefix?(pre, full)
|
387
|
+
pre.each_with_index.all? { |tok, i| full[i] == tok }
|
388
|
+
end
|
269
389
|
end
|
270
390
|
end
|
271
391
|
end
|
data/lib/kumi/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kumi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.25
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- André Muta
|
@@ -265,6 +265,22 @@ files:
|
|
265
265
|
- golden/hierarchical_complex/expected/snast.txt
|
266
266
|
- golden/hierarchical_complex/input.json
|
267
267
|
- golden/hierarchical_complex/schema.kumi
|
268
|
+
- golden/inline_rename_scope_leak/expected.json
|
269
|
+
- golden/inline_rename_scope_leak/expected/ast.txt
|
270
|
+
- golden/inline_rename_scope_leak/expected/input_plan.txt
|
271
|
+
- golden/inline_rename_scope_leak/expected/lir_00_unoptimized.txt
|
272
|
+
- golden/inline_rename_scope_leak/expected/lir_01_hoist_scalar_references.txt
|
273
|
+
- golden/inline_rename_scope_leak/expected/lir_02_inlined.txt
|
274
|
+
- golden/inline_rename_scope_leak/expected/lir_03_cse.txt
|
275
|
+
- golden/inline_rename_scope_leak/expected/lir_04_1_loop_fusion.txt
|
276
|
+
- golden/inline_rename_scope_leak/expected/lir_04_loop_invcm.txt
|
277
|
+
- golden/inline_rename_scope_leak/expected/lir_06_const_prop.txt
|
278
|
+
- golden/inline_rename_scope_leak/expected/nast.txt
|
279
|
+
- golden/inline_rename_scope_leak/expected/schema_javascript.mjs
|
280
|
+
- golden/inline_rename_scope_leak/expected/schema_ruby.rb
|
281
|
+
- golden/inline_rename_scope_leak/expected/snast.txt
|
282
|
+
- golden/inline_rename_scope_leak/input.json
|
283
|
+
- golden/inline_rename_scope_leak/schema.kumi
|
268
284
|
- golden/input_reference/expected.json
|
269
285
|
- golden/input_reference/expected/ast.txt
|
270
286
|
- golden/input_reference/expected/input_plan.txt
|
@@ -521,6 +537,22 @@ files:
|
|
521
537
|
- golden/tuples_and_arrays/expected/snast.txt
|
522
538
|
- golden/tuples_and_arrays/input.json
|
523
539
|
- golden/tuples_and_arrays/schema.kumi
|
540
|
+
- golden/us_tax_2024/expected.json
|
541
|
+
- golden/us_tax_2024/expected/ast.txt
|
542
|
+
- golden/us_tax_2024/expected/input_plan.txt
|
543
|
+
- golden/us_tax_2024/expected/lir_00_unoptimized.txt
|
544
|
+
- golden/us_tax_2024/expected/lir_01_hoist_scalar_references.txt
|
545
|
+
- golden/us_tax_2024/expected/lir_02_inlined.txt
|
546
|
+
- golden/us_tax_2024/expected/lir_03_cse.txt
|
547
|
+
- golden/us_tax_2024/expected/lir_04_1_loop_fusion.txt
|
548
|
+
- golden/us_tax_2024/expected/lir_04_loop_invcm.txt
|
549
|
+
- golden/us_tax_2024/expected/lir_06_const_prop.txt
|
550
|
+
- golden/us_tax_2024/expected/nast.txt
|
551
|
+
- golden/us_tax_2024/expected/schema_javascript.mjs
|
552
|
+
- golden/us_tax_2024/expected/schema_ruby.rb
|
553
|
+
- golden/us_tax_2024/expected/snast.txt
|
554
|
+
- golden/us_tax_2024/input.json
|
555
|
+
- golden/us_tax_2024/schema.kumi
|
524
556
|
- golden/with_constants/expected/ast.txt
|
525
557
|
- golden/with_constants/expected/input_plan.txt
|
526
558
|
- golden/with_constants/expected/lir_00_unoptimized.txt
|