kumi 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +0 -1
- data/BACKLOG.md +34 -0
- data/CHANGELOG.md +33 -0
- data/CLAUDE.md +4 -6
- data/README.md +0 -45
- data/config/functions.yaml +352 -0
- data/docs/dev/analyzer-debug.md +52 -0
- data/docs/dev/parse-command.md +64 -0
- data/docs/dev/vm-profiling.md +95 -0
- data/docs/features/README.md +0 -7
- data/docs/functions/analyzer_integration.md +199 -0
- data/docs/functions/signatures.md +171 -0
- data/examples/hash_objects_demo.rb +138 -0
- data/golden/array_operations/schema.kumi +17 -0
- data/golden/cascade_logic/schema.kumi +16 -0
- data/golden/mixed_nesting/schema.kumi +42 -0
- data/golden/simple_math/schema.kumi +10 -0
- data/lib/kumi/analyzer.rb +76 -22
- data/lib/kumi/compiler.rb +6 -5
- data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
- data/lib/kumi/core/analyzer/debug.rb +167 -0
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
- data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
- data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +67 -0
- data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +72 -157
- data/lib/kumi/core/analyzer/passes/toposorter.rb +40 -36
- data/lib/kumi/core/analyzer/state_serde.rb +64 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
- data/lib/kumi/core/compiler/access_planner.rb +3 -2
- data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
- data/lib/kumi/core/functions/dimension.rb +98 -0
- data/lib/kumi/core/functions/dtypes.rb +20 -0
- data/lib/kumi/core/functions/errors.rb +11 -0
- data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
- data/lib/kumi/core/functions/loader.rb +119 -0
- data/lib/kumi/core/functions/registry_v2.rb +68 -0
- data/lib/kumi/core/functions/shape.rb +70 -0
- data/lib/kumi/core/functions/signature.rb +122 -0
- data/lib/kumi/core/functions/signature_parser.rb +86 -0
- data/lib/kumi/core/functions/signature_resolver.rb +272 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +110 -7
- data/lib/kumi/core/ir/execution_engine/profiler.rb +330 -0
- data/lib/kumi/core/ir/execution_engine.rb +6 -15
- data/lib/kumi/dev/ir.rb +75 -0
- data/lib/kumi/dev/parse.rb +105 -0
- data/lib/kumi/dev/profile_aggregator.rb +301 -0
- data/lib/kumi/dev/profile_runner.rb +199 -0
- data/lib/kumi/dev/runner.rb +85 -0
- data/lib/kumi/dev.rb +14 -0
- data/lib/kumi/frontends/ruby.rb +28 -0
- data/lib/kumi/frontends/text.rb +46 -0
- data/lib/kumi/frontends.rb +29 -0
- data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
- data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
- data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
- data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
- data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
- data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
- data/lib/kumi/runtime/executable.rb +108 -45
- data/lib/kumi/schema.rb +12 -6
- data/lib/kumi/support/diff.rb +22 -0
- data/lib/kumi/support/ir_render.rb +61 -0
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +3 -0
- data/performance_results.txt +63 -0
- data/scripts/test_mixed_nesting_performance.rb +206 -0
- metadata +50 -6
- data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
- data/docs/features/javascript-transpiler.md +0 -148
- data/lib/kumi/js.rb +0 -23
- data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,120 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module Analyzer
|
6
|
+
module Passes
|
7
|
+
# Load Input Common Subexpression Elimination Pass
|
8
|
+
#
|
9
|
+
# Eliminates redundant load_input operations by reusing loads that
|
10
|
+
# were already stored by earlier declarations.
|
11
|
+
#
|
12
|
+
# OPTIMIZATION STRATEGY:
|
13
|
+
# - Cross-declaration load reuse: If a load_input with the same
|
14
|
+
# (plan_id, scope, is_scalar, has_idx) was already stored by an
|
15
|
+
# earlier declaration, rewrite later identical loads to ref the
|
16
|
+
# stored value instead of re-loading.
|
17
|
+
# - Only reuses producers that appear earlier in module order
|
18
|
+
# (no reordering/hoisting).
|
19
|
+
# - Safe because interpreter's outputs persist across declarations
|
20
|
+
# and ref operations resolve previously stored values.
|
21
|
+
#
|
22
|
+
# REQUIREMENTS:
|
23
|
+
# - Must run after LowerToIR pass
|
24
|
+
# - IR module must be available in state
|
25
|
+
#
|
26
|
+
# DEBUG:
|
27
|
+
# - Set DEBUG_LOAD_CSE=1 to see optimization decisions
|
28
|
+
class LoadInputCSE < PassBase
|
29
|
+
def run(errors)
|
30
|
+
ir = get_state(:ir_module, required: true)
|
31
|
+
return state unless ir&.decls
|
32
|
+
|
33
|
+
debug = ENV["DEBUG_LOAD_CSE"]
|
34
|
+
|
35
|
+
# Map: key -> { name:, decl_index: }
|
36
|
+
producers = {}
|
37
|
+
|
38
|
+
puts "LOAD_CSE: Analyzing #{ir.decls.length} declarations" if debug
|
39
|
+
|
40
|
+
# First pass: find canonical producers (earliest decl that stores a given load)
|
41
|
+
ir.decls.each_with_index do |decl, di|
|
42
|
+
decl.ops.each_with_index do |op, oi|
|
43
|
+
next unless op.tag == :load_input
|
44
|
+
|
45
|
+
key = load_key(op)
|
46
|
+
# Does this decl store that slot under a name?
|
47
|
+
store_name = name_storing_slot(decl.ops, oi)
|
48
|
+
next unless store_name
|
49
|
+
|
50
|
+
# Keep earliest producer only
|
51
|
+
if !producers.key?(key)
|
52
|
+
producers[key] = { name: store_name, decl_index: di }
|
53
|
+
puts "LOAD_CSE: Found producer #{store_name} in decl #{di} for key #{key.inspect}" if debug
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
puts "LOAD_CSE: Found #{producers.size} unique load patterns" if debug
|
59
|
+
|
60
|
+
# Second pass: rewrite later identical loads to refs
|
61
|
+
optimizations = 0
|
62
|
+
new_decls = ir.decls.each_with_index.map do |decl, di|
|
63
|
+
new_ops = decl.ops.each_with_index.map do |op, oi|
|
64
|
+
next op unless op.tag == :load_input
|
65
|
+
|
66
|
+
key = load_key(op)
|
67
|
+
prod = producers[key]
|
68
|
+
|
69
|
+
# Only rewrite if producer is in an earlier decl
|
70
|
+
if prod && prod[:decl_index] < di
|
71
|
+
optimizations += 1
|
72
|
+
puts "LOAD_CSE: Replacing load_input in #{decl.name}[#{oi}] with ref to #{prod[:name]}" if debug
|
73
|
+
Kumi::Core::IR::Ops.Ref(prod[:name])
|
74
|
+
else
|
75
|
+
op
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
Kumi::Core::IR::Decl.new(
|
80
|
+
name: decl.name,
|
81
|
+
kind: decl.kind,
|
82
|
+
shape: decl.shape,
|
83
|
+
ops: new_ops
|
84
|
+
)
|
85
|
+
end
|
86
|
+
|
87
|
+
puts "LOAD_CSE: Applied #{optimizations} optimizations" if debug
|
88
|
+
|
89
|
+
new_ir = Kumi::Core::IR::Module.new(inputs: ir.inputs, decls: new_decls)
|
90
|
+
state.with(:ir_module, new_ir)
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
# Generate a unique key for a load_input operation based on its attributes
|
96
|
+
def load_key(op)
|
97
|
+
attrs = op.attrs || {}
|
98
|
+
[
|
99
|
+
:load_input,
|
100
|
+
attrs[:plan_id],
|
101
|
+
Array(attrs[:scope]),
|
102
|
+
!!attrs[:is_scalar],
|
103
|
+
!!attrs[:has_idx]
|
104
|
+
]
|
105
|
+
end
|
106
|
+
|
107
|
+
# Find a store operation that names the given slot index
|
108
|
+
def name_storing_slot(ops, slot_id)
|
109
|
+
ops.each do |op|
|
110
|
+
next unless op.tag == :store
|
111
|
+
src = op.args && op.args[0]
|
112
|
+
return op.attrs[:name] if src == slot_id
|
113
|
+
end
|
114
|
+
nil
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require_relative "../../../support/ir_dump"
|
4
|
-
|
5
3
|
module Kumi
|
6
4
|
module Core
|
7
5
|
module Analyzer
|
@@ -196,9 +194,6 @@ module Kumi
|
|
196
194
|
when :reduce
|
197
195
|
rs = Array(op.attrs[:result_scope] || [])
|
198
196
|
rs.empty? ? SlotShape.scalar : SlotShape.vec(rs, has_idx: true)
|
199
|
-
|
200
|
-
when :lift
|
201
|
-
SlotShape.scalar # lift groups to nested Ruby arrays
|
202
197
|
when :switch
|
203
198
|
branch_shapes =
|
204
199
|
op.attrs[:cases].map { |(_, v)| determine_slot_shape(v, ops, access_plans) } +
|
@@ -217,7 +212,7 @@ module Kumi
|
|
217
212
|
end
|
218
213
|
|
219
214
|
else
|
220
|
-
|
215
|
+
raise "Op `#{op.tag}` not supported"
|
221
216
|
end
|
222
217
|
end
|
223
218
|
|
@@ -379,7 +374,7 @@ module Kumi
|
|
379
374
|
when Syntax::InputReference
|
380
375
|
plan_id = pick_plan_id_for_input([expr.name], access_plans,
|
381
376
|
scope_plan: scope_plan, need_indices: need_indices)
|
382
|
-
|
377
|
+
|
383
378
|
plans = access_plans.fetch(expr.name.to_s, [])
|
384
379
|
selected = plans.find { |p| p.accessor_key == plan_id }
|
385
380
|
scope = selected ? selected.scope : []
|
@@ -431,6 +426,9 @@ module Kumi
|
|
431
426
|
when Syntax::CallExpression
|
432
427
|
entry = Kumi::Registry.entry(expr.fn_name)
|
433
428
|
|
429
|
+
# Validate signature metadata from FunctionSignaturePass (read-only assertions)
|
430
|
+
validate_signature_metadata(expr, entry)
|
431
|
+
|
434
432
|
# Constant folding optimization: evaluate expressions with all literal arguments
|
435
433
|
if can_constant_fold?(expr, entry)
|
436
434
|
folded_value = constant_fold(expr, entry)
|
@@ -449,22 +447,13 @@ module Kumi
|
|
449
447
|
# For comparison ops with nested reducers, we need to ensure
|
450
448
|
# the nested reducer gets the right required_scope (per-player)
|
451
449
|
# instead of the full dimensional scope from infer_expr_scope
|
452
|
-
|
453
|
-
# Get the desired result scope from our scope plan (per-player scope)
|
454
|
-
# This should be [:players] for per-player operations
|
455
450
|
plan = @join_reduce_plans[@current_decl]
|
456
451
|
target_scope = if plan.is_a?(Kumi::Core::Analyzer::Plans::Reduce) && plan.result_scope && !plan.result_scope.empty?
|
457
452
|
plan.result_scope
|
458
453
|
elsif required_scope && !required_scope.empty?
|
459
454
|
required_scope
|
460
455
|
else
|
461
|
-
|
462
|
-
nested_reducer_arg = find_nested_reducer_arg(expr)
|
463
|
-
if nested_reducer_arg
|
464
|
-
infer_per_player_scope(nested_reducer_arg)
|
465
|
-
else
|
466
|
-
[]
|
467
|
-
end
|
456
|
+
[]
|
468
457
|
end
|
469
458
|
|
470
459
|
puts " NESTED_REDUCTION target_scope=#{target_scope.inspect}" if ENV["DEBUG_LOWER"]
|
@@ -767,44 +756,29 @@ module Kumi
|
|
767
756
|
twin = :"#{cond.name}__vec"
|
768
757
|
twin_meta = @vec_meta && @vec_meta[twin]
|
769
758
|
|
770
|
-
|
771
|
-
# Consumer needs a grouped view of this declaration.
|
772
|
-
if twin_meta && twin_meta[:scope] == Array(cascade_scope)
|
773
|
-
# We have a vectorized twin at exactly the required scope - use it!
|
774
|
-
ops << Kumi::Core::IR::Ops.Ref(twin)
|
775
|
-
ops.size - 1
|
776
|
-
else
|
777
|
-
# Need to inline re-lower the referenced declaration's *expression*
|
778
|
-
decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
|
779
|
-
slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
|
780
|
-
true, Array(cascade_scope), cacheable: true)
|
781
|
-
project_mask_to_scope(slot, cascade_scope, ops, access_plans)
|
782
|
-
end
|
783
|
-
else
|
784
|
-
# Plain (scalar) use, or already-materialized vec twin
|
785
|
-
ref = twin_meta ? twin : cond.name
|
786
|
-
ops << Kumi::Core::IR::Ops.Ref(ref)
|
787
|
-
ops.size - 1
|
788
|
-
end
|
759
|
+
raise "Missing cascade_scope" unless cascade_scope && !Array(cascade_scope).empty?
|
789
760
|
|
790
|
-
|
791
|
-
if
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
|
796
|
-
ops.size - 1
|
797
|
-
end
|
761
|
+
# Consumer needs a grouped view of this declaration.
|
762
|
+
if twin_meta && twin_meta[:scope] == Array(cascade_scope)
|
763
|
+
# We have a vectorized twin at exactly the required scope - use it!
|
764
|
+
ops << Kumi::Core::IR::Ops.Ref(twin)
|
765
|
+
ops.size - 1
|
798
766
|
else
|
799
|
-
|
800
|
-
|
767
|
+
# Need to inline re-lower the referenced declaration's *expression*
|
768
|
+
decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
|
769
|
+
slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
|
770
|
+
true, Array(cascade_scope), cacheable: true)
|
801
771
|
project_mask_to_scope(slot, cascade_scope, ops, access_plans)
|
802
772
|
end
|
803
|
-
|
773
|
+
when Syntax::CallExpression
|
774
|
+
parts = cond.args.map { |a| lower_cascade_pred(a, cascade_scope, ops, access_plans, scope_plan) }
|
775
|
+
# They’re all @ cascade_scope (or scalar) now; align scalars broadcast, vecs already match.
|
776
|
+
parts.reduce do |acc, s|
|
777
|
+
ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
|
778
|
+
ops.size - 1
|
779
|
+
end
|
804
780
|
else
|
805
|
-
|
806
|
-
true, Array(cascade_scope), cacheable: false)
|
807
|
-
project_mask_to_scope(slot, cascade_scope, ops, access_plans)
|
781
|
+
raise "Unexpected Expression #{cond.class} in Cascade"
|
808
782
|
end
|
809
783
|
end
|
810
784
|
|
@@ -870,86 +844,10 @@ module Kumi
|
|
870
844
|
end
|
871
845
|
end
|
872
846
|
|
873
|
-
def find_nested_reducer_arg(expr)
|
874
|
-
return nil unless expr.is_a?(Kumi::Syntax::CallExpression)
|
875
|
-
|
876
|
-
expr.args.each do |arg|
|
877
|
-
case arg
|
878
|
-
when Kumi::Syntax::CallExpression
|
879
|
-
entry = Kumi::Registry.entry(arg.fn_name)
|
880
|
-
return arg if entry&.reducer
|
881
|
-
|
882
|
-
nested = find_nested_reducer_arg(arg)
|
883
|
-
return nested if nested
|
884
|
-
end
|
885
|
-
end
|
886
|
-
nil
|
887
|
-
end
|
888
|
-
|
889
|
-
def infer_per_player_scope(reducer_expr)
|
890
|
-
return [] unless reducer_expr.is_a?(Kumi::Syntax::CallExpression)
|
891
|
-
|
892
|
-
# Look at the reducer's argument to determine the full scope
|
893
|
-
arg = reducer_expr.args.first
|
894
|
-
return [] unless arg
|
895
|
-
|
896
|
-
case arg
|
897
|
-
when Kumi::Syntax::InputElementReference
|
898
|
-
# For paths like [:players, :score_matrices, :session, :points]
|
899
|
-
# We want to keep [:players] and reduce over the rest
|
900
|
-
arg.path.empty? ? [] : [arg.path.first]
|
901
|
-
when Kumi::Syntax::CallExpression
|
902
|
-
# For nested expressions, get the deepest input path and take first element
|
903
|
-
deepest = find_deepest_input_path(arg)
|
904
|
-
deepest && !deepest.empty? ? [deepest.first] : []
|
905
|
-
else
|
906
|
-
[]
|
907
|
-
end
|
908
|
-
end
|
909
|
-
|
910
|
-
def find_deepest_input_path(expr)
|
911
|
-
case expr
|
912
|
-
when Kumi::Syntax::InputElementReference
|
913
|
-
expr.path
|
914
|
-
when Kumi::Syntax::InputReference
|
915
|
-
[expr.name]
|
916
|
-
when Kumi::Syntax::CallExpression
|
917
|
-
paths = expr.args.map { |a| find_deepest_input_path(a) }.compact
|
918
|
-
paths.max_by(&:length)
|
919
|
-
else
|
920
|
-
nil
|
921
|
-
end
|
922
|
-
end
|
923
|
-
|
924
847
|
# Make sure a boolean mask lives at exactly cascade_scope.
|
925
848
|
def project_mask_to_scope(slot, cascade_scope, ops, access_plans)
|
926
849
|
sh = determine_slot_shape(slot, ops, access_plans)
|
927
850
|
return slot if sh.scope == cascade_scope
|
928
|
-
|
929
|
-
# If we have a scalar condition but need it at cascade scope, broadcast it
|
930
|
-
if sh.kind == :scalar && cascade_scope && !Array(cascade_scope).empty?
|
931
|
-
# Find a target vector that already has the cascade scope
|
932
|
-
target_slot = nil
|
933
|
-
ops.each_with_index do |op, i|
|
934
|
-
next unless %i[load_input map].include?(op.tag)
|
935
|
-
|
936
|
-
shape = determine_slot_shape(i, ops, access_plans)
|
937
|
-
if shape.kind == :vec && shape.scope == Array(cascade_scope) && shape.has_idx
|
938
|
-
target_slot = i
|
939
|
-
break
|
940
|
-
end
|
941
|
-
end
|
942
|
-
|
943
|
-
return slot unless target_slot
|
944
|
-
|
945
|
-
ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: Array(cascade_scope), on_missing: :error,
|
946
|
-
require_unique: true)
|
947
|
-
return ops.size - 1
|
948
|
-
|
949
|
-
# Can't broadcast, use as-is
|
950
|
-
|
951
|
-
end
|
952
|
-
|
953
851
|
return slot if sh.kind == :scalar
|
954
852
|
|
955
853
|
cascade_scope = Array(cascade_scope)
|
@@ -961,38 +859,23 @@ module Kumi
|
|
961
859
|
raise "cascade condition scope #{slot_scope.inspect} is not prefix-compatible with #{cascade_scope.inspect}"
|
962
860
|
end
|
963
861
|
|
964
|
-
|
965
|
-
# Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
|
966
|
-
# Find a target vector that already has the cascade scope
|
967
|
-
target_slot = nil
|
968
|
-
ops.each_with_index do |op, i|
|
969
|
-
next unless %i[load_input map].include?(op.tag)
|
862
|
+
return unless slot_scope.length < cascade_scope.length
|
970
863
|
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
end
|
864
|
+
# Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
|
865
|
+
# Find a target vector that already has the cascade scope
|
866
|
+
target_slot = nil
|
867
|
+
ops.each_with_index do |op, i|
|
868
|
+
next unless %i[load_input map].include?(op.tag)
|
977
869
|
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
# Fallback: use the slot itself (might not work but worth trying)
|
983
|
-
ops << Kumi::Core::IR::Ops.AlignTo(slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
|
984
|
-
ops.size - 1
|
985
|
-
end
|
986
|
-
else
|
987
|
-
# Need to reduce DOWN: slot scope is longer, reduce extra dimensions
|
988
|
-
extra_axes = slot_scope - cascade_scope
|
989
|
-
if extra_axes.empty?
|
990
|
-
slot # should not happen due to early return above
|
991
|
-
else
|
992
|
-
ops << Kumi::Core::IR::Ops.Reduce(:any?, extra_axes, cascade_scope, [], slot)
|
993
|
-
ops.size - 1
|
870
|
+
shape = determine_slot_shape(i, ops, access_plans)
|
871
|
+
if shape.kind == :vec && shape.scope == cascade_scope && shape.has_idx
|
872
|
+
target_slot = i
|
873
|
+
break
|
994
874
|
end
|
995
875
|
end
|
876
|
+
|
877
|
+
ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
|
878
|
+
ops.size - 1
|
996
879
|
end
|
997
880
|
|
998
881
|
# Constant folding optimization helpers
|
@@ -1000,14 +883,47 @@ module Kumi
|
|
1000
883
|
return false unless entry&.fn # Skip if function not found
|
1001
884
|
return false if entry.reducer # Skip reducer functions for now
|
1002
885
|
return false if expr.args.empty? # Need at least one argument
|
1003
|
-
|
886
|
+
|
1004
887
|
# Check if all arguments are literals
|
1005
888
|
expr.args.all? { |arg| arg.is_a?(Syntax::Literal) }
|
1006
889
|
end
|
1007
890
|
|
891
|
+
def validate_signature_metadata(expr, entry)
|
892
|
+
# Get the node index to access signature metadata
|
893
|
+
node_index = get_state(:node_index, required: false)
|
894
|
+
return unless node_index
|
895
|
+
|
896
|
+
node_entry = node_index[expr.object_id]
|
897
|
+
return unless node_entry
|
898
|
+
|
899
|
+
metadata = node_entry[:metadata]
|
900
|
+
return unless metadata
|
901
|
+
|
902
|
+
# Validate that dropped axes make sense for reduction functions
|
903
|
+
if entry&.reducer && metadata[:dropped_axes]
|
904
|
+
dropped_axes = metadata[:dropped_axes]
|
905
|
+
unless dropped_axes.is_a?(Array)
|
906
|
+
raise "Invalid dropped_axes metadata for reducer #{expr.fn_name}: expected Array, got #{dropped_axes.class}"
|
907
|
+
end
|
908
|
+
|
909
|
+
# For reductions, we should have at least one dropped axis (or empty for scalar reductions)
|
910
|
+
puts " SIGNATURE[#{expr.fn_name}] dropped_axes: #{dropped_axes.inspect}" if ENV["DEBUG_LOWER"]
|
911
|
+
end
|
912
|
+
|
913
|
+
# Validate join_policy is recognized
|
914
|
+
if metadata[:join_policy] && !%i[zip product].include?(metadata[:join_policy])
|
915
|
+
raise "Invalid join_policy for #{expr.fn_name}: #{metadata[:join_policy].inspect}"
|
916
|
+
end
|
917
|
+
|
918
|
+
# Warn about join_policy when no join op exists yet (future integration point)
|
919
|
+
return unless metadata[:join_policy] && ENV["DEBUG_LOWER"]
|
920
|
+
|
921
|
+
puts " SIGNATURE[#{expr.fn_name}] join_policy: #{metadata[:join_policy]} (join op not yet implemented)"
|
922
|
+
end
|
923
|
+
|
1008
924
|
def constant_fold(expr, entry)
|
1009
925
|
literal_values = expr.args.map(&:value)
|
1010
|
-
|
926
|
+
|
1011
927
|
begin
|
1012
928
|
# Call the function with literal values at compile time
|
1013
929
|
entry.fn.call(*literal_values)
|
@@ -1018,7 +934,6 @@ module Kumi
|
|
1018
934
|
raise "Cannot constant fold #{expr.fn_name}: #{e.message}"
|
1019
935
|
end
|
1020
936
|
end
|
1021
|
-
|
1022
937
|
end
|
1023
938
|
end
|
1024
939
|
end
|
@@ -5,38 +5,68 @@ module Kumi
|
|
5
5
|
module Core
|
6
6
|
module Analyzer
|
7
7
|
module Passes
|
8
|
-
# RESPONSIBILITY: Compute topological ordering of declarations,
|
9
|
-
# DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer
|
8
|
+
# RESPONSIBILITY: Compute topological ordering of declarations, blocking all cycles
|
9
|
+
# DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer
|
10
10
|
# PRODUCES: :evaluation_order - Array of declaration names in evaluation order
|
11
|
+
# :node_index - Hash mapping object_id to node metadata for later passes
|
11
12
|
# INTERFACE: new(schema, state).run(errors)
|
12
13
|
class Toposorter < PassBase
|
13
14
|
def run(errors)
|
14
15
|
dependency_graph = get_state(:dependencies, required: false) || {}
|
15
16
|
definitions = get_state(:declarations, required: false) || {}
|
16
17
|
|
18
|
+
# Create node index for later passes to use
|
19
|
+
node_index = build_node_index(definitions)
|
17
20
|
order = compute_topological_order(dependency_graph, definitions, errors)
|
18
|
-
|
21
|
+
|
22
|
+
state.with(:evaluation_order, order).with(:node_index, node_index)
|
19
23
|
end
|
20
24
|
|
21
25
|
private
|
22
26
|
|
27
|
+
def build_node_index(definitions)
|
28
|
+
index = {}
|
29
|
+
|
30
|
+
# Walk all declarations and their expressions to index every node
|
31
|
+
definitions.each_value do |decl|
|
32
|
+
index_node_recursive(decl, index)
|
33
|
+
end
|
34
|
+
|
35
|
+
index
|
36
|
+
end
|
37
|
+
|
38
|
+
def index_node_recursive(node, index)
|
39
|
+
return unless node
|
40
|
+
|
41
|
+
# Index this node by its object_id
|
42
|
+
index[node.object_id] = {
|
43
|
+
node: node,
|
44
|
+
type: node.class.name.split('::').last,
|
45
|
+
metadata: {}
|
46
|
+
}
|
47
|
+
|
48
|
+
# Use the same approach as the visitor pattern - recursively index all children
|
49
|
+
if node.respond_to?(:children)
|
50
|
+
node.children.each { |child| index_node_recursive(child, index) }
|
51
|
+
end
|
52
|
+
|
53
|
+
# Index expression for declaration nodes
|
54
|
+
if node.respond_to?(:expression)
|
55
|
+
index_node_recursive(node.expression, index)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
23
59
|
def compute_topological_order(graph, definitions, errors)
|
24
60
|
temp_marks = Set.new
|
25
61
|
perm_marks = Set.new
|
26
62
|
order = []
|
27
|
-
cascades = get_state(:cascades) || {}
|
28
63
|
|
29
64
|
visit_node = lambda do |node, path = []|
|
30
65
|
return if perm_marks.include?(node)
|
31
66
|
|
32
67
|
if temp_marks.include?(node)
|
33
|
-
#
|
34
|
-
cycle_path = path + [node]
|
35
|
-
return if safe_conditional_cycle?(cycle_path, graph, cascades)
|
36
|
-
|
37
|
-
# Allow this cycle - it's safe due to cascade mutual exclusion
|
68
|
+
# Block all cycles - no mutual recursion allowed
|
38
69
|
report_unexpected_cycle(temp_marks, node, errors)
|
39
|
-
|
40
70
|
return
|
41
71
|
end
|
42
72
|
|
@@ -66,32 +96,6 @@ module Kumi
|
|
66
96
|
order.freeze
|
67
97
|
end
|
68
98
|
|
69
|
-
def safe_conditional_cycle?(cycle_path, graph, cascades)
|
70
|
-
return false if cycle_path.nil? || cycle_path.size < 2
|
71
|
-
|
72
|
-
# Find where the cycle starts - look for the first occurrence of the repeated node
|
73
|
-
last_node = cycle_path.last
|
74
|
-
return false if last_node.nil?
|
75
|
-
|
76
|
-
cycle_start = cycle_path.index(last_node)
|
77
|
-
return false unless cycle_start && cycle_start < cycle_path.size - 1
|
78
|
-
|
79
|
-
cycle_nodes = cycle_path[cycle_start..]
|
80
|
-
|
81
|
-
# Check if all edges in the cycle are conditional
|
82
|
-
cycle_nodes.each_cons(2) do |from, to|
|
83
|
-
edges = graph[from] || []
|
84
|
-
edge = edges.find { |e| e.to == to }
|
85
|
-
|
86
|
-
return false unless edge&.conditional
|
87
|
-
|
88
|
-
# Check if the cascade has mutually exclusive conditions
|
89
|
-
cascade_meta = cascades[edge.cascade_owner]
|
90
|
-
return false unless cascade_meta&.dig(:all_mutually_exclusive)
|
91
|
-
end
|
92
|
-
|
93
|
-
true
|
94
|
-
end
|
95
99
|
|
96
100
|
def report_unexpected_cycle(temp_marks, current_node, errors)
|
97
101
|
cycle_path = temp_marks.to_a.join(" → ") + " → #{current_node}"
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
module Kumi
|
7
|
+
module Core
|
8
|
+
module Analyzer
|
9
|
+
module StateSerde
|
10
|
+
module_function
|
11
|
+
|
12
|
+
# Exact round-trip (recommended for resume)
|
13
|
+
def dump_marshal(state)
|
14
|
+
Marshal.dump({ v: 1, data: state.to_h })
|
15
|
+
end
|
16
|
+
|
17
|
+
def load_marshal(bytes)
|
18
|
+
payload = Marshal.load(bytes)
|
19
|
+
::Kumi::Core::Analyzer::AnalysisState.new(payload[:data])
|
20
|
+
end
|
21
|
+
|
22
|
+
# Human-readable snapshot (best-effort; not guaranteed resumable)
|
23
|
+
def dump_json(state, pretty: true)
|
24
|
+
h = encode_json_safe(state.to_h)
|
25
|
+
pretty ? JSON.pretty_generate(h) : JSON.generate(h)
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_json(json_str)
|
29
|
+
h = JSON.parse(json_str) # Don't symbolize keys - let decode_json_safe handle it
|
30
|
+
::Kumi::Core::Analyzer::AnalysisState.new(decode_json_safe(h))
|
31
|
+
end
|
32
|
+
|
33
|
+
# ---- helpers ----
|
34
|
+
def encode_json_safe(x)
|
35
|
+
case x
|
36
|
+
when Hash then x.transform_keys(&:to_s).transform_values { |v| encode_json_safe(v) }
|
37
|
+
when Array then x.map { |v| encode_json_safe(v) }
|
38
|
+
when Set then { "$set" => x.to_a.map { |v| encode_json_safe(v) } }
|
39
|
+
when Symbol then { "$sym" => x.to_s }
|
40
|
+
when ::Kumi::Core::IR::Module, ::Kumi::Core::IR::Decl, ::Kumi::Core::IR::Op
|
41
|
+
{ "$ir" => x.inspect }
|
42
|
+
else x
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def decode_json_safe(x)
|
47
|
+
case x
|
48
|
+
when Hash
|
49
|
+
# Check for special encoding markers first (before key transformation)
|
50
|
+
if x.key?("$sym") then x["$sym"].to_sym
|
51
|
+
elsif x.key?("$set") then Set.new(x["$set"].map { |item| decode_json_safe(item) })
|
52
|
+
elsif x.key?("$ir") then x["$ir"] # Keep as string inspection for JSON round-trip
|
53
|
+
else
|
54
|
+
# Regular hash - transform keys to symbols and recursively decode values
|
55
|
+
x.transform_keys(&:to_sym).transform_values { |value| decode_json_safe(value) }
|
56
|
+
end
|
57
|
+
when Array then x.map { |item| decode_json_safe(item) }
|
58
|
+
else x
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -3,17 +3,19 @@
|
|
3
3
|
module Kumi
|
4
4
|
module Core
|
5
5
|
module Analyzer
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
6
|
+
module Structs
|
7
|
+
# One plan for a specific path and mode (path:mode)
|
8
|
+
AccessPlan = Struct.new(:path, :containers, :leaf, :scope, :depth, :mode,
|
9
|
+
:on_missing, :key_policy, :operations, keyword_init: true) do
|
10
|
+
def initialize(path:, containers:, leaf:, scope:, depth:, mode:, on_missing:, key_policy:, operations:)
|
11
|
+
super
|
12
|
+
freeze
|
13
|
+
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
def accessor_key = "#{path}:#{mode}"
|
16
|
+
def ndims = depth
|
17
|
+
def scalar? = depth.zero?
|
18
|
+
end
|
17
19
|
end
|
18
20
|
end
|
19
21
|
end
|
@@ -73,7 +73,7 @@ module Kumi
|
|
73
73
|
modes.each do |mode|
|
74
74
|
operations = build_operations(path, mode)
|
75
75
|
|
76
|
-
list << Kumi::Core::Analyzer::AccessPlan.new(
|
76
|
+
list << Kumi::Core::Analyzer::Structs::AccessPlan.new(
|
77
77
|
path: base[:path],
|
78
78
|
containers: base[:containers],
|
79
79
|
leaf: base[:leaf],
|
@@ -148,7 +148,8 @@ module Kumi
|
|
148
148
|
ops << enter_hash(seg)
|
149
149
|
puts " Added: enter_hash('#{seg}')" if ENV["DEBUG_ACCESSOR_OPS"]
|
150
150
|
else
|
151
|
-
raise ArgumentError,
|
151
|
+
raise ArgumentError,
|
152
|
+
"Invalid parent :container '#{container}' for segment '#{seg}'. Expected :array, :object, :hash, or nil (root)"
|
152
153
|
end
|
153
154
|
|
154
155
|
parent_meta = node
|