kumi 0.0.13 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CHANGELOG.md +33 -0
  5. data/CLAUDE.md +4 -6
  6. data/README.md +0 -45
  7. data/config/functions.yaml +352 -0
  8. data/docs/dev/analyzer-debug.md +52 -0
  9. data/docs/dev/parse-command.md +64 -0
  10. data/docs/dev/vm-profiling.md +95 -0
  11. data/docs/features/README.md +0 -7
  12. data/docs/functions/analyzer_integration.md +199 -0
  13. data/docs/functions/signatures.md +171 -0
  14. data/examples/hash_objects_demo.rb +138 -0
  15. data/golden/array_operations/schema.kumi +17 -0
  16. data/golden/cascade_logic/schema.kumi +16 -0
  17. data/golden/mixed_nesting/schema.kumi +42 -0
  18. data/golden/simple_math/schema.kumi +10 -0
  19. data/lib/kumi/analyzer.rb +76 -22
  20. data/lib/kumi/compiler.rb +6 -5
  21. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  22. data/lib/kumi/core/analyzer/debug.rb +167 -0
  23. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  24. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  25. data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +67 -0
  26. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  27. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +72 -157
  28. data/lib/kumi/core/analyzer/passes/toposorter.rb +40 -36
  29. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  30. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  31. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  32. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  33. data/lib/kumi/core/functions/dimension.rb +98 -0
  34. data/lib/kumi/core/functions/dtypes.rb +20 -0
  35. data/lib/kumi/core/functions/errors.rb +11 -0
  36. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  37. data/lib/kumi/core/functions/loader.rb +119 -0
  38. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  39. data/lib/kumi/core/functions/shape.rb +70 -0
  40. data/lib/kumi/core/functions/signature.rb +122 -0
  41. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  42. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  43. data/lib/kumi/core/ir/execution_engine/interpreter.rb +110 -7
  44. data/lib/kumi/core/ir/execution_engine/profiler.rb +330 -0
  45. data/lib/kumi/core/ir/execution_engine.rb +6 -15
  46. data/lib/kumi/dev/ir.rb +75 -0
  47. data/lib/kumi/dev/parse.rb +105 -0
  48. data/lib/kumi/dev/profile_aggregator.rb +301 -0
  49. data/lib/kumi/dev/profile_runner.rb +199 -0
  50. data/lib/kumi/dev/runner.rb +85 -0
  51. data/lib/kumi/dev.rb +14 -0
  52. data/lib/kumi/frontends/ruby.rb +28 -0
  53. data/lib/kumi/frontends/text.rb +46 -0
  54. data/lib/kumi/frontends.rb +29 -0
  55. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  56. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  57. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  58. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  59. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  60. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  61. data/lib/kumi/runtime/executable.rb +108 -45
  62. data/lib/kumi/schema.rb +12 -6
  63. data/lib/kumi/support/diff.rb +22 -0
  64. data/lib/kumi/support/ir_render.rb +61 -0
  65. data/lib/kumi/version.rb +1 -1
  66. data/lib/kumi.rb +3 -0
  67. data/performance_results.txt +63 -0
  68. data/scripts/test_mixed_nesting_performance.rb +206 -0
  69. metadata +50 -6
  70. data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
  71. data/docs/features/javascript-transpiler.md +0 -148
  72. data/lib/kumi/js.rb +0 -23
  73. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module Analyzer
      module Passes
        # Load-input common subexpression elimination.
        #
        # Replaces repeated load_input operations with refs to a value that an
        # earlier declaration already loaded and stored under a name.
        #
        # STRATEGY:
        # - Two load_input ops are treated as identical when they agree on
        #   (plan_id, scope, is_scalar, has_idx).
        # - The canonical producer of a load is the earliest declaration that
        #   both performs it and stores the resulting slot.
        # - Only declarations positioned after the producer are rewritten;
        #   declarations are never reordered and loads are never hoisted.
        # - Relies on the interpreter keeping stored outputs available to later
        #   declarations so that a ref can resolve them.
        #
        # REQUIREMENTS:
        # - Runs after the LowerToIR pass.
        # - :ir_module must be present in the analysis state.
        #
        # DEBUG:
        # - Set DEBUG_LOAD_CSE=1 to print each optimization decision.
        class LoadInputCSE < PassBase
          # Rewrites the IR module and returns the state carrying the new module.
          # Returns the state untouched when the module exposes no declarations.
          def run(errors)
            ir = get_state(:ir_module, required: true)
            return state unless ir&.decls

            verbose = ENV["DEBUG_LOAD_CSE"]
            puts "LOAD_CSE: Analyzing #{ir.decls.length} declarations" if verbose

            # Step 1: key -> { name:, decl_index: } for the earliest storing decl
            canonical = earliest_producers(ir.decls, verbose)
            puts "LOAD_CSE: Found #{canonical.size} unique load patterns" if verbose

            # Step 2: swap later duplicates for refs to the stored value
            rewrites = 0
            rewritten = ir.decls.map.with_index do |decl, decl_pos|
              ops = decl.ops.map.with_index do |op, op_pos|
                next op unless op.tag == :load_input

                source = canonical[load_key(op)]
                # The producing decl itself (and anything before it) keeps its load
                next op unless source && source[:decl_index] < decl_pos

                rewrites += 1
                puts "LOAD_CSE: Replacing load_input in #{decl.name}[#{op_pos}] with ref to #{source[:name]}" if verbose
                Kumi::Core::IR::Ops.Ref(source[:name])
              end

              Kumi::Core::IR::Decl.new(name: decl.name, kind: decl.kind, shape: decl.shape, ops: ops)
            end

            puts "LOAD_CSE: Applied #{rewrites} optimizations" if verbose

            state.with(:ir_module, Kumi::Core::IR::Module.new(inputs: ir.inputs, decls: rewritten))
          end

          private

          # Scans declarations in order and records, per load key, the first
          # declaration that stores the loaded slot under a name.
          def earliest_producers(decls, verbose)
            decls.each_with_index.with_object({}) do |(decl, decl_pos), found|
              decl.ops.each_with_index do |op, op_pos|
                next unless op.tag == :load_input

                key = load_key(op)
                stored_as = name_storing_slot(decl.ops, op_pos)
                next unless stored_as
                # First producer wins; later ones are candidates for rewriting
                next if found.key?(key)

                found[key] = { name: stored_as, decl_index: decl_pos }
                puts "LOAD_CSE: Found producer #{stored_as} in decl #{decl_pos} for key #{key.inspect}" if verbose
              end
            end
          end

          # Identity of a load_input op, built from the attributes that decide
          # whether two loads are interchangeable.
          def load_key(op)
            attrs = op.attrs || {}
            scalar = attrs[:is_scalar] ? true : false
            indexed = attrs[:has_idx] ? true : false
            [:load_input, attrs[:plan_id], Array(attrs[:scope]), scalar, indexed]
          end

          # Name under which the first store op reading `slot_id` saves it,
          # or nil when no store op reads that slot.
          def name_storing_slot(ops, slot_id)
            store = ops.find { |op| op.tag == :store && (op.args && op.args[0]) == slot_id }
            store ? store.attrs[:name] : nil
          end
        end
      end
    end
  end
end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../../support/ir_dump"
4
-
5
3
  module Kumi
6
4
  module Core
7
5
  module Analyzer
@@ -196,9 +194,6 @@ module Kumi
196
194
  when :reduce
197
195
  rs = Array(op.attrs[:result_scope] || [])
198
196
  rs.empty? ? SlotShape.scalar : SlotShape.vec(rs, has_idx: true)
199
-
200
- when :lift
201
- SlotShape.scalar # lift groups to nested Ruby arrays
202
197
  when :switch
203
198
  branch_shapes =
204
199
  op.attrs[:cases].map { |(_, v)| determine_slot_shape(v, ops, access_plans) } +
@@ -217,7 +212,7 @@ module Kumi
217
212
  end
218
213
 
219
214
  else
220
- SlotShape.scalar
215
+ raise "Op `#{op.tag}` not supported"
221
216
  end
222
217
  end
223
218
 
@@ -379,7 +374,7 @@ module Kumi
379
374
  when Syntax::InputReference
380
375
  plan_id = pick_plan_id_for_input([expr.name], access_plans,
381
376
  scope_plan: scope_plan, need_indices: need_indices)
382
-
377
+
383
378
  plans = access_plans.fetch(expr.name.to_s, [])
384
379
  selected = plans.find { |p| p.accessor_key == plan_id }
385
380
  scope = selected ? selected.scope : []
@@ -431,6 +426,9 @@ module Kumi
431
426
  when Syntax::CallExpression
432
427
  entry = Kumi::Registry.entry(expr.fn_name)
433
428
 
429
+ # Validate signature metadata from FunctionSignaturePass (read-only assertions)
430
+ validate_signature_metadata(expr, entry)
431
+
434
432
  # Constant folding optimization: evaluate expressions with all literal arguments
435
433
  if can_constant_fold?(expr, entry)
436
434
  folded_value = constant_fold(expr, entry)
@@ -449,22 +447,13 @@ module Kumi
449
447
  # For comparison ops with nested reducers, we need to ensure
450
448
  # the nested reducer gets the right required_scope (per-player)
451
449
  # instead of the full dimensional scope from infer_expr_scope
452
-
453
- # Get the desired result scope from our scope plan (per-player scope)
454
- # This should be [:players] for per-player operations
455
450
  plan = @join_reduce_plans[@current_decl]
456
451
  target_scope = if plan.is_a?(Kumi::Core::Analyzer::Plans::Reduce) && plan.result_scope && !plan.result_scope.empty?
457
452
  plan.result_scope
458
453
  elsif required_scope && !required_scope.empty?
459
454
  required_scope
460
455
  else
461
- # Try to infer per-player scope from the nested reducer argument
462
- nested_reducer_arg = find_nested_reducer_arg(expr)
463
- if nested_reducer_arg
464
- infer_per_player_scope(nested_reducer_arg)
465
- else
466
- []
467
- end
456
+ []
468
457
  end
469
458
 
470
459
  puts " NESTED_REDUCTION target_scope=#{target_scope.inspect}" if ENV["DEBUG_LOWER"]
@@ -767,44 +756,29 @@ module Kumi
767
756
  twin = :"#{cond.name}__vec"
768
757
  twin_meta = @vec_meta && @vec_meta[twin]
769
758
 
770
- if cascade_scope && !Array(cascade_scope).empty?
771
- # Consumer needs a grouped view of this declaration.
772
- if twin_meta && twin_meta[:scope] == Array(cascade_scope)
773
- # We have a vectorized twin at exactly the required scope - use it!
774
- ops << Kumi::Core::IR::Ops.Ref(twin)
775
- ops.size - 1
776
- else
777
- # Need to inline re-lower the referenced declaration's *expression*
778
- decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
779
- slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
780
- true, Array(cascade_scope), cacheable: true)
781
- project_mask_to_scope(slot, cascade_scope, ops, access_plans)
782
- end
783
- else
784
- # Plain (scalar) use, or already-materialized vec twin
785
- ref = twin_meta ? twin : cond.name
786
- ops << Kumi::Core::IR::Ops.Ref(ref)
787
- ops.size - 1
788
- end
759
+ raise "Missing cascade_scope" unless cascade_scope && !Array(cascade_scope).empty?
789
760
 
790
- when Syntax::CallExpression
791
- if cond.fn_name == :cascade_and
792
- parts = cond.args.map { |a| lower_cascade_pred(a, cascade_scope, ops, access_plans, scope_plan) }
793
- # They’re all @ cascade_scope (or scalar) now; align scalars broadcast, vecs already match.
794
- parts.reduce do |acc, s|
795
- ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
796
- ops.size - 1
797
- end
761
+ # Consumer needs a grouped view of this declaration.
762
+ if twin_meta && twin_meta[:scope] == Array(cascade_scope)
763
+ # We have a vectorized twin at exactly the required scope - use it!
764
+ ops << Kumi::Core::IR::Ops.Ref(twin)
765
+ ops.size - 1
798
766
  else
799
- slot = lower_expression(cond, ops, access_plans, scope_plan,
800
- true, Array(cascade_scope), cacheable: false)
767
+ # Need to inline re-lower the referenced declaration's *expression*
768
+ decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
769
+ slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
770
+ true, Array(cascade_scope), cacheable: true)
801
771
  project_mask_to_scope(slot, cascade_scope, ops, access_plans)
802
772
  end
803
-
773
+ when Syntax::CallExpression
774
+ parts = cond.args.map { |a| lower_cascade_pred(a, cascade_scope, ops, access_plans, scope_plan) }
775
+ # They’re all @ cascade_scope (or scalar) now; align scalars broadcast, vecs already match.
776
+ parts.reduce do |acc, s|
777
+ ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
778
+ ops.size - 1
779
+ end
804
780
  else
805
- slot = lower_expression(cond, ops, access_plans, scope_plan,
806
- true, Array(cascade_scope), cacheable: false)
807
- project_mask_to_scope(slot, cascade_scope, ops, access_plans)
781
+ raise "Unexpected Expression #{cond.class} in Cascade"
808
782
  end
809
783
  end
810
784
 
@@ -870,86 +844,10 @@ module Kumi
870
844
  end
871
845
  end
872
846
 
873
- def find_nested_reducer_arg(expr)
874
- return nil unless expr.is_a?(Kumi::Syntax::CallExpression)
875
-
876
- expr.args.each do |arg|
877
- case arg
878
- when Kumi::Syntax::CallExpression
879
- entry = Kumi::Registry.entry(arg.fn_name)
880
- return arg if entry&.reducer
881
-
882
- nested = find_nested_reducer_arg(arg)
883
- return nested if nested
884
- end
885
- end
886
- nil
887
- end
888
-
889
- def infer_per_player_scope(reducer_expr)
890
- return [] unless reducer_expr.is_a?(Kumi::Syntax::CallExpression)
891
-
892
- # Look at the reducer's argument to determine the full scope
893
- arg = reducer_expr.args.first
894
- return [] unless arg
895
-
896
- case arg
897
- when Kumi::Syntax::InputElementReference
898
- # For paths like [:players, :score_matrices, :session, :points]
899
- # We want to keep [:players] and reduce over the rest
900
- arg.path.empty? ? [] : [arg.path.first]
901
- when Kumi::Syntax::CallExpression
902
- # For nested expressions, get the deepest input path and take first element
903
- deepest = find_deepest_input_path(arg)
904
- deepest && !deepest.empty? ? [deepest.first] : []
905
- else
906
- []
907
- end
908
- end
909
-
910
- def find_deepest_input_path(expr)
911
- case expr
912
- when Kumi::Syntax::InputElementReference
913
- expr.path
914
- when Kumi::Syntax::InputReference
915
- [expr.name]
916
- when Kumi::Syntax::CallExpression
917
- paths = expr.args.map { |a| find_deepest_input_path(a) }.compact
918
- paths.max_by(&:length)
919
- else
920
- nil
921
- end
922
- end
923
-
924
847
  # Make sure a boolean mask lives at exactly cascade_scope.
925
848
  def project_mask_to_scope(slot, cascade_scope, ops, access_plans)
926
849
  sh = determine_slot_shape(slot, ops, access_plans)
927
850
  return slot if sh.scope == cascade_scope
928
-
929
- # If we have a scalar condition but need it at cascade scope, broadcast it
930
- if sh.kind == :scalar && cascade_scope && !Array(cascade_scope).empty?
931
- # Find a target vector that already has the cascade scope
932
- target_slot = nil
933
- ops.each_with_index do |op, i|
934
- next unless %i[load_input map].include?(op.tag)
935
-
936
- shape = determine_slot_shape(i, ops, access_plans)
937
- if shape.kind == :vec && shape.scope == Array(cascade_scope) && shape.has_idx
938
- target_slot = i
939
- break
940
- end
941
- end
942
-
943
- return slot unless target_slot
944
-
945
- ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: Array(cascade_scope), on_missing: :error,
946
- require_unique: true)
947
- return ops.size - 1
948
-
949
- # Can't broadcast, use as-is
950
-
951
- end
952
-
953
851
  return slot if sh.kind == :scalar
954
852
 
955
853
  cascade_scope = Array(cascade_scope)
@@ -961,38 +859,23 @@ module Kumi
961
859
  raise "cascade condition scope #{slot_scope.inspect} is not prefix-compatible with #{cascade_scope.inspect}"
962
860
  end
963
861
 
964
- if slot_scope.length < cascade_scope.length
965
- # Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
966
- # Find a target vector that already has the cascade scope
967
- target_slot = nil
968
- ops.each_with_index do |op, i|
969
- next unless %i[load_input map].include?(op.tag)
862
+ return unless slot_scope.length < cascade_scope.length
970
863
 
971
- shape = determine_slot_shape(i, ops, access_plans)
972
- if shape.kind == :vec && shape.scope == cascade_scope && shape.has_idx
973
- target_slot = i
974
- break
975
- end
976
- end
864
+ # Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
865
+ # Find a target vector that already has the cascade scope
866
+ target_slot = nil
867
+ ops.each_with_index do |op, i|
868
+ next unless %i[load_input map].include?(op.tag)
977
869
 
978
- if target_slot
979
- ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
980
- ops.size - 1
981
- else
982
- # Fallback: use the slot itself (might not work but worth trying)
983
- ops << Kumi::Core::IR::Ops.AlignTo(slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
984
- ops.size - 1
985
- end
986
- else
987
- # Need to reduce DOWN: slot scope is longer, reduce extra dimensions
988
- extra_axes = slot_scope - cascade_scope
989
- if extra_axes.empty?
990
- slot # should not happen due to early return above
991
- else
992
- ops << Kumi::Core::IR::Ops.Reduce(:any?, extra_axes, cascade_scope, [], slot)
993
- ops.size - 1
870
+ shape = determine_slot_shape(i, ops, access_plans)
871
+ if shape.kind == :vec && shape.scope == cascade_scope && shape.has_idx
872
+ target_slot = i
873
+ break
994
874
  end
995
875
  end
876
+
877
+ ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
878
+ ops.size - 1
996
879
  end
997
880
 
998
881
  # Constant folding optimization helpers
@@ -1000,14 +883,47 @@ module Kumi
1000
883
  return false unless entry&.fn # Skip if function not found
1001
884
  return false if entry.reducer # Skip reducer functions for now
1002
885
  return false if expr.args.empty? # Need at least one argument
1003
-
886
+
1004
887
  # Check if all arguments are literals
1005
888
  expr.args.all? { |arg| arg.is_a?(Syntax::Literal) }
1006
889
  end
1007
890
 
891
# Read-only sanity checks on the metadata recorded for a call expression in
# the :node_index state entry. Does nothing when the index, the node entry or
# its metadata is absent.
#
# expr  - call expression; looked up by object_id, fn_name used in messages
# entry - registry entry for the function (may be nil)
#
# Raises RuntimeError when dropped_axes of a reducer is not an Array, or when
# join_policy is present but is neither :zip nor :product.
# Prints diagnostics only when DEBUG_LOWER is set. Always returns nil.
def validate_signature_metadata(expr, entry)
  index = get_state(:node_index, required: false)
  record = index && index[expr.object_id]
  meta = record && record[:metadata]
  return unless meta

  # dropped_axes is only inspected for reducer functions
  if entry&.reducer && (axes = meta[:dropped_axes])
    unless axes.is_a?(Array)
      raise "Invalid dropped_axes metadata for reducer #{expr.fn_name}: expected Array, got #{axes.class}"
    end

    puts "    SIGNATURE[#{expr.fn_name}] dropped_axes: #{axes.inspect}" if ENV["DEBUG_LOWER"]
  end

  policy = meta[:join_policy]
  return unless policy

  # Only the two known join policies are accepted
  raise "Invalid join_policy for #{expr.fn_name}: #{policy.inspect}" unless %i[zip product].include?(policy)

  # Informational only: the policy is recorded but no join op is emitted here
  puts "    SIGNATURE[#{expr.fn_name}] join_policy: #{policy} (join op not yet implemented)" if ENV["DEBUG_LOWER"]
  nil
end
923
+
1008
924
  def constant_fold(expr, entry)
1009
925
  literal_values = expr.args.map(&:value)
1010
-
926
+
1011
927
  begin
1012
928
  # Call the function with literal values at compile time
1013
929
  entry.fn.call(*literal_values)
@@ -1018,7 +934,6 @@ module Kumi
1018
934
  raise "Cannot constant fold #{expr.fn_name}: #{e.message}"
1019
935
  end
1020
936
  end
1021
-
1022
937
  end
1023
938
  end
1024
939
  end
@@ -5,38 +5,68 @@ module Kumi
5
5
  module Core
6
6
  module Analyzer
7
7
  module Passes
8
- # RESPONSIBILITY: Compute topological ordering of declarations, allowing safe conditional cycles
9
- # DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer, :cascades from UnsatDetector
8
+ # RESPONSIBILITY: Compute topological ordering of declarations, blocking all cycles
9
+ # DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer
10
10
  # PRODUCES: :evaluation_order - Array of declaration names in evaluation order
11
+ # :node_index - Hash mapping object_id to node metadata for later passes
11
12
  # INTERFACE: new(schema, state).run(errors)
12
13
  class Toposorter < PassBase
13
14
  def run(errors)
14
15
  dependency_graph = get_state(:dependencies, required: false) || {}
15
16
  definitions = get_state(:declarations, required: false) || {}
16
17
 
18
+ # Create node index for later passes to use
19
+ node_index = build_node_index(definitions)
17
20
  order = compute_topological_order(dependency_graph, definitions, errors)
18
- state.with(:evaluation_order, order)
21
+
22
+ state.with(:evaluation_order, order).with(:node_index, node_index)
19
23
  end
20
24
 
21
25
  private
22
26
 
27
# Builds the node index handed to later passes: every declaration in
# `definitions` is walked via index_node_recursive, which fills the hash.
# Returns the populated Hash (empty when there are no declarations).
def build_node_index(definitions)
  definitions.each_value.with_object({}) do |declaration, collected|
    index_node_recursive(declaration, collected)
  end
end
37
+
38
# Registers `node` in `index` under its object_id, then descends into its
# children (when it responds to :children) and into its expression (when it
# responds to :expression). A nil/false node is ignored.
#
# Each entry has the shape { node:, type:, metadata: } where type is the
# last segment of the node's class name and metadata starts out empty.
def index_node_recursive(node, index)
  return unless node

  short_type = node.class.name.split("::").last
  index[node.object_id] = { node: node, type: short_type, metadata: {} }

  # Children first, mirroring a visitor-style walk
  node.children.each { |child| index_node_recursive(child, index) } if node.respond_to?(:children)

  # Declaration-like nodes expose their body through #expression
  index_node_recursive(node.expression, index) if node.respond_to?(:expression)
end
58
+
23
59
  def compute_topological_order(graph, definitions, errors)
24
60
  temp_marks = Set.new
25
61
  perm_marks = Set.new
26
62
  order = []
27
- cascades = get_state(:cascades) || {}
28
63
 
29
64
  visit_node = lambda do |node, path = []|
30
65
  return if perm_marks.include?(node)
31
66
 
32
67
  if temp_marks.include?(node)
33
- # Check if this is a safe conditional cycle
34
- cycle_path = path + [node]
35
- return if safe_conditional_cycle?(cycle_path, graph, cascades)
36
-
37
- # Allow this cycle - it's safe due to cascade mutual exclusion
68
+ # Block all cycles - no mutual recursion allowed
38
69
  report_unexpected_cycle(temp_marks, node, errors)
39
-
40
70
  return
41
71
  end
42
72
 
@@ -66,32 +96,6 @@ module Kumi
66
96
  order.freeze
67
97
  end
68
98
 
69
- def safe_conditional_cycle?(cycle_path, graph, cascades)
70
- return false if cycle_path.nil? || cycle_path.size < 2
71
-
72
- # Find where the cycle starts - look for the first occurrence of the repeated node
73
- last_node = cycle_path.last
74
- return false if last_node.nil?
75
-
76
- cycle_start = cycle_path.index(last_node)
77
- return false unless cycle_start && cycle_start < cycle_path.size - 1
78
-
79
- cycle_nodes = cycle_path[cycle_start..]
80
-
81
- # Check if all edges in the cycle are conditional
82
- cycle_nodes.each_cons(2) do |from, to|
83
- edges = graph[from] || []
84
- edge = edges.find { |e| e.to == to }
85
-
86
- return false unless edge&.conditional
87
-
88
- # Check if the cascade has mutually exclusive conditions
89
- cascade_meta = cascades[edge.cascade_owner]
90
- return false unless cascade_meta&.dig(:all_mutually_exclusive)
91
- end
92
-
93
- true
94
- end
95
99
 
96
100
  def report_unexpected_cycle(temp_marks, current_node, errors)
97
101
  cycle_path = temp_marks.to_a.join(" → ") + " → #{current_node}"
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "set"
5
+
6
module Kumi
  module Core
    module Analyzer
      # Serialization helpers for analyzer state.
      #
      # Two formats are offered:
      # - Marshal: exact round-trip of `state.to_h` (recommended for resume).
      # - JSON: human-readable snapshot; best-effort and not guaranteed to be
      #   resumable (hash keys are stringified on dump and symbolized on load,
      #   IR objects are reduced to their #inspect string).
      module StateSerde
        module_function

        # Layout version written by dump_marshal and required by load_marshal.
        MARSHAL_FORMAT_VERSION = 1

        # Serializes the state hash into a versioned Marshal payload.
        #
        # state - object responding to #to_h
        # Returns a binary String.
        def dump_marshal(state)
          Marshal.dump({ v: MARSHAL_FORMAT_VERSION, data: state.to_h })
        end

        # Rebuilds an AnalysisState from bytes produced by dump_marshal.
        #
        # SECURITY: Marshal.load can instantiate arbitrary objects. Only pass
        # bytes that come from a trusted source (e.g. your own checkpoints).
        #
        # Raises ArgumentError when the payload is not a versioned hash written
        # by dump_marshal.
        def load_marshal(bytes)
          payload = Marshal.load(bytes)
          unless payload.is_a?(Hash) && payload[:v] == MARSHAL_FORMAT_VERSION && payload.key?(:data)
            raise ArgumentError,
                  "unsupported analyzer state payload (expected marshal format v#{MARSHAL_FORMAT_VERSION})"
          end

          ::Kumi::Core::Analyzer::AnalysisState.new(payload[:data])
        end

        # Renders the state as JSON text.
        #
        # pretty - when true (default) the output is indented for reading
        def dump_json(state, pretty: true)
          h = encode_json_safe(state.to_h)
          pretty ? JSON.pretty_generate(h) : JSON.generate(h)
        end

        # Rebuilds an AnalysisState from text produced by dump_json.
        def load_json(json_str)
          # Keys stay strings here; decode_json_safe needs to see the raw
          # "$sym"/"$set"/"$ir" markers before it symbolizes regular keys.
          h = JSON.parse(json_str)
          ::Kumi::Core::Analyzer::AnalysisState.new(decode_json_safe(h))
        end

        # ---- helpers ----

        # Converts a value into a JSON-friendly structure. Symbols and Sets are
        # wrapped in single-key marker hashes so decode_json_safe can restore
        # them; IR objects are replaced by their #inspect text; anything else
        # is passed through unchanged.
        def encode_json_safe(x)
          case x
          when Hash then x.transform_keys(&:to_s).transform_values { |v| encode_json_safe(v) }
          when Array then x.map { |v| encode_json_safe(v) }
          when Set then { "$set" => x.to_a.map { |v| encode_json_safe(v) } }
          when Symbol then { "$sym" => x.to_s }
          when ::Kumi::Core::IR::Module, ::Kumi::Core::IR::Decl, ::Kumi::Core::IR::Op
            { "$ir" => x.inspect }
          else x
          end
        end

        # Inverse of encode_json_safe for parsed JSON.
        #
        # A hash is treated as a marker only when it has exactly one key and a
        # value of the type the encoder emits; every other hash (including one
        # that merely contains a "$sym"/"$set"/"$ir" key next to other keys)
        # is decoded as a regular hash with symbolized keys, so no sibling
        # entries are dropped.
        def decode_json_safe(x)
          case x
          when Hash
            marker, payload = x.first if x.size == 1

            if marker == "$sym" && payload.is_a?(String)
              payload.to_sym
            elsif marker == "$set" && payload.is_a?(Array)
              Set.new(payload.map { |item| decode_json_safe(item) })
            elsif marker == "$ir" && payload.is_a?(String)
              payload # IR is kept as its inspect string; it cannot be rebuilt from JSON
            else
              x.transform_keys(&:to_sym).transform_values { |value| decode_json_safe(value) }
            end
          when Array then x.map { |item| decode_json_safe(item) }
          else x
          end
        end
      end
    end
  end
end
@@ -3,17 +3,19 @@
3
3
  module Kumi
4
4
  module Core
5
5
  module Analyzer
6
- # One plan for a specific path and mode (path:mode)
7
- AccessPlan = Struct.new(:path, :containers, :leaf, :scope, :depth, :mode,
8
- :on_missing, :key_policy, :operations, keyword_init: true) do
9
- def initialize(path:, containers:, leaf:, scope:, depth:, mode:, on_missing:, key_policy:, operations:)
10
- super
11
- freeze
12
- end
6
+ module Structs
7
+ # One plan for a specific path and mode (path:mode)
8
+ AccessPlan = Struct.new(:path, :containers, :leaf, :scope, :depth, :mode,
9
+ :on_missing, :key_policy, :operations, keyword_init: true) do
10
+ def initialize(path:, containers:, leaf:, scope:, depth:, mode:, on_missing:, key_policy:, operations:)
11
+ super
12
+ freeze
13
+ end
13
14
 
14
- def accessor_key = "#{path}:#{mode}"
15
- def ndims = depth
16
- def scalar? = depth.zero?
15
+ def accessor_key = "#{path}:#{mode}"
16
+ def ndims = depth
17
+ def scalar? = depth.zero?
18
+ end
17
19
  end
18
20
  end
19
21
  end
@@ -73,7 +73,7 @@ module Kumi
73
73
  modes.each do |mode|
74
74
  operations = build_operations(path, mode)
75
75
 
76
- list << Kumi::Core::Analyzer::AccessPlan.new(
76
+ list << Kumi::Core::Analyzer::Structs::AccessPlan.new(
77
77
  path: base[:path],
78
78
  containers: base[:containers],
79
79
  leaf: base[:leaf],
@@ -148,7 +148,8 @@ module Kumi
148
148
  ops << enter_hash(seg)
149
149
  puts " Added: enter_hash('#{seg}')" if ENV["DEBUG_ACCESSOR_OPS"]
150
150
  else
151
- raise ArgumentError, "Invalid parent :container '#{container}' for segment '#{seg}'. Expected :array, :object, :hash, or nil (root)"
151
+ raise ArgumentError,
152
+ "Invalid parent :container '#{container}' for segment '#{seg}'. Expected :array, :object, :hash, or nil (root)"
152
153
  end
153
154
 
154
155
  parent_meta = node