kumi 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/BACKLOG.md +34 -0
  4. data/CHANGELOG.md +15 -0
  5. data/CLAUDE.md +4 -6
  6. data/README.md +0 -18
  7. data/config/functions.yaml +352 -0
  8. data/docs/dev/analyzer-debug.md +52 -0
  9. data/docs/dev/parse-command.md +64 -0
  10. data/docs/functions/analyzer_integration.md +199 -0
  11. data/docs/functions/signatures.md +171 -0
  12. data/examples/hash_objects_demo.rb +138 -0
  13. data/golden/array_operations/schema.kumi +17 -0
  14. data/golden/cascade_logic/schema.kumi +16 -0
  15. data/golden/mixed_nesting/schema.kumi +42 -0
  16. data/golden/simple_math/schema.kumi +10 -0
  17. data/lib/kumi/analyzer.rb +72 -21
  18. data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
  19. data/lib/kumi/core/analyzer/debug.rb +167 -0
  20. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
  21. data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
  22. data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
  23. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +99 -151
  24. data/lib/kumi/core/analyzer/passes/toposorter.rb +37 -1
  25. data/lib/kumi/core/analyzer/state_serde.rb +64 -0
  26. data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
  27. data/lib/kumi/core/compiler/access_planner.rb +3 -2
  28. data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
  29. data/lib/kumi/core/functions/dimension.rb +98 -0
  30. data/lib/kumi/core/functions/dtypes.rb +20 -0
  31. data/lib/kumi/core/functions/errors.rb +11 -0
  32. data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
  33. data/lib/kumi/core/functions/loader.rb +119 -0
  34. data/lib/kumi/core/functions/registry_v2.rb +68 -0
  35. data/lib/kumi/core/functions/shape.rb +70 -0
  36. data/lib/kumi/core/functions/signature.rb +122 -0
  37. data/lib/kumi/core/functions/signature_parser.rb +86 -0
  38. data/lib/kumi/core/functions/signature_resolver.rb +272 -0
  39. data/lib/kumi/core/ir/execution_engine/interpreter.rb +98 -7
  40. data/lib/kumi/core/ir/execution_engine/profiler.rb +202 -0
  41. data/lib/kumi/core/ir/execution_engine.rb +30 -1
  42. data/lib/kumi/dev/ir.rb +75 -0
  43. data/lib/kumi/dev/parse.rb +105 -0
  44. data/lib/kumi/dev/runner.rb +83 -0
  45. data/lib/kumi/frontends/ruby.rb +28 -0
  46. data/lib/kumi/frontends/text.rb +46 -0
  47. data/lib/kumi/frontends.rb +29 -0
  48. data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
  49. data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
  50. data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
  51. data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
  52. data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
  53. data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
  54. data/lib/kumi/runtime/executable.rb +63 -20
  55. data/lib/kumi/schema.rb +4 -4
  56. data/lib/kumi/support/diff.rb +22 -0
  57. data/lib/kumi/support/ir_render.rb +61 -0
  58. data/lib/kumi/version.rb +1 -1
  59. data/lib/kumi.rb +2 -0
  60. data/performance_results.txt +63 -0
  61. data/scripts/test_mixed_nesting_performance.rb +206 -0
  62. metadata +45 -5
  63. data/docs/features/javascript-transpiler.md +0 -148
  64. data/lib/kumi/js.rb +0 -23
  65. data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
+ module Analyzer
6
+ module Passes
7
+ # Load Input Common Subexpression Elimination Pass
8
+ #
9
+ # Eliminates redundant load_input operations by reusing loads that
10
+ # were already stored by earlier declarations.
11
+ #
12
+ # OPTIMIZATION STRATEGY:
13
+ # - Cross-declaration load reuse: If a load_input with the same
14
+ # (plan_id, scope, is_scalar, has_idx) was already stored by an
15
+ # earlier declaration, rewrite later identical loads to ref the
16
+ # stored value instead of re-loading.
17
+ # - Only reuses producers that appear earlier in module order
18
+ # (no reordering/hoisting).
19
+ # - Safe because interpreter's outputs persist across declarations
20
+ # and ref operations resolve previously stored values.
21
+ #
22
+ # REQUIREMENTS:
23
+ # - Must run after LowerToIR pass
24
+ # - IR module must be available in state
25
+ #
26
+ # DEBUG:
27
+ # - Set DEBUG_LOAD_CSE=1 to see optimization decisions
28
+ class LoadInputCSE < PassBase
29
# Rewrites redundant +load_input+ ops into +ref+ ops.
#
# Pass 1 records, for every distinct load key, the earliest declaration
# that stores that load under a name. Pass 2 replaces any identical load
# that appears in a *later* declaration with a ref to the stored name.
# Declarations are never reordered.
#
# @param errors unused by this pass
# @return the state unchanged when the IR module has no decls, otherwise
#   the state with :ir_module replaced by a rebuilt module
def run(errors)
  ir_module = get_state(:ir_module, required: true)
  return state unless ir_module&.decls

  verbose = ENV["DEBUG_LOAD_CSE"]
  decls = ir_module.decls

  puts "LOAD_CSE: Analyzing #{decls.length} declarations" if verbose

  # Pass 1: load key -> { name:, decl_index: } of the earliest storing decl.
  canonical = {}
  decls.each_with_index do |decl, decl_idx|
    decl.ops.each_with_index do |op, slot|
      next unless op.tag == :load_input

      key = load_key(op)
      stored_as = name_storing_slot(decl.ops, slot)
      next unless stored_as
      next if canonical.key?(key) # first producer wins

      canonical[key] = { name: stored_as, decl_index: decl_idx }
      puts "LOAD_CSE: Found producer #{stored_as} in decl #{decl_idx} for key #{key.inspect}" if verbose
    end
  end

  puts "LOAD_CSE: Found #{canonical.size} unique load patterns" if verbose

  # Pass 2: swap loads for refs when the producer lives in an earlier decl.
  rewritten = 0
  rebuilt_decls = decls.each_with_index.map do |decl, decl_idx|
    rebuilt_ops = decl.ops.each_with_index.map do |op, slot|
      producer = op.tag == :load_input ? canonical[load_key(op)] : nil
      next op unless producer && producer[:decl_index] < decl_idx

      rewritten += 1
      puts "LOAD_CSE: Replacing load_input in #{decl.name}[#{slot}] with ref to #{producer[:name]}" if verbose
      Kumi::Core::IR::Ops.Ref(producer[:name])
    end

    Kumi::Core::IR::Decl.new(name: decl.name, kind: decl.kind, shape: decl.shape, ops: rebuilt_ops)
  end

  puts "LOAD_CSE: Applied #{rewritten} optimizations" if verbose

  state.with(:ir_module, Kumi::Core::IR::Module.new(inputs: ir_module.inputs, decls: rebuilt_decls))
end
92
+
93
+ private
94
+
95
+ # Generate a unique key for a load_input operation based on its attributes
96
# @param op an op exposing #attrs (a Hash, or nil which is treated as empty)
# @return [Array] [:load_input, plan_id, scope as Array, is_scalar, has_idx]
#   with the two flags coerced to strict booleans so that nil and false
#   produce the same key
def load_key(op)
  attrs = op.attrs || {}
  flags = %i[is_scalar has_idx].map { |flag| attrs[flag] ? true : false }

  [:load_input, attrs[:plan_id], Array(attrs[:scope]), *flags]
end
106
+
107
+ # Find a store operation that names the given slot index
108
# @param ops [Array] ops of a single declaration
# @param slot_id [Integer] index of the op whose result we are looking for
# @return the :name attr of the first :store op whose first argument is
#   slot_id, or nil when no store references that slot
def name_storing_slot(ops, slot_id)
  storing_op = ops.find do |candidate|
    candidate.tag == :store && (candidate.args && candidate.args[0]) == slot_id
  end

  storing_op && storing_op.attrs[:name]
end
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../../support/ir_dump"
4
-
5
3
  module Kumi
6
4
  module Core
7
5
  module Analyzer
@@ -196,9 +194,6 @@ module Kumi
196
194
  when :reduce
197
195
  rs = Array(op.attrs[:result_scope] || [])
198
196
  rs.empty? ? SlotShape.scalar : SlotShape.vec(rs, has_idx: true)
199
-
200
- when :lift
201
- SlotShape.scalar # lift groups to nested Ruby arrays
202
197
  when :switch
203
198
  branch_shapes =
204
199
  op.attrs[:cases].map { |(_, v)| determine_slot_shape(v, ops, access_plans) } +
@@ -217,7 +212,7 @@ module Kumi
217
212
  end
218
213
 
219
214
  else
220
- SlotShape.scalar
215
+ raise "Op `#{op.tag}` not supported"
221
216
  end
222
217
  end
223
218
 
@@ -379,6 +374,7 @@ module Kumi
379
374
  when Syntax::InputReference
380
375
  plan_id = pick_plan_id_for_input([expr.name], access_plans,
381
376
  scope_plan: scope_plan, need_indices: need_indices)
377
+
382
378
  plans = access_plans.fetch(expr.name.to_s, [])
383
379
  selected = plans.find { |p| p.accessor_key == plan_id }
384
380
  scope = selected ? selected.scope : []
@@ -430,6 +426,16 @@ module Kumi
430
426
  when Syntax::CallExpression
431
427
  entry = Kumi::Registry.entry(expr.fn_name)
432
428
 
429
+ # Validate signature metadata from FunctionSignaturePass (read-only assertions)
430
+ validate_signature_metadata(expr, entry)
431
+
432
+ # Constant folding optimization: evaluate expressions with all literal arguments
433
+ if can_constant_fold?(expr, entry)
434
+ folded_value = constant_fold(expr, entry)
435
+ ops << Kumi::Core::IR::Ops.Const(folded_value)
436
+ return ops.size - 1
437
+ end
438
+
433
439
  if ENV["DEBUG_LOWER"] && has_nested_reducer?(expr)
434
440
  puts " NESTED_REDUCER_DETECTED in #{expr.fn_name} with req_scope=#{required_scope.inspect}"
435
441
  end
@@ -441,22 +447,13 @@ module Kumi
441
447
  # For comparison ops with nested reducers, we need to ensure
442
448
  # the nested reducer gets the right required_scope (per-player)
443
449
  # instead of the full dimensional scope from infer_expr_scope
444
-
445
- # Get the desired result scope from our scope plan (per-player scope)
446
- # This should be [:players] for per-player operations
447
450
  plan = @join_reduce_plans[@current_decl]
448
451
  target_scope = if plan.is_a?(Kumi::Core::Analyzer::Plans::Reduce) && plan.result_scope && !plan.result_scope.empty?
449
452
  plan.result_scope
450
453
  elsif required_scope && !required_scope.empty?
451
454
  required_scope
452
455
  else
453
- # Try to infer per-player scope from the nested reducer argument
454
- nested_reducer_arg = find_nested_reducer_arg(expr)
455
- if nested_reducer_arg
456
- infer_per_player_scope(nested_reducer_arg)
457
- else
458
- []
459
- end
456
+ []
460
457
  end
461
458
 
462
459
  puts " NESTED_REDUCTION target_scope=#{target_scope.inspect}" if ENV["DEBUG_LOWER"]
@@ -759,44 +756,29 @@ module Kumi
759
756
  twin = :"#{cond.name}__vec"
760
757
  twin_meta = @vec_meta && @vec_meta[twin]
761
758
 
762
- if cascade_scope && !Array(cascade_scope).empty?
763
- # Consumer needs a grouped view of this declaration.
764
- if twin_meta && twin_meta[:scope] == Array(cascade_scope)
765
- # We have a vectorized twin at exactly the required scope - use it!
766
- ops << Kumi::Core::IR::Ops.Ref(twin)
767
- ops.size - 1
768
- else
769
- # Need to inline re-lower the referenced declaration's *expression*
770
- decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
771
- slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
772
- true, Array(cascade_scope), cacheable: true)
773
- project_mask_to_scope(slot, cascade_scope, ops, access_plans)
774
- end
775
- else
776
- # Plain (scalar) use, or already-materialized vec twin
777
- ref = twin_meta ? twin : cond.name
778
- ops << Kumi::Core::IR::Ops.Ref(ref)
779
- ops.size - 1
780
- end
759
+ raise "Missing cascade_scope" unless cascade_scope && !Array(cascade_scope).empty?
781
760
 
782
- when Syntax::CallExpression
783
- if cond.fn_name == :cascade_and
784
- parts = cond.args.map { |a| lower_cascade_pred(a, cascade_scope, ops, access_plans, scope_plan) }
785
- # They’re all @ cascade_scope (or scalar) now; align scalars broadcast, vecs already match.
786
- parts.reduce do |acc, s|
787
- ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
788
- ops.size - 1
789
- end
761
+ # Consumer needs a grouped view of this declaration.
762
+ if twin_meta && twin_meta[:scope] == Array(cascade_scope)
763
+ # We have a vectorized twin at exactly the required scope - use it!
764
+ ops << Kumi::Core::IR::Ops.Ref(twin)
765
+ ops.size - 1
790
766
  else
791
- slot = lower_expression(cond, ops, access_plans, scope_plan,
792
- true, Array(cascade_scope), cacheable: false)
767
+ # Need to inline re-lower the referenced declaration's *expression*
768
+ decl = @declarations.fetch(cond.name) { raise "unknown decl #{cond.name}" }
769
+ slot = lower_expression(decl.expression, ops, access_plans, scope_plan,
770
+ true, Array(cascade_scope), cacheable: true)
793
771
  project_mask_to_scope(slot, cascade_scope, ops, access_plans)
794
772
  end
795
-
773
+ when Syntax::CallExpression
774
+ parts = cond.args.map { |a| lower_cascade_pred(a, cascade_scope, ops, access_plans, scope_plan) }
775
+ # They’re all @ cascade_scope (or scalar) now; align scalars broadcast, vecs already match.
776
+ parts.reduce do |acc, s|
777
+ ops << Kumi::Core::IR::Ops.Map(:and, 2, acc, s)
778
+ ops.size - 1
779
+ end
796
780
  else
797
- slot = lower_expression(cond, ops, access_plans, scope_plan,
798
- true, Array(cascade_scope), cacheable: false)
799
- project_mask_to_scope(slot, cascade_scope, ops, access_plans)
781
+ raise "Unexpected Expression #{cond.class} in Cascade"
800
782
  end
801
783
  end
802
784
 
@@ -862,128 +844,94 @@ module Kumi
862
844
  end
863
845
  end
864
846
 
865
- def find_nested_reducer_arg(expr)
866
- return nil unless expr.is_a?(Kumi::Syntax::CallExpression)
847
+ # Make sure a boolean mask lives at exactly cascade_scope.
848
+ def project_mask_to_scope(slot, cascade_scope, ops, access_plans)
849
+ sh = determine_slot_shape(slot, ops, access_plans)
850
+ return slot if sh.scope == cascade_scope
851
+ return slot if sh.kind == :scalar
867
852
 
868
- expr.args.each do |arg|
869
- case arg
870
- when Kumi::Syntax::CallExpression
871
- entry = Kumi::Registry.entry(arg.fn_name)
872
- return arg if entry&.reducer
853
+ cascade_scope = Array(cascade_scope)
854
+ slot_scope = Array(sh.scope)
873
855
 
874
- nested = find_nested_reducer_arg(arg)
875
- return nested if nested
876
- end
856
+ # Check prefix compatibility
857
+ short, long = [cascade_scope, slot_scope].sort_by(&:length)
858
+ unless long.first(short.length) == short
859
+ raise "cascade condition scope #{slot_scope.inspect} is not prefix-compatible with #{cascade_scope.inspect}"
877
860
  end
878
- nil
879
- end
880
861
 
881
- def infer_per_player_scope(reducer_expr)
882
- return [] unless reducer_expr.is_a?(Kumi::Syntax::CallExpression)
883
-
884
- # Look at the reducer's argument to determine the full scope
885
- arg = reducer_expr.args.first
886
- return [] unless arg
887
-
888
- case arg
889
- when Kumi::Syntax::InputElementReference
890
- # For paths like [:players, :score_matrices, :session, :points]
891
- # We want to keep [:players] and reduce over the rest
892
- arg.path.empty? ? [] : [arg.path.first]
893
- when Kumi::Syntax::CallExpression
894
- # For nested expressions, get the deepest input path and take first element
895
- deepest = find_deepest_input_path(arg)
896
- deepest && !deepest.empty? ? [deepest.first] : []
897
- else
898
- []
899
- end
900
- end
862
+ return unless slot_scope.length < cascade_scope.length
901
863
 
902
- def find_deepest_input_path(expr)
903
- case expr
904
- when Kumi::Syntax::InputElementReference
905
- expr.path
906
- when Kumi::Syntax::InputReference
907
- [expr.name]
908
- when Kumi::Syntax::CallExpression
909
- paths = expr.args.map { |a| find_deepest_input_path(a) }.compact
910
- paths.max_by(&:length)
911
- else
912
- nil
864
+ # Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
865
+ # Find a target vector that already has the cascade scope
866
+ target_slot = nil
867
+ ops.each_with_index do |op, i|
868
+ next unless %i[load_input map].include?(op.tag)
869
+
870
+ shape = determine_slot_shape(i, ops, access_plans)
871
+ if shape.kind == :vec && shape.scope == cascade_scope && shape.has_idx
872
+ target_slot = i
873
+ break
874
+ end
913
875
  end
876
+
877
+ ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
878
+ ops.size - 1
914
879
  end
915
880
 
916
- # Make sure a boolean mask lives at exactly cascade_scope.
917
- def project_mask_to_scope(slot, cascade_scope, ops, access_plans)
918
- sh = determine_slot_shape(slot, ops, access_plans)
919
- return slot if sh.scope == cascade_scope
881
+ # Constant folding optimization helpers
882
+ def can_constant_fold?(expr, entry)
883
+ return false unless entry&.fn # Skip if function not found
884
+ return false if entry.reducer # Skip reducer functions for now
885
+ return false if expr.args.empty? # Need at least one argument
920
886
 
921
- # If we have a scalar condition but need it at cascade scope, broadcast it
922
- if sh.kind == :scalar && cascade_scope && !Array(cascade_scope).empty?
923
- # Find a target vector that already has the cascade scope
924
- target_slot = nil
925
- ops.each_with_index do |op, i|
926
- next unless %i[load_input map].include?(op.tag)
887
+ # Check if all arguments are literals
888
+ expr.args.all? { |arg| arg.is_a?(Syntax::Literal) }
889
+ end
927
890
 
928
- shape = determine_slot_shape(i, ops, access_plans)
929
- if shape.kind == :vec && shape.scope == Array(cascade_scope) && shape.has_idx
930
- target_slot = i
931
- break
932
- end
933
- end
891
+ def validate_signature_metadata(expr, entry)
892
+ # Get the node index to access signature metadata
893
+ node_index = get_state(:node_index, required: false)
894
+ return unless node_index
934
895
 
935
- return slot unless target_slot
896
+ node_entry = node_index[expr.object_id]
897
+ return unless node_entry
936
898
 
937
- ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: Array(cascade_scope), on_missing: :error,
938
- require_unique: true)
939
- return ops.size - 1
899
+ metadata = node_entry[:metadata]
900
+ return unless metadata
940
901
 
941
- # Can't broadcast, use as-is
902
+ # Validate that dropped axes make sense for reduction functions
903
+ if entry&.reducer && metadata[:dropped_axes]
904
+ dropped_axes = metadata[:dropped_axes]
905
+ unless dropped_axes.is_a?(Array)
906
+ raise "Invalid dropped_axes metadata for reducer #{expr.fn_name}: expected Array, got #{dropped_axes.class}"
907
+ end
942
908
 
909
+ # For reductions, we should have at least one dropped axis (or empty for scalar reductions)
910
+ puts " SIGNATURE[#{expr.fn_name}] dropped_axes: #{dropped_axes.inspect}" if ENV["DEBUG_LOWER"]
943
911
  end
944
912
 
945
- return slot if sh.kind == :scalar
946
-
947
- cascade_scope = Array(cascade_scope)
948
- slot_scope = Array(sh.scope)
949
-
950
- # Check prefix compatibility
951
- short, long = [cascade_scope, slot_scope].sort_by(&:length)
952
- unless long.first(short.length) == short
953
- raise "cascade condition scope #{slot_scope.inspect} is not prefix-compatible with #{cascade_scope.inspect}"
913
+ # Validate join_policy is recognized
914
+ if metadata[:join_policy] && !%i[zip product].include?(metadata[:join_policy])
915
+ raise "Invalid join_policy for #{expr.fn_name}: #{metadata[:join_policy].inspect}"
954
916
  end
955
917
 
956
- if slot_scope.length < cascade_scope.length
957
- # Need to broadcast UP: slot scope is shorter, needs to be aligned to cascade scope
958
- # Find a target vector that already has the cascade scope
959
- target_slot = nil
960
- ops.each_with_index do |op, i|
961
- next unless %i[load_input map].include?(op.tag)
918
+ # Warn about join_policy when no join op exists yet (future integration point)
919
+ return unless metadata[:join_policy] && ENV["DEBUG_LOWER"]
962
920
 
963
- shape = determine_slot_shape(i, ops, access_plans)
964
- if shape.kind == :vec && shape.scope == cascade_scope && shape.has_idx
965
- target_slot = i
966
- break
967
- end
968
- end
921
+ puts " SIGNATURE[#{expr.fn_name}] join_policy: #{metadata[:join_policy]} (join op not yet implemented)"
922
+ end
969
923
 
970
- if target_slot
971
- ops << Kumi::Core::IR::Ops.AlignTo(target_slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
972
- ops.size - 1
973
- else
974
- # Fallback: use the slot itself (might not work but worth trying)
975
- ops << Kumi::Core::IR::Ops.AlignTo(slot, slot, to_scope: cascade_scope, on_missing: :error, require_unique: true)
976
- ops.size - 1
977
- end
978
- else
979
- # Need to reduce DOWN: slot scope is longer, reduce extra dimensions
980
- extra_axes = slot_scope - cascade_scope
981
- if extra_axes.empty?
982
- slot # should not happen due to early return above
983
- else
984
- ops << Kumi::Core::IR::Ops.Reduce(:any?, extra_axes, cascade_scope, [], slot)
985
- ops.size - 1
986
- end
924
# Evaluates a call at compile time by invoking the registry function with
# the literal values of its arguments.
#
# @param expr  call expression; every arg must respond to #value
# @param entry registry entry whose #fn is called with the literal values
# @return the value returned by the function
# @raise [RuntimeError] "Cannot constant fold <fn>: <reason>" when the
#   function raises a StandardError for these literals
def constant_fold(expr, entry)
  literal_values = expr.args.map(&:value)

  begin
    # Call the function with literal values at compile time
    entry.fn.call(*literal_values)
  rescue StandardError => e
    # There is no runtime fallback here: a failure is re-raised as a
    # compile-time error naming the function (the original exception is
    # still reachable through #cause).
    puts "Constant folding failed for #{expr.fn_name}: #{e.message}" if ENV["DEBUG_LOWER"]
    raise "Cannot constant fold #{expr.fn_name}: #{e.message}"
  end
end
989
937
  end
@@ -8,18 +8,54 @@ module Kumi
8
8
  # RESPONSIBILITY: Compute topological ordering of declarations, allowing safe conditional cycles
9
9
  # DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer, :cascades from UnsatDetector
10
10
  # PRODUCES: :evaluation_order - Array of declaration names in evaluation order
11
+ # :node_index - Hash mapping object_id to node metadata for later passes
11
12
  # INTERFACE: new(schema, state).run(errors)
12
13
  class Toposorter < PassBase
13
14
  def run(errors)
14
15
  dependency_graph = get_state(:dependencies, required: false) || {}
15
16
  definitions = get_state(:declarations, required: false) || {}
16
17
 
18
+ # Create node index for later passes to use
19
+ node_index = build_node_index(definitions)
17
20
  order = compute_topological_order(dependency_graph, definitions, errors)
18
- state.with(:evaluation_order, order)
21
+
22
+ state.with(:evaluation_order, order).with(:node_index, node_index)
19
23
  end
20
24
 
21
25
  private
22
26
 
27
# Builds the node index handed to later passes by walking every
# declaration in +definitions+ (and, through index_node_recursive,
# the nodes reachable from it).
#
# @param definitions [Hash] declarations keyed by name; only values are used
# @return [Hash] the index populated by index_node_recursive
def build_node_index(definitions)
  definitions.each_value.with_object({}) do |declaration, node_index|
    index_node_recursive(declaration, node_index)
  end
end
37
+
38
# Records +node+ and everything reachable from it in +index+, keyed by
# object_id. Each entry holds the node itself, the last segment of its
# class name (nil for anonymous classes) and an empty metadata hash.
#
# A node that is already indexed is not walked again. This avoids
# traversing a subtree twice when it is reachable through both #children
# and #expression, and it terminates on cyclic node graphs.
#
# @param node  node to index; nil/false is ignored
# @param index [Hash] accumulator, mutated in place
# @return [void]
def index_node_recursive(node, index)
  return unless node
  return if index.key?(node.object_id)

  index[node.object_id] = {
    node: node,
    type: node.class.name.to_s.split("::").last,
    metadata: {}
  }

  # Same traversal the visitor pattern uses: descend into all children.
  node.children.each { |child| index_node_recursive(child, index) } if node.respond_to?(:children)

  # Declaration-like nodes also expose their expression.
  index_node_recursive(node.expression, index) if node.respond_to?(:expression)
end
58
+
23
59
  def compute_topological_order(graph, definitions, errors)
24
60
  temp_marks = Set.new
25
61
  perm_marks = Set.new
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "set"
5
+
6
+ module Kumi
7
+ module Core
8
+ module Analyzer
9
# Serialization helpers for analyzer state snapshots.
#
# Two formats are offered:
# - Marshal: exact round-trip of +state.to_h+ inside a versioned envelope
#   (recommended for resume).
# - JSON: human-readable and best-effort. Symbols and Sets are tagged so
#   they can be restored; IR objects are reduced to their #inspect string
#   and are NOT restored, so a JSON snapshot is not guaranteed resumable.
module StateSerde
  module_function

  # Version tag written into, and required from, the Marshal envelope.
  MARSHAL_FORMAT_VERSION = 1

  # @param state object responding to #to_h
  # @return [String] Marshal bytes of { v:, data: }
  def dump_marshal(state)
    Marshal.dump({ v: MARSHAL_FORMAT_VERSION, data: state.to_h })
  end

  # Restores a state written by dump_marshal.
  #
  # SECURITY: Marshal.load can instantiate arbitrary objects. Only feed it
  # bytes this process (or a trusted one) produced; never untrusted input.
  #
  # @param bytes [String] output of dump_marshal
  # @return [Kumi::Core::Analyzer::AnalysisState]
  # @raise [ArgumentError] when the payload is not a supported envelope
  def load_marshal(bytes)
    payload = Marshal.load(bytes)
    unless payload.is_a?(Hash) && payload[:v] == MARSHAL_FORMAT_VERSION && payload.key?(:data)
      raise ArgumentError, "unsupported analyzer state snapshot (expected format v#{MARSHAL_FORMAT_VERSION})"
    end

    ::Kumi::Core::Analyzer::AnalysisState.new(payload[:data])
  end

  # @param state  object responding to #to_h
  # @param pretty [Boolean] pretty-print when true (default)
  # @return [String] JSON text
  def dump_json(state, pretty: true)
    encoded = encode_json_safe(state.to_h)
    pretty ? JSON.pretty_generate(encoded) : JSON.generate(encoded)
  end

  # @param json_str [String] output of dump_json
  # @return [Kumi::Core::Analyzer::AnalysisState]
  def load_json(json_str)
    # Keys are left as strings here; decode_json_safe symbolizes them after
    # it has looked for the "$..." markers.
    ::Kumi::Core::Analyzer::AnalysisState.new(decode_json_safe(JSON.parse(json_str)))
  end

  # ---- helpers ----

  # Converts a value into JSON-representable data, tagging Symbols as
  # {"$sym" => name}, Sets as {"$set" => [...]} and IR objects as
  # {"$ir" => inspect-string}. Hash keys are stringified.
  def encode_json_safe(x)
    case x
    when Hash then x.transform_keys(&:to_s).transform_values { |v| encode_json_safe(v) }
    when Array then x.map { |v| encode_json_safe(v) }
    when Set then { "$set" => x.to_a.map { |v| encode_json_safe(v) } }
    when Symbol then { "$sym" => x.to_s }
    when ::Kumi::Core::IR::Module, ::Kumi::Core::IR::Decl, ::Kumi::Core::IR::Op
      { "$ir" => x.inspect }
    else x
    end
  end

  # Inverse of encode_json_safe for parsed JSON. A hash counts as a marker
  # only when the marker is its sole key (which is what encode_json_safe
  # emits); any other hash is decoded as a regular hash with symbol keys,
  # so no entries are silently dropped.
  def decode_json_safe(x)
    case x
    when Hash
      marker = x.size == 1 ? x.keys.first : nil
      case marker
      when "$sym" then x["$sym"].to_sym
      when "$set" then Set.new(x["$set"].map { |item| decode_json_safe(item) })
      when "$ir" then x["$ir"] # stays an inspect string; IR is not rebuilt
      else x.transform_keys(&:to_sym).transform_values { |value| decode_json_safe(value) }
      end
    when Array then x.map { |item| decode_json_safe(item) }
    else x
    end
  end
end
62
+ end
63
+ end
64
+ end
@@ -3,17 +3,19 @@
3
3
  module Kumi
4
4
  module Core
5
5
  module Analyzer
6
- # One plan for a specific path and mode (path:mode)
7
- AccessPlan = Struct.new(:path, :containers, :leaf, :scope, :depth, :mode,
8
- :on_missing, :key_policy, :operations, keyword_init: true) do
9
- def initialize(path:, containers:, leaf:, scope:, depth:, mode:, on_missing:, key_policy:, operations:)
10
- super
11
- freeze
12
- end
6
+ module Structs
7
+ # One plan for a specific path and mode (path:mode)
8
+ AccessPlan = Struct.new(:path, :containers, :leaf, :scope, :depth, :mode,
9
+ :on_missing, :key_policy, :operations, keyword_init: true) do
10
+ def initialize(path:, containers:, leaf:, scope:, depth:, mode:, on_missing:, key_policy:, operations:)
11
+ super
12
+ freeze
13
+ end
13
14
 
14
- def accessor_key = "#{path}:#{mode}"
15
- def ndims = depth
16
- def scalar? = depth.zero?
15
+ def accessor_key = "#{path}:#{mode}"
16
+ def ndims = depth
17
+ def scalar? = depth.zero?
18
+ end
17
19
  end
18
20
  end
19
21
  end
@@ -73,7 +73,7 @@ module Kumi
73
73
  modes.each do |mode|
74
74
  operations = build_operations(path, mode)
75
75
 
76
- list << Kumi::Core::Analyzer::AccessPlan.new(
76
+ list << Kumi::Core::Analyzer::Structs::AccessPlan.new(
77
77
  path: base[:path],
78
78
  containers: base[:containers],
79
79
  leaf: base[:leaf],
@@ -148,7 +148,8 @@ module Kumi
148
148
  ops << enter_hash(seg)
149
149
  puts " Added: enter_hash('#{seg}')" if ENV["DEBUG_ACCESSOR_OPS"]
150
150
  else
151
- raise ArgumentError, "Invalid parent :container '#{container}' for segment '#{seg}'. Expected :array, :object, :hash, or nil (root)"
151
+ raise ArgumentError,
152
+ "Invalid parent :container '#{container}' for segment '#{seg}'. Expected :array, :object, :hash, or nil (root)"
152
153
  end
153
154
 
154
155
  parent_meta = node
@@ -11,7 +11,9 @@ module Kumi
11
11
  empty?: FunctionBuilder.collection_unary(:empty?, "Check if collection is empty", :empty?, reducer: true,
12
12
  structure_function: true),
13
13
  size: FunctionBuilder::Entry.new(
14
- fn: ->(collection) { collection.size },
14
+ fn: lambda { |collection|
15
+ collection.size
16
+ },
15
17
  arity: 1,
16
18
  param_types: [:any],
17
19
  return_type: :integer,