kumi 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/CLAUDE.md +18 -258
  4. data/README.md +188 -121
  5. data/docs/AST.md +1 -1
  6. data/docs/FUNCTIONS.md +52 -8
  7. data/docs/VECTOR_SEMANTICS.md +286 -0
  8. data/docs/compiler_design_principles.md +86 -0
  9. data/docs/features/README.md +15 -2
  10. data/docs/features/hierarchical-broadcasting.md +349 -0
  11. data/docs/features/javascript-transpiler.md +148 -0
  12. data/docs/features/performance.md +1 -3
  13. data/docs/features/s-expression-printer.md +2 -2
  14. data/docs/schema_metadata.md +7 -7
  15. data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
  16. data/examples/game_of_life.rb +2 -4
  17. data/lib/kumi/analyzer.rb +34 -14
  18. data/lib/kumi/compiler.rb +4 -283
  19. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
  20. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  21. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  22. data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
  23. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  24. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  25. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  26. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  27. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
  28. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  29. data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
  30. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  31. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  32. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
  33. data/lib/kumi/core/analyzer/plans.rb +52 -0
  34. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  35. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  36. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  37. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  38. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  39. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  40. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  41. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  42. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  43. data/lib/kumi/core/compiler_base.rb +137 -0
  44. data/lib/kumi/core/error_reporter.rb +6 -5
  45. data/lib/kumi/core/errors.rb +4 -0
  46. data/lib/kumi/core/explain.rb +157 -205
  47. data/lib/kumi/core/export/node_builders.rb +2 -2
  48. data/lib/kumi/core/export/node_serializers.rb +1 -1
  49. data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
  50. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  51. data/lib/kumi/core/function_registry/function_builder.rb +142 -53
  52. data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
  53. data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
  54. data/lib/kumi/core/function_registry.rb +138 -98
  55. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  56. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  57. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  58. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  59. data/lib/kumi/core/ir.rb +58 -0
  60. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  61. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  62. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
  63. data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
  64. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  65. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  66. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  67. data/lib/kumi/errors.rb +2 -0
  68. data/lib/kumi/js.rb +23 -0
  69. data/lib/kumi/registry.rb +17 -22
  70. data/lib/kumi/runtime/executable.rb +213 -0
  71. data/lib/kumi/schema.rb +15 -4
  72. data/lib/kumi/schema_metadata.rb +2 -2
  73. data/lib/kumi/support/ir_dump.rb +491 -0
  74. data/lib/kumi/support/s_expression_printer.rb +17 -16
  75. data/lib/kumi/syntax/array_expression.rb +6 -6
  76. data/lib/kumi/syntax/call_expression.rb +4 -4
  77. data/lib/kumi/syntax/cascade_expression.rb +4 -4
  78. data/lib/kumi/syntax/case_expression.rb +4 -4
  79. data/lib/kumi/syntax/declaration_reference.rb +4 -4
  80. data/lib/kumi/syntax/hash_expression.rb +4 -4
  81. data/lib/kumi/syntax/input_declaration.rb +6 -5
  82. data/lib/kumi/syntax/input_element_reference.rb +5 -5
  83. data/lib/kumi/syntax/input_reference.rb +5 -5
  84. data/lib/kumi/syntax/literal.rb +4 -4
  85. data/lib/kumi/syntax/location.rb +5 -0
  86. data/lib/kumi/syntax/node.rb +33 -34
  87. data/lib/kumi/syntax/root.rb +6 -6
  88. data/lib/kumi/syntax/trait_declaration.rb +4 -4
  89. data/lib/kumi/syntax/value_declaration.rb +4 -4
  90. data/lib/kumi/version.rb +1 -1
  91. data/lib/kumi.rb +6 -15
  92. data/scripts/analyze_broadcast_methods.rb +68 -0
  93. data/scripts/analyze_cascade_methods.rb +74 -0
  94. data/scripts/check_broadcasting_coverage.rb +51 -0
  95. data/scripts/find_dead_code.rb +114 -0
  96. metadata +36 -9
  97. data/docs/features/array-broadcasting.md +0 -170
  98. data/lib/kumi/cli.rb +0 -449
  99. data/lib/kumi/core/compiled_schema.rb +0 -43
  100. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  101. data/lib/kumi/core/schema_instance.rb +0 -111
  102. data/lib/kumi/core/vectorization_metadata.rb +0 -110
  103. data/migrate_to_core_iterative.rb +0 -938
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
+ module Compiler
6
+ module Accessors
7
class EachIndexedAccessor
  extend Base

  # Builds an accessor lambda that walks `data` along `operations`.
  #
  # The returned lambda accepts the data plus an optional block:
  # - with a block, every reached leaf is handed to the block and nil is returned;
  # - without a block, the leaves are collected into an array that is returned.
  #
  # @param operations [Array<Hash>] steps carrying :type (:enter_hash / :enter_array)
  #   and, for hash steps, :key
  # @param path_key [Object] only interpolated into error messages
  # @param policy [Object] forwarded to missing_key_action / missing_array_action
  # @param key_policy [Object] key lookup policy for hash steps that are not
  #   immediately followed by an array step
  # @param with_indices [Boolean] when truthy each leaf is emitted together with its
  #   index path (value, idx); otherwise only the value is emitted
  # @return [Proc]
  def self.build(operations, path_key, policy, key_policy, with_indices = true)
    walker = build_each_walker(operations, path_key, policy, key_policy)

    lambda do |data, &blk|
      # Only allocate a result array when the caller did not supply a block.
      collected = blk ? nil : []

      emit =
        if blk && with_indices
          ->(val, idx) { blk.call(val, idx) }
        elsif blk
          ->(val, _idx) { blk.call(val) }
        elsif with_indices
          ->(val, idx) { collected << [val, idx] }
        else
          ->(val, _idx) { collected << val }
        end

      walker.call(data, 0, [], emit)
      collected
    end
  end

  # Depth-first traversal yielding (value, nd_index).
  #
  # The returned lambda takes (node, pc, ndx, y): the current node, the position in
  # `operations`, the index path accumulated so far, and the callable receiving leaves.
  #
  # @raise [KeyError] from the returned lambda when a key/array is missing and the
  #   policy resolves to :raise
  # @raise [RuntimeError] from the returned lambda on an unknown operation type
  def self.build_each_walker(operations, path_key, policy, key_policy)
    mode = :each_indexed
    visit = nil
    visit = lambda do |node, pc, ndx, y|
      # All operations consumed: `node` is a leaf.
      if pc >= operations.length
        y.call(node, ndx)
        return
      end

      op = operations[pc]
      case op[:type]
      when :enter_array
        if node.nil?
          # :skip (and any other action) simply ends this branch.
          case missing_array_action(policy)
          when :raise then raise KeyError, "Missing array at '#{path_key}' (#{mode})"
          when :yield_nil then y.call(nil, ndx)
          end
          return
        end

        assert_array!(node, path_key, mode)
        # Each element extends the index path by its position.
        node.each_with_index { |child, i| visit.call(child, pc + 1, ndx + [i], y) }

      when :enter_hash
        # If the *next* op is an array hop, relax to indifferent for that fetch
        lookup_policy = next_enters_array?(operations, pc) ? :indifferent : key_policy
        child = fetch_key(node, op[:key], lookup_policy)
        if child == Base::MISSING
          # :skip (and any other action) simply ends this branch.
          case missing_key_action(policy)
          when :raise then raise KeyError, "Missing key '#{op[:key]}' at '#{path_key}' (#{mode})"
          when :yield_nil then y.call(nil, ndx)
          end
          return
        end

        visit.call(child, pc + 1, ndx, y)

      else
        raise "Unknown operation: #{op.inspect}"
      end
    end
  end
end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
+ module Compiler
6
+ module Accessors
7
class MaterializeAccessor
  extend Base

  # Builds an accessor lambda that returns the value reached by following
  # `operations` from the given data. Array steps map over their elements, so the
  # result mirrors the nesting of the arrays traversed.
  #
  # @param operations [Array<Hash>] steps carrying :type (:enter_hash / :enter_array)
  #   and, for hash steps, :key
  # @param path_key [Object] only interpolated into error messages and assertions
  # @param policy [Object] forwarded to missing_key_action / missing_array_action
  # @param key_policy [Object] key lookup policy for hash steps that are not
  #   immediately followed by an array step
  # @return [Proc] lambda taking one argument (the data)
  # @raise [KeyError] from the returned lambda when a key/array is missing and the
  #   policy resolves to :raise
  # @raise [RuntimeError] from the returned lambda on an unknown operation type
  def self.build(operations, path_key, policy, key_policy)
    mode = :materialize

    # The walker holds no per-call state, so a single instance serves every call.
    descend = nil
    descend = lambda do |node, pc|
      return node if pc >= operations.length

      op = operations[pc]
      case op[:type]
      when :enter_array
        if node.nil?
          case missing_array_action(policy)
          when :raise then raise KeyError, "Missing array at '#{path_key}' (#{mode})"
          when :skip then return []
          when :yield_nil then return nil
          end
        end

        assert_array!(node, path_key, mode)
        node.map { |child| descend.call(child, pc + 1) }

      when :enter_hash
        assert_hash!(node, path_key, mode)
        # A hash step directly followed by an array step is fetched indifferently.
        array_follows = next_enters_array?(operations, pc)
        lookup_policy = array_follows ? :indifferent : key_policy
        child = fetch_key(node, op[:key], lookup_policy)
        if child == Base::MISSING
          case missing_key_action(policy)
          when :raise then raise KeyError, "Missing key '#{op[:key]}' at '#{path_key}' (#{mode})"
          when :skip then return array_follows ? [] : nil
          when :yield_nil then return nil
          end
        end

        descend.call(child, pc + 1)

      else
        raise "Unknown operation: #{op.inspect}"
      end
    end

    ->(data) { descend.call(data, 0) }
  end
end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
+ module Compiler
6
+ module Accessors
7
# Ravel: gathers, into one flat array, every leaf reached by the op sequence.
# The planner is expected to uphold these invariants for :ravel plans:
#   - each array edge on the path is represented by an :enter_array op;
#   - when the leaf container is itself an array, a trailing :enter_array is
#     appended, so what arrives here as a leaf is the element, not the array.
class RavelAccessor
  extend Base

  # Builds an accessor lambda returning a flat array of the leaves found in the data.
  #
  # @param operations [Array<Hash>] steps carrying :type (:enter_hash / :enter_array)
  #   and, for hash steps, :key
  # @param path_key [Object] only interpolated into error messages and assertions
  # @param policy [Object] forwarded to missing_key_action / missing_array_action
  # @param key_policy [Object] key lookup policy for hash steps that are not
  #   immediately followed by an array step
  # @return [Proc] lambda taking one argument (the data) and returning an Array
  # @raise [KeyError] from the returned lambda when a key/array is missing and the
  #   policy resolves to :raise
  # @raise [RuntimeError] from the returned lambda on an unknown operation type
  def self.build(operations, path_key, policy, key_policy)
    mode = :ravel

    # The output array is passed explicitly so the walker itself carries no state.
    collect = nil
    collect = lambda do |node, pc, out|
      # Ops exhausted: this node is a leaf, so record it.
      if pc >= operations.length
        out << node
        return
      end

      op = operations[pc]
      case op[:type]
      when :enter_array
        if node.nil?
          # :skip (and any other action) contributes nothing for this branch.
          case missing_array_action(policy)
          when :raise then raise KeyError, "Missing array at '#{path_key}' (#{mode})"
          when :yield_nil then out << nil
          end
          return
        end

        assert_array!(node, path_key, mode)
        node.each { |child| collect.call(child, pc + 1, out) }

      when :enter_hash
        # When the following step iterates an array, the key is looked up
        # indifferently rather than with the configured key policy.
        array_follows = next_enters_array?(operations, pc)
        lookup_policy = array_follows ? :indifferent : key_policy

        child = fetch_key(node, op[:key], lookup_policy)
        if child == Base::MISSING
          # :skip (and any other action) contributes nothing for this branch.
          case missing_key_action(policy)
          when :raise then raise KeyError, "Missing key '#{op[:key]}' at '#{path_key}' (#{mode})"
          when :yield_nil then out << nil
          end
          return
        end

        collect.call(child, pc + 1, out)

      else
        raise "Unknown operation: #{op.inspect}"
      end
    end

    lambda do |data|
      leaves = []
      collect.call(data, 0, leaves)
      leaves
    end
  end
end
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
+ module Compiler
6
+ module Accessors
7
class ReadAccessor
  extend Base

  # Builds an accessor lambda that follows a chain of hash keys and returns the
  # value found at the end. Array steps are not allowed in this accessor.
  #
  # @param operations [Array<Hash>] steps carrying :type and, for hash steps, :key
  # @param path_key [Object] only interpolated into error messages and assertions
  # @param policy [Object] forwarded to missing_key_action
  # @param key_policy [Object] key lookup policy passed to fetch_key
  # @return [Proc] lambda taking one argument (the data)
  # @raise [KeyError] from the returned lambda when a key is missing and the policy
  #   resolves to :raise
  # @raise [TypeError] from the returned lambda when an :enter_array step is present
  # @raise [RuntimeError] from the returned lambda on an unknown operation type
  def self.build(operations, path_key, policy, key_policy)
    mode = :read
    lambda do |data|
      # Thread the current node through the steps; the block value becomes the next node.
      operations.reduce(data) do |node, op|
        case op[:type]
        when :enter_hash
          assert_hash!(node, path_key, mode)
          found = fetch_key(node, op[:key], key_policy)
          if found == Base::MISSING
            case missing_key_action(policy)
            # Both actions end the read with nil (the return exits the lambda).
            when :yield_nil, :skip then return nil
            when :raise then raise KeyError, "Missing key '#{op[:key]}' at '#{path_key}' (#{mode})"
            end
          end
          found
        when :enter_array
          # Should never be present for rank-0 plans
          raise TypeError, "Array encountered in :read accessor at '#{path_key}'"
        else
          raise "Unknown operation: #{op.inspect}"
        end
      end
    end
  end
end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Core
5
# Base compiler class with shared compilation logic between Ruby and JS compilers.
#
# Subclasses provide the compile_* hooks. This class supplies node dispatch and
# read-only lookups into the analyzer's :broadcasts state for the declaration
# currently being compiled (@current_declaration, which this class reads but
# never assigns).
class CompilerBase
  # Map node classes to compiler methods
  DISPATCH = {
    Kumi::Syntax::Literal => :compile_literal,
    Kumi::Syntax::InputReference => :compile_field_node,
    Kumi::Syntax::InputElementReference => :compile_element_field_reference,
    Kumi::Syntax::DeclarationReference => :compile_binding_node,
    Kumi::Syntax::ArrayExpression => :compile_list,
    Kumi::Syntax::CallExpression => :compile_call,
    Kumi::Syntax::CascadeExpression => :compile_cascade
  }.freeze

  # @param syntax_tree [Object] schema root; build_index reads its #values and #traits
  # @param analyzer_result [Object] analysis result; its #state is read for :broadcasts
  def initialize(syntax_tree, analyzer_result)
    @schema = syntax_tree
    @analysis = analyzer_result
  end

  # Shared compilation logic

  # Populates @index with every value and trait declaration keyed by name.
  # Traits are written last, so a trait overwrites a value of the same name.
  def build_index
    @index = {}
    @schema.values.each { |a| @index[a.name] = a }
    @schema.traits.each { |t| @index[t.name] = t }
  end

  # @return [Object] the pre-computed :operation_mode for the current declaration,
  #   or :broadcast when none is recorded (the path argument is not consulted)
  def determine_operation_mode_for_path(_path)
    # Use pre-computed operation mode from analysis
    current_compilation_meta&.dig(:operation_mode) || :broadcast
  end

  # Decides whether `expr` should be compiled as a vectorized operation, based on
  # the analyzer's pre-computed metadata for the current declaration.
  #
  # @param expr [#fn_name] call expression being compiled
  # @return [Boolean, Object] false when no metadata exists or the function is a
  #   reducer; true when the declaration's vectorized-operation record matches;
  #   otherwise the recorded :needs_broadcasting value (false if absent)
  def vectorized_operation?(expr)
    # Use pre-computed vectorization decision from analysis
    compilation_meta = current_compilation_meta
    return false unless compilation_meta

    # Check if current declaration is vectorized
    if compilation_meta[:is_vectorized]
      # broadcast_state is non-nil here: compilation_meta was dug out of it.
      vectorized_ops = broadcast_state[:vectorized_operations] || {}
      current_decl_info = vectorized_ops[@current_declaration]

      if current_decl_info
        # For cascade declarations, check individual operations within them
        return true if current_decl_info[:operation] == expr.fn_name

        # For cascade_with_vectorized_conditions_or_results, allow nested operations
        return true if current_decl_info[:source] == :cascade_with_vectorized_conditions_or_results

        # Check if this is a direct vectorized operation
        return true if current_decl_info[:operation]
      end
    end

    # Fallback: Reduction functions are NOT vectorized operations - they consume arrays
    return false if Kumi::Registry.reducer?(expr.fn_name)

    # Use pre-computed vectorization context for remaining cases
    compilation_meta.dig(:vectorization_context, :needs_broadcasting) || false
  end

  # @return [Boolean, nil] truthy when the current declaration is recorded as a
  #   cascade with vectorized conditions or results (the argument is not consulted)
  def is_cascade_vectorized?(_expr)
    # Use metadata to determine if this cascade is vectorized
    cascade_info = @current_declaration && broadcast_state&.dig(:vectorized_operations, @current_declaration)
    cascade_info && cascade_info[:source] == :cascade_with_vectorized_conditions_or_results
  end

  # @return [Array] two-element array: the compilation metadata for the current
  #   declaration (possibly nil) and its :cascade_info (an empty hash when absent)
  def get_cascade_compilation_metadata
    compilation_meta = current_compilation_meta
    cascade_info = compilation_meta&.dig(:cascade_info) || {}
    [compilation_meta, cascade_info]
  end

  # @return [Object, nil] the cascade strategy recorded for the current declaration;
  #   nil when the :broadcasts state or its :cascade_strategies table is absent
  def get_cascade_strategy
    # Nil-safe like the sibling lookups: missing analyzer state yields nil
    # instead of NoMethodError on nil.
    broadcast_state&.dig(:cascade_strategies, @current_declaration)
  end

  # @return [Hash] the :function_call_strategy for the current declaration, or {}
  def get_function_call_strategy
    current_compilation_meta&.dig(:function_call_strategy) || {}
  end

  # @return [Object, nil] the :flattening_required flag of the function call strategy
  def needs_flattening?
    get_function_call_strategy[:flattening_required]
  end

  # @return [Object, nil] the flattening info recorded for the current declaration;
  #   nil when the :broadcasts state or its :flattening_declarations table is absent
  def get_flattening_info
    # Nil-safe like the sibling lookups: missing analyzer state yields nil
    # instead of NoMethodError on nil.
    broadcast_state&.dig(:flattening_declarations, @current_declaration)
  end

  # @return [Array] the recorded :flatten_argument_indices, or [] when absent
  def get_flatten_argument_indices
    current_compilation_meta&.dig(:function_call_strategy, :flatten_argument_indices) || []
  end

  # Dispatch to the appropriate compile_* method
  #
  # @raise [KeyError] when expr's class has no entry in DISPATCH
  def compile_expr(expr)
    method = DISPATCH.fetch(expr.class)
    send(method, expr)
  end

  # Abstract methods to be implemented by subclasses
  def compile_literal(expr)
    raise NotImplementedError, "Subclasses must implement compile_literal"
  end

  def compile_field_node(expr)
    raise NotImplementedError, "Subclasses must implement compile_field_node"
  end

  def compile_element_field_reference(expr)
    raise NotImplementedError, "Subclasses must implement compile_element_field_reference"
  end

  def compile_binding_node(expr)
    raise NotImplementedError, "Subclasses must implement compile_binding_node"
  end

  def compile_list(expr)
    raise NotImplementedError, "Subclasses must implement compile_list"
  end

  def compile_call(expr)
    raise NotImplementedError, "Subclasses must implement compile_call"
  end

  def compile_cascade(expr)
    raise NotImplementedError, "Subclasses must implement compile_cascade"
  end

  private

  # The analyzer's :broadcasts state, or nil when the analysis did not record one.
  def broadcast_state
    @analysis.state[:broadcasts]
  end

  # Compilation metadata recorded for the declaration currently being compiled, or nil.
  def current_compilation_meta
    broadcast_state&.dig(:compilation_metadata, @current_declaration)
  end
end
136
+ end
137
+ end
@@ -12,7 +12,7 @@ module Kumi
12
12
  # 4. Support both immediate raising and error accumulation patterns
13
13
  module ErrorReporter
14
14
  # Standard error structure for internal use
15
- ErrorEntry = Struct.new(:location, :message, :type, :context, keyword_init: true) do
15
+ ErrorEntry = Struct.new(:location, :message, :type, :context, :backtrace, keyword_init: true) do
16
16
  def to_s
17
17
  location_str = format_location(location)
18
18
  "#{location_str}: #{message}"
@@ -47,12 +47,13 @@ module Kumi
47
47
  # @param type [Symbol] Optional error category (:syntax, :semantic, :type, etc.)
48
48
  # @param context [Hash] Optional additional context
49
49
  # @return [ErrorEntry] Structured error entry
50
- def create_error(message, location: nil, type: :semantic, context: {})
50
+ def create_error(message, location: nil, type: :semantic, context: {}, backtrace: nil)
51
51
  ErrorEntry.new(
52
52
  location: location,
53
53
  message: message,
54
54
  type: type,
55
- context: context
55
+ context: context,
56
+ backtrace: backtrace
56
57
  )
57
58
  end
58
59
 
@@ -76,8 +77,8 @@ module Kumi
76
77
  # @param error_class [Class] Exception class to raise
77
78
  # @param type [Symbol] Error category
78
79
  # @param context [Hash] Additional context
79
- def raise_error(message, location: nil, error_class: Errors::SemanticError, type: :semantic, context: {})
80
- entry = create_error(message, location: location, type: type, context: context)
80
+ def raise_error(message, location: nil, error_class: Errors::SemanticError, type: :semantic, backtrace: nil, context: {})
81
+ entry = create_error(message, location: location, type: type, context: context, backtrace: backtrace || caller)
81
82
  # Pass both the formatted message and the original location to the error constructor
82
83
  raise error_class.new(entry.to_s, location)
83
84
  end
@@ -24,6 +24,8 @@ module Kumi
24
24
 
25
25
  class UnknownFunction < Error; end
26
26
 
27
+ class AnalysisError < Error; end
28
+
27
29
  class SemanticError < LocatedError; end
28
30
 
29
31
  class TypeError < SemanticError; end
@@ -32,6 +34,8 @@ module Kumi
32
34
 
33
35
  class SyntaxError < LocatedError; end
34
36
 
37
+ class CompilationError < Error; end
38
+
35
39
  class RuntimeError < Error; end
36
40
 
37
41
  class DomainViolationError < Error