kumi 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/CLAUDE.md +7 -231
  4. data/README.md +1 -1
  5. data/docs/VECTOR_SEMANTICS.md +286 -0
  6. data/docs/features/hierarchical-broadcasting.md +1 -1
  7. data/docs/features/s-expression-printer.md +2 -2
  8. data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
  9. data/lib/kumi/analyzer.rb +34 -12
  10. data/lib/kumi/compiler.rb +2 -12
  11. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
  12. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  13. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  14. data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -101
  15. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  16. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  17. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  18. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  19. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
  20. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  21. data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
  22. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  23. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  24. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
  25. data/lib/kumi/core/analyzer/plans.rb +52 -0
  26. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  27. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  28. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  29. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  30. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  31. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  32. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  33. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  34. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  35. data/lib/kumi/core/compiler_base.rb +2 -2
  36. data/lib/kumi/core/error_reporter.rb +6 -5
  37. data/lib/kumi/core/errors.rb +4 -0
  38. data/lib/kumi/core/explain.rb +157 -205
  39. data/lib/kumi/core/export/node_builders.rb +2 -2
  40. data/lib/kumi/core/export/node_serializers.rb +1 -1
  41. data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
  42. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  43. data/lib/kumi/core/function_registry/function_builder.rb +142 -55
  44. data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
  45. data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
  46. data/lib/kumi/core/function_registry.rb +126 -108
  47. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  48. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  49. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  50. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  51. data/lib/kumi/core/ir.rb +58 -0
  52. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  53. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  54. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
  55. data/lib/kumi/core/ruby_parser/input_builder.rb +5 -5
  56. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  57. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  58. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  59. data/lib/kumi/registry.rb +14 -79
  60. data/lib/kumi/runtime/executable.rb +213 -0
  61. data/lib/kumi/schema.rb +14 -3
  62. data/lib/kumi/schema_metadata.rb +2 -2
  63. data/lib/kumi/support/ir_dump.rb +491 -0
  64. data/lib/kumi/support/s_expression_printer.rb +1 -1
  65. data/lib/kumi/syntax/location.rb +5 -0
  66. data/lib/kumi/syntax/node.rb +0 -1
  67. data/lib/kumi/syntax/root.rb +2 -2
  68. data/lib/kumi/version.rb +1 -1
  69. data/lib/kumi.rb +6 -15
  70. metadata +26 -15
  71. data/lib/kumi/core/cascade_executor_builder.rb +0 -132
  72. data/lib/kumi/core/compiled_schema.rb +0 -43
  73. data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
  74. data/lib/kumi/core/compiler/function_invoker.rb +0 -55
  75. data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
  76. data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
  77. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  78. data/lib/kumi/core/nested_structure_utils.rb +0 -78
  79. data/lib/kumi/core/schema_instance.rb +0 -115
  80. data/lib/kumi/core/vectorized_function_builder.rb +0 -88
  81. data/lib/kumi/js/compiler.rb +0 -878
  82. data/lib/kumi/js/function_registry.rb +0 -333
  83. data/migrate_to_core_iterative.rb +0 -938
@@ -19,7 +19,7 @@ module Kumi
19
19
  validate_value_args(name, expr, blk)
20
20
 
21
21
  expression = blk ? build_cascade(&blk) : ensure_syntax(expr)
22
- @context.attributes << Kumi::Syntax::ValueDeclaration.new(name, expression, loc: @context.current_location)
22
+ @context.values << Kumi::Syntax::ValueDeclaration.new(name, expression, loc: @context.current_location)
23
23
  end
24
24
 
25
25
  def trait(*args, **kwargs)
@@ -90,7 +90,7 @@ module Kumi
90
90
  location: @context.current_location)
91
91
  end
92
92
 
93
- has_expr = !expr.nil?
93
+ has_expr = !expr.is_a?(NilClass)
94
94
  has_block = blk
95
95
 
96
96
  if has_expr && has_block
@@ -254,6 +254,13 @@ module Kumi
254
254
  ast_node = to_ast_node
255
255
  Sugar.create_call_expression(:subtract, [Sugar.ensure_literal(0), ast_node])
256
256
  end
257
+
258
+ # Override Ruby's built-in nil? method to transform into == nil
259
+ define_method(:nil?) do
260
+ ast_node = to_ast_node
261
+ nil_literal = Kumi::Syntax::Literal.new(nil)
262
+ Sugar.create_call_expression(:==, [ast_node, nil_literal])
263
+ end
257
264
  end
258
265
  end
259
266
  end
data/lib/kumi/registry.rb CHANGED
@@ -1,96 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Kumi
4
- # Public interface for registering custom functions in Kumi schemas
5
- #
6
- # Usage:
7
- # Kumi::Registry.register(:my_function) do |x|
8
- # x * 2
9
- # end
4
+ # Public facade for the function registry.
5
+ # Delegates to Kumi::Core::FunctionRegistry.
10
6
  module Registry
11
- extend Core::FunctionRegistry
12
-
13
- Entry = Core::FunctionRegistry::FunctionBuilder::Entry
14
-
15
- @functions = Core::FunctionRegistry::CORE_FUNCTIONS.transform_values(&:dup)
16
- @frozen = false
17
- @lock = Mutex.new
18
-
19
- class FrozenError < RuntimeError; end
7
+ Entry = Core::FunctionRegistry::FunctionBuilder::Entry
8
+ FrozenError = Core::FunctionRegistry::FrozenError
20
9
 
21
10
  class << self
22
- def reset!
23
- @lock.synchronize do
24
- @functions = Core::FunctionRegistry::CORE_FUNCTIONS.transform_values(&:dup)
25
- @frozen = false
26
- end
11
+ def auto_register(*mods)
12
+ Core::FunctionRegistry.auto_register(*mods)
27
13
  end
28
14
 
29
- # Register a custom function with the Kumi function registry
30
- #
31
- # Example:
32
- # Kumi::Registry.register(:double) do |x|
33
- # x * 2
34
- # end
35
- #
36
- # # Use in schema:
37
- # value :doubled, fn(:double, input.number)
38
- def register(name, &block)
39
- @lock.synchronize do
40
- raise FrozenError, "registry is frozen" if @frozen
41
- raise ArgumentError, "Function #{name.inspect} already registered" if @functions.key?(name)
42
-
43
- fn_lambda = block.is_a?(Proc) ? block : ->(*args) { yield(*args) }
44
- @functions[name] = Entry.new(
45
- fn: fn_lambda,
46
- arity: fn_lambda.arity,
47
- param_types: [:any],
48
- return_type: :any,
49
- description: nil,
50
- inverse: nil,
51
- reducer: false
52
- )
15
+ def method_missing(name, ...)
16
+ if Core::FunctionRegistry.respond_to?(name)
17
+ Core::FunctionRegistry.public_send(name, ...)
18
+ else
19
+ super
53
20
  end
54
21
  end
55
22
 
56
- # Register a custom function with detailed metadata for type and domain validation
57
- #
58
- # Example:
59
- # Kumi::Registry.register_with_metadata(
60
- # :add_tax,
61
- # ->(amount, rate) { amount * (1 + rate) },
62
- # arity: 2,
63
- # param_types: [:float, :float],
64
- # return_type: :float,
65
- # description: "Adds tax to an amount",
66
- # )
67
- #
68
- # # Use in schema:
69
- # value :total, fn(:add_tax, input.price, input.tax_rate)
70
- def register_with_metadata(name, fn_lambda, arity:, param_types: [:any], return_type: :any, description: nil, inverse: nil,
71
- reducer: false)
72
- @lock.synchronize do
73
- raise FrozenError, "registry is frozen" if @frozen
74
- raise ArgumentError, "Function #{name.inspect} already registered" if @functions.key?(name)
75
-
76
- @functions[name] = Entry.new(
77
- fn: fn_lambda,
78
- arity: arity,
79
- param_types: param_types,
80
- return_type: return_type,
81
- description: description,
82
- inverse: inverse,
83
- reducer: reducer
84
- )
85
- end
23
+ def respond_to_missing?(name, include_private = false)
24
+ Core::FunctionRegistry.respond_to?(name, include_private) || super
86
25
  end
87
26
 
88
27
  def freeze!
89
- @lock.synchronize do
90
- @functions.each_value(&:freeze)
91
- @functions.freeze
92
- @frozen = true
93
- end
28
+ Core::FunctionRegistry.freeze!
94
29
  end
95
30
  end
96
31
  end
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Runtime
5
+ # Executable / Reader: evaluation interface for compiled schemas
6
+ #
7
+ # BUILD:
8
+ # - Executable.from_analysis(state) consumes:
9
+ # * :ir_module (lowered IR)
10
+ # * :access_plans (for AccessBuilder)
11
+ # * function registry
12
+ # - Builds accessor lambdas once per plan id.
13
+ #
14
+ # EVALUATION:
15
+ # - program.read(input, mode: :ruby|:wrapped)
16
+ # * mode=:ruby (default) → returns “user values” (scalars and plain Ruby arrays). Vec results are exposed as their lifted scalar.
17
+ # * mode=:wrapped → returns internal VM structures for introspection:
18
+ # - Scalars as {k: :scalar, v: ...}
19
+ # - Vec twins available as :name__vec (and :name for lifted scalar)
20
+ # * target: not a keyword of read; eval_decl(name, input) passes target: name to the execution engine → short-circuit after computing the requested declaration and its dependencies.
21
+ #
22
+ # NAMING & TWINS (TODO: we are not exposing for now):
23
+ # - Every vectorized declaration with indices has:
24
+ # * :name__vec → internal vec form (rows with idx)
25
+ # * :name → lifted scalar form (nested arrays shaped by scope)
26
+ # - only :name is visible (TODO: For now, we do not expose the twins)
27
+ #
28
+ # CACHING / MEMOIZATION:
29
+ # - Values are computed once per evaluation; dependent requests reuse cached slots.
30
+ #
31
+ # ERROR SURFACE:
32
+ # - VM errors are wrapped as Kumi::Core::Errors::RuntimeError with op context (decl/op index).
33
+ # - Accessors raise descriptive KeyError for missing fields/arrays (policy-aware).
34
+ #
35
+ # DEBUGGING:
36
+ # - DEBUG_LOWER=1 to print IR at build time
37
+ # - DEBUG_VM_ARGS=1 to trace VM execution
38
+ # - Accessors can be debugged independently with DEBUG_ACCESSOR_OPS=1
39
+ class Executable
40
+ def self.from_analysis(state, registry: nil)
41
+ ir = state.fetch(:ir_module)
42
+ access_plans = state.fetch(:access_plans)
43
+ input_metadata = state[:input_metadata] || {}
44
+ accessors = Kumi::Core::Compiler::AccessBuilder.build(access_plans)
45
+
46
+ access_meta = {}
47
+ access_plans.each_value do |plans|
48
+ plans.each do |p|
49
+ access_meta[p.accessor_key] = { mode: p.mode, scope: p.scope }
50
+ end
51
+ end
52
+
53
+ # Use the internal functions hash that VM expects
54
+ registry ||= Kumi::Registry.functions
55
+ new(ir: ir, accessors: accessors, access_meta: access_meta, registry: registry, input_metadata: input_metadata)
56
+ end
57
+
58
+ def initialize(ir:, accessors:, access_meta:, registry:, input_metadata:)
59
+ @ir = ir.freeze
60
+ @acc = accessors.freeze
61
+ @meta = access_meta.freeze
62
+ @reg = registry
63
+ @input_metadata = input_metadata.freeze
64
+ @decl = @ir.decls.map { |d| [d.name, d] }.to_h
65
+ end
66
+
67
+ def decl?(name) = @decl.key?(name)
68
+
69
+ def read(input, mode: :ruby)
70
+ Run.new(self, input, mode: mode, input_metadata: @input_metadata)
71
+ end
72
+
73
+ # Backward-compatible API: evaluates the named declarations (all declarations when no names are given) and returns them as a Hash
74
+ def evaluate(ctx, *key_names)
75
+ target_keys = key_names.empty? ? @decl.keys : validate_keys(key_names)
76
+
77
+ # Handle context wrapping for backward compatibility
78
+ input = ctx.respond_to?(:ctx) ? ctx.ctx : ctx
79
+
80
+ target_keys.each_with_object({}) do |key, result|
81
+ result[key] = eval_decl(key, input, mode: :ruby)
82
+ end
83
+ end
84
+
85
+ def eval_decl(name, input, mode: :ruby)
86
+ raise Kumi::Core::Errors::RuntimeError, "unknown decl #{name}" unless decl?(name)
87
+
88
+ out = Kumi::Core::IR::ExecutionEngine.run(@ir, { input: input, target: name },
89
+ accessors: @acc, registry: @reg).fetch(name)
90
+
91
+ mode == :ruby ? unwrap(@decl[name], out) : out
92
+ end
93
+
94
+ private
95
+
96
+ def validate_keys(keys)
97
+ unknown_keys = keys - @decl.keys
98
+ return keys if unknown_keys.empty?
99
+
100
+ raise Kumi::Errors::RuntimeError, "No binding named #{unknown_keys.first}"
101
+ end
102
+
103
+ private
104
+
105
+ def unwrap(_decl, v)
106
+ v[:k] == :scalar ? v[:v] : v # no grouping needed
107
+ end
108
+ end
109
+
110
+ class Run
111
+ def initialize(program, input, mode:, input_metadata:)
112
+ @program = program
113
+ @input = input
114
+ @mode = mode
115
+ @input_metadata = input_metadata
116
+ @cache = {}
117
+ end
118
+
119
+ def get(name)
120
+ @cache[name] ||= @program.eval_decl(name, @input, mode: @mode)
121
+ end
122
+
123
+ def [](name)
124
+ get(name)
125
+ end
126
+
127
+ def slice(*keys)
128
+ return {} if keys.empty?
129
+ keys.each_with_object({}) { |key, result| result[key] = get(key) }
130
+ end
131
+
132
+ def compiled_schema
133
+ @program
134
+ end
135
+
136
+ def method_missing(sym, *args, **kwargs, &blk)
137
+ return super unless args.empty? && kwargs.empty? && @program.decl?(sym)
138
+
139
+ get(sym)
140
+ end
141
+
142
+ def respond_to_missing?(sym, priv = false)
143
+ @program.decl?(sym) || super
144
+ end
145
+
146
+ def update(**changes)
147
+ changes.each do |field, value|
148
+ # Validate field exists
149
+ raise ArgumentError, "unknown input field: #{field}" unless input_field_exists?(field)
150
+
151
+ # Validate domain constraints
152
+ validate_domain_constraint(field, value)
153
+
154
+ # Update the input data
155
+ @input = deep_merge(@input, { field => value })
156
+ end
157
+
158
+ # Clear cache after all updates
159
+ @cache.clear
160
+ self
161
+ end
162
+
163
+ def wrapped!
164
+ @mode = :wrapped
165
+ @cache.clear
166
+ self
167
+ end
168
+
169
+ def ruby!
170
+ @mode = :ruby
171
+ @cache.clear
172
+ self
173
+ end
174
+
175
+ private
176
+
177
+ def input_field_exists?(field)
178
+ # Check if field is declared in input block
179
+ @input_metadata.key?(field) || @input.key?(field)
180
+ end
181
+
182
+ def validate_domain_constraint(field, value)
183
+ field_meta = @input_metadata[field]
184
+ return unless field_meta&.dig(:domain)
185
+
186
+ domain = field_meta[:domain]
187
+ return unless violates_domain?(value, domain)
188
+
189
+ raise ArgumentError, "value #{value} is not in domain #{domain}"
190
+ end
191
+
192
+ def violates_domain?(value, domain)
193
+ case domain
194
+ when Range
195
+ !domain.include?(value)
196
+ when Array
197
+ !domain.include?(value)
198
+ when Proc
199
+ # For Proc domains, we can't statically analyze
200
+ false
201
+ else
202
+ false
203
+ end
204
+ end
205
+
206
+ def deep_merge(a, b)
207
+ return b unless a.is_a?(Hash) && b.is_a?(Hash)
208
+
209
+ a.merge(b) { |_k, v1, v2| deep_merge(v1, v2) }
210
+ end
211
+ end
212
+ end
213
+ end
data/lib/kumi/schema.rb CHANGED
@@ -13,22 +13,24 @@ module Kumi
13
13
  end
14
14
 
15
15
  def from(context)
16
+ # VERY IMPORTANT: This method is overridden in specs in order to use dual mode.
17
+
16
18
  raise("No schema defined") unless @__compiled_schema__
17
19
 
18
20
  # Validate input types and domain constraints
19
- input_meta = @__analyzer_result__.state[:inputs] || {}
21
+ input_meta = @__analyzer_result__.state[:input_metadata] || {}
20
22
  violations = Core::Input::Validator.validate_context(context, input_meta)
21
23
 
22
24
  raise Errors::InputValidationError, violations unless violations.empty?
23
25
 
24
- Core::SchemaInstance.new(@__compiled_schema__, @__analyzer_result__.state, context)
26
+ @__compiled_schema__.read(context, mode: :ruby)
25
27
  end
26
28
 
27
29
  def explain(context, *keys)
28
30
  raise("No schema defined") unless @__compiled_schema__
29
31
 
30
32
  # Validate input types and domain constraints
31
- input_meta = @__analyzer_result__.state[:inputs] || {}
33
+ input_meta = @__analyzer_result__.state[:input_metadata] || {}
32
34
  violations = Core::Input::Validator.validate_context(context, input_meta)
33
35
 
34
36
  raise Errors::InputValidationError, violations unless violations.empty?
@@ -40,8 +42,17 @@ module Kumi
40
42
  nil
41
43
  end
42
44
 
45
+ def build_syntax_tree(&block)
46
+ @__syntax_tree__ = Core::RubyParser::Dsl.build_syntax_tree(&block).freeze
47
+ end
48
+
43
49
  def schema(&block)
50
+ # from_location = caller_locations(1, 1).first
51
+ # raise "Called from #{from_location.path}:#{from_location.lineno}"
44
52
  @__syntax_tree__ = Core::RubyParser::Dsl.build_syntax_tree(&block).freeze
53
+
54
+ puts Support::SExpressionPrinter.print(@__syntax_tree__, indent: 2) if ENV["KUMI_DEBUG"] || ENV["KUMI_PRINT_SYNTAX_TREE"]
55
+
45
56
  @__analyzer_result__ = Analyzer.analyze!(@__syntax_tree__).freeze
46
57
  @__compiled_schema__ = Compiler.compile(@__syntax_tree__, analyzer: @__analyzer_result__).freeze
47
58
 
@@ -341,9 +341,9 @@ module Kumi
341
341
  end
342
342
 
343
343
  def extract_inputs
344
- return {} unless @state[:inputs]
344
+ return {} unless @state[:input_metadata]
345
345
 
346
- @state[:inputs].transform_values do |field_info|
346
+ @state[:input_metadata].transform_values do |field_info|
347
347
  {
348
348
  type: normalize_type(field_info[:type]),
349
349
  domain: normalize_domain(field_info[:domain]),