kumi 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/CLAUDE.md +18 -258
- data/README.md +188 -121
- data/docs/AST.md +1 -1
- data/docs/FUNCTIONS.md +52 -8
- data/docs/VECTOR_SEMANTICS.md +286 -0
- data/docs/compiler_design_principles.md +86 -0
- data/docs/features/README.md +15 -2
- data/docs/features/hierarchical-broadcasting.md +349 -0
- data/docs/features/javascript-transpiler.md +148 -0
- data/docs/features/performance.md +1 -3
- data/docs/features/s-expression-printer.md +2 -2
- data/docs/schema_metadata.md +7 -7
- data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
- data/examples/game_of_life.rb +2 -4
- data/lib/kumi/analyzer.rb +34 -14
- data/lib/kumi/compiler.rb +4 -283
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
- data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
- data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
- data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
- data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
- data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
- data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
- data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
- data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
- data/lib/kumi/core/analyzer/plans.rb +52 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
- data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
- data/lib/kumi/core/compiler/access_builder.rb +36 -0
- data/lib/kumi/core/compiler/access_planner.rb +219 -0
- data/lib/kumi/core/compiler/accessors/base.rb +69 -0
- data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
- data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
- data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
- data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
- data/lib/kumi/core/compiler_base.rb +137 -0
- data/lib/kumi/core/error_reporter.rb +6 -5
- data/lib/kumi/core/errors.rb +4 -0
- data/lib/kumi/core/explain.rb +157 -205
- data/lib/kumi/core/export/node_builders.rb +2 -2
- data/lib/kumi/core/export/node_serializers.rb +1 -1
- data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
- data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
- data/lib/kumi/core/function_registry/function_builder.rb +142 -53
- data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
- data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
- data/lib/kumi/core/function_registry.rb +138 -98
- data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
- data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
- data/lib/kumi/core/ir/execution_engine.rb +50 -0
- data/lib/kumi/core/ir.rb +58 -0
- data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
- data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
- data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
- data/lib/kumi/core/ruby_parser/parser.rb +1 -1
- data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
- data/lib/kumi/errors.rb +2 -0
- data/lib/kumi/js.rb +23 -0
- data/lib/kumi/registry.rb +17 -22
- data/lib/kumi/runtime/executable.rb +213 -0
- data/lib/kumi/schema.rb +15 -4
- data/lib/kumi/schema_metadata.rb +2 -2
- data/lib/kumi/support/ir_dump.rb +491 -0
- data/lib/kumi/support/s_expression_printer.rb +17 -16
- data/lib/kumi/syntax/array_expression.rb +6 -6
- data/lib/kumi/syntax/call_expression.rb +4 -4
- data/lib/kumi/syntax/cascade_expression.rb +4 -4
- data/lib/kumi/syntax/case_expression.rb +4 -4
- data/lib/kumi/syntax/declaration_reference.rb +4 -4
- data/lib/kumi/syntax/hash_expression.rb +4 -4
- data/lib/kumi/syntax/input_declaration.rb +6 -5
- data/lib/kumi/syntax/input_element_reference.rb +5 -5
- data/lib/kumi/syntax/input_reference.rb +5 -5
- data/lib/kumi/syntax/literal.rb +4 -4
- data/lib/kumi/syntax/location.rb +5 -0
- data/lib/kumi/syntax/node.rb +33 -34
- data/lib/kumi/syntax/root.rb +6 -6
- data/lib/kumi/syntax/trait_declaration.rb +4 -4
- data/lib/kumi/syntax/value_declaration.rb +4 -4
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +6 -15
- data/scripts/analyze_broadcast_methods.rb +68 -0
- data/scripts/analyze_cascade_methods.rb +74 -0
- data/scripts/check_broadcasting_coverage.rb +51 -0
- data/scripts/find_dead_code.rb +114 -0
- metadata +36 -9
- data/docs/features/array-broadcasting.md +0 -170
- data/lib/kumi/cli.rb +0 -449
- data/lib/kumi/core/compiled_schema.rb +0 -43
- data/lib/kumi/core/evaluation_wrapper.rb +0 -40
- data/lib/kumi/core/schema_instance.rb +0 -111
- data/lib/kumi/core/vectorization_metadata.rb +0 -110
- data/migrate_to_core_iterative.rb +0 -938
@@ -2,135 +2,175 @@
|
|
2
2
|
|
3
3
|
module Kumi
  module Core
    # Internal function registry (single source of truth).
    #
    # Holds a table of function name => Entry. The table starts as a copy of
    # CORE_FUNCTIONS and can be extended through .register /
    # .register_with_metadata until .freeze! is called. Mutations are
    # serialized through a Mutex; plain lookups read the table without locking.
    module FunctionRegistry
      Entry = FunctionBuilder::Entry

      CORE_OPERATORS = %i[== > < >= <= != between?].freeze

      # Build core functions once. Every category module contributes a
      # name => Entry hash; a name defined by two categories is a programming
      # error and aborts loading.
      CORE_FUNCTIONS = {}.tap do |registry|
        [
          ComparisonFunctions.definitions,
          MathFunctions.definitions,
          StringFunctions.definitions,
          LogicalFunctions.definitions,
          CollectionFunctions.definitions,
          ConditionalFunctions.definitions,
          TypeFunctions.definitions,
          StatFunctions.definitions
        ].each do |defs|
          defs.each do |name, entry|
            raise ArgumentError, "Duplicate core function: #{name}" if registry.key?(name)

            registry[name] = entry
          end
        end
      end.freeze

      @lock = Mutex.new
      # Entries are dup'ed so that freezing the live table never freezes the
      # entries stored in CORE_FUNCTIONS.
      @functions = CORE_FUNCTIONS.transform_values(&:dup)
      @frozen = false

      # Raised when a registration is attempted after .freeze!.
      # NOTE: inside this namespace the constant shadows Ruby's ::FrozenError.
      class FrozenError < RuntimeError; end

      class << self
        # Registers the public instance methods and the singleton methods
        # defined directly on each given class or module (inherited methods
        # are excluded). Names that are already registered are skipped.
        #
        # @param mods [Array<Module>] classes or modules to scan
        # @return [Array<Module>] the given mods
        def auto_register(*mods)
          mods.each do |mod|
            instance_method_names = mod.public_instance_methods(false)

            # A plain module has no .new, so its instance methods are called
            # on one shared object extended with the module. Classes keep the
            # original behaviour: a fresh instance for every call.
            shared_receiver =
              (Object.new.extend(mod) unless mod.is_a?(Class) || instance_method_names.empty?)

            instance_method_names.each do |m|
              next if function?(m)

              register(m) { |*args| (shared_receiver || mod.new).public_send(m, *args) }
            end

            mod.singleton_methods(false).each do |m|
              next if function?(m)

              fn = mod.method(m)
              register(m) { |*args| fn.call(*args) }
            end
          end
        end

        #
        # Lifecycle
        #

        # Restores the table to a fresh copy of CORE_FUNCTIONS and unfreezes
        # the registry.
        def reset!
          @lock.synchronize do
            @functions = CORE_FUNCTIONS.transform_values(&:dup)
            @frozen = false
          end
        end

        # Freezes every entry and the table itself; later registrations raise
        # FrozenError until .reset! is called.
        def freeze!
          @lock.synchronize do
            @functions.each_value(&:freeze)
            @functions.freeze
            @frozen = true
          end
        end

        # @return [Boolean] whether .freeze! has been called since the last reset
        def frozen?
          @frozen
        end

        #
        # Registration
        #

        # Unified entry point; used by both public and internal callers.
        #
        # @param name [Symbol] function name
        # @param fn_or [Proc, nil] implementation; when nil the block is used
        # @param meta [Hash] overrides for the metadata defaults built below
        #   (arity defaults to the arity of the implementation)
        # @raise [ArgumentError] if neither a Proc nor a block is supplied, or
        #   the name is already registered
        # @raise [FrozenError] if the registry is frozen
        def register(name, fn_or = nil, **meta, &block)
          fn = fn_or || block
          raise ArgumentError, "block or Proc required" unless fn.is_a?(Proc)

          defaults = {
            arity: fn.arity,
            param_types: [:any],
            return_type: :any,
            description: nil,
            param_modes: nil,
            reducer: false,
            structure_function: false
          }
          # Caller-supplied metadata wins over the defaults.
          register_with_metadata(name, fn, **defaults.merge(meta))
        end

        # Back-compat explicit API: stores a new Entry under +name+.
        #
        # @raise [FrozenError] if the registry is frozen
        # @raise [ArgumentError] if +name+ is already registered
        def register_with_metadata(name, fn, arity:, param_types: [:any], return_type: :any,
                                   description: nil, param_modes: nil, reducer: false,
                                   structure_function: false)
          @lock.synchronize do
            raise FrozenError, "registry is frozen" if @frozen
            raise ArgumentError, "Function #{name.inspect} already registered" if @functions.key?(name)

            @functions[name] = Entry.new(
              fn: fn,
              arity: arity,
              param_types: param_types,
              return_type: return_type,
              description: description,
              param_modes: param_modes,
              reducer: reducer,
              structure_function: structure_function
            )
          end
        end

        #
        # Queries
        #

        # @return [Boolean] whether +name+ is registered
        def function?(name)
          @functions.key?(name)
        end
        alias supported? function?

        # @return [Boolean] whether +name+ is a registered Symbol listed in CORE_OPERATORS
        def operator?(name)
          name.is_a?(Symbol) && function?(name) && CORE_OPERATORS.include?(name)
        end

        # @return [Entry, nil] the entry registered under +name+, if any
        def entry(name)
          @functions[name]
        end

        # @return [Proc] the implementation registered under +name+
        # @raise [Kumi::Errors::UnknownFunction] if +name+ is not registered
        def fetch(name)
          entry!(name).fn
        end

        # @return [Hash] arity, param_types, return_type and description of +name+
        # @raise [Kumi::Errors::UnknownFunction] if +name+ is not registered
        def signature(name)
          ent = entry!(name)
          { arity: ent.arity, param_types: ent.param_types, return_type: ent.return_type, description: ent.description }
        end

        # @return [Boolean] true only when +name+ is registered with a truthy reducer flag
        def reducer?(name)
          !!entry(name)&.reducer
        end

        # @return [Boolean] true only when +name+ is registered with a truthy
        #   structure_function flag
        def structure_function?(name)
          !!entry(name)&.structure_function
        end

        # @return [Array] names of all registered functions
        def all_functions
          @functions.keys
        end
        alias all all_functions

        # @return [Hash] shallow copy of the name => Entry table
        def functions
          @functions.dup
        end

        # Introspection helpers: names contributed by each core category.
        def comparison_operators = ComparisonFunctions.definitions.keys
        def math_operations = MathFunctions.definitions.keys
        def string_operations = StringFunctions.definitions.keys
        def logical_operations = LogicalFunctions.definitions.keys
        def collection_operations = CollectionFunctions.definitions.keys
        def conditional_operations = ConditionalFunctions.definitions.keys
        def type_operations = TypeFunctions.definitions.keys
        def stat_operations = StatFunctions.definitions.keys

        private

        # Looks up +name+ and raises instead of returning nil, so .fetch and
        # .signature report unknown functions identically.
        def entry!(name)
          entry(name) || raise(Kumi::Errors::UnknownFunction, "Unknown function: #{name}")
        end
      end
    end
  end
end
|
136
|
-
# end
|
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Pure combinators for data transformation.
        #
        # Values are plain hashes: scalars look like {:k => :scalar, :v => value}
        # and vectors like {:k => :vec, :scope => [...], :rows => [...]}, where
        # each row is {:v => value} optionally carrying an :idx array.
        # Result vectors are built through Values.vec.
        module Combinators
          # Broadcast scalar over vec (scalar→vec only): every row of +v+ is
          # replaced by a row holding the scalar's value, keeping the row's
          # :idx when it has one.
          # @param s [Hash] scalar value {:k => :scalar, :v => value}
          # @param v [Hash] vector value {:k => :vec, :scope => [...], :rows => [...]}
          # @return [Hash] broadcasted vector
          # @raise [RuntimeError] if +s+ is not a scalar or +v+ is not a vec
          def self.broadcast_scalar(s, v)
            raise "First arg must be scalar" unless s[:k] == :scalar
            raise "Second arg must be vec" unless v[:k] == :vec

            rows = v[:rows].map do |r|
              r.key?(:idx) ? { v: s[:v], idx: r[:idx] } : { v: s[:v] }
            end

            Values.vec(v[:scope], rows, v[:has_idx])
          end

          # Positional zip for same-scope vecs: row i of the result holds the
          # array of the i-th values of every input, with the :idx of the
          # first input's row (when present).
          # @param vecs [Array<Hash>] vectors to zip together
          # @return [Hash] zipped vector; a single input is returned unchanged
          # @raise [RuntimeError] if no vec is given, an argument is not a vec,
          #   or scopes / row counts differ
          def self.zip_same_scope(*vecs)
            # Without this guard an empty call fell through to the scope check
            # and reported a misleading "same scope" error.
            raise "zip_same_scope requires at least one vec" if vecs.empty?
            raise "All arguments must be vecs" unless vecs.all? { |v| v[:k] == :vec }
            raise "All vecs must have same scope" unless vecs.map { |v| v[:scope] }.uniq.size == 1
            raise "All vecs must have same row count" unless vecs.map { |v| v[:rows].size }.uniq.size == 1
            return vecs.first if vecs.length == 1

            first_vec, *others = vecs
            zipped_rows = first_vec[:rows].zip(*others.map { |v| v[:rows] }).map do |row_group|
              lead = row_group.first
              result_row = { v: row_group.map { |r| r[:v] } }
              result_row[:idx] = lead[:idx] if lead.key?(:idx)
              result_row
            end

            Values.vec(first_vec[:scope], zipped_rows, first_vec[:has_idx])
          end

          # Prefix-index alignment for rank expansion/broadcasting: each
          # target row receives the source value whose index equals the
          # leading part of the target row's index.
          # @param tgt [Hash] target vector (defines output structure)
          # @param src [Hash] source vector (values to align)
          # @param to_scope [Array] target scope
          # @param require_unique [Boolean] enforce unique prefixes
          # @param on_missing [Symbol] :error or :nil policy
          # @return [Hash] aligned vector
          # @raise [RuntimeError] on non-indexed input, incompatible ranks,
          #   duplicate prefixes (with require_unique), a missing prefix under
          #   the :error policy, or an unknown policy
          def self.align_to(tgt, src, to_scope:, require_unique: false, on_missing: :error)
            raise "align_to expects vecs with indices" unless [tgt, src].all? { |v| v[:k] == :vec && v[:has_idx] }

            to_rank = to_scope.length
            # The source rank is read off its first row. An empty source has
            # no row to inspect; treat it as rank 0 so that every target row
            # is handled by the on_missing policy instead of crashing on nil.
            first_src_row = src[:rows].first
            src_rank = first_src_row ? first_src_row[:idx].length : 0
            raise "scope not prefix-compatible: #{src_rank} > #{to_rank}" unless src_rank <= to_rank

            # Build prefix->value hash (later rows overwrite earlier ones
            # unless require_unique is set).
            value_by_prefix = {}
            src[:rows].each do |r|
              k = r[:idx].first(src_rank)
              raise "non-unique prefix for align_to: #{k.inspect}" if require_unique && value_by_prefix.key?(k)

              value_by_prefix[k] = r[:v]
            end

            # Map target rows through alignment
            rows = tgt[:rows].map do |r|
              k = r[:idx].first(src_rank)
              if value_by_prefix.key?(k)
                { v: value_by_prefix[k], idx: r[:idx] }
              else
                # The policy is only inspected when a prefix is actually missing.
                case on_missing
                when :nil then { v: nil, idx: r[:idx] }
                when :error then raise "missing prefix #{k.inspect} in align_to"
                else raise "unknown on_missing policy: #{on_missing}"
                end
              end
            end

            Values.vec(to_scope, rows, true)
          end

          # Build hierarchical groups for lift operation.
          # rows: [{ v: ..., idx: [i0,i1,...] }, ...] with lexicographically sorted :idx
          # @param rows [Array<Hash>] rows with indices
          # @param depth [Integer] nesting depth
          # @return [Array, Object] nested array structure; [] for empty rows;
          #   at depth 0 the bare value of the first row when that row's :idx
          #   is nil or empty, otherwise the flat array of values
          # @raise [ArgumentError] if depth is negative (checked only for non-empty rows)
          def self.group_rows(rows, depth = 0)
            return [] if rows.empty?
            raise ArgumentError, "depth < 0" if depth.negative?

            if depth.zero?
              first_idx = rows.first[:idx]
              return rows.first[:v] if first_idx.nil? || first_idx.empty?

              return rows.map { |r| r[:v] }
            end

            # Consecutive rows sharing the same leading index form one group;
            # each group is recursed into with that leading index stripped.
            rows.chunk_while { |a, b| a[:idx].first == b[:idx].first }.map do |group|
              tail = group.map { |r| { v: r[:v], idx: r[:idx][1..] } }
              group_rows(tail, depth - 1)
            end
          end
        end
      end
    end
  end
end
|