kumi 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +23 -0
- data/CLAUDE.md +7 -231
- data/README.md +5 -5
- data/docs/SYNTAX.md +66 -0
- data/docs/VECTOR_SEMANTICS.md +286 -0
- data/docs/features/hierarchical-broadcasting.md +67 -1
- data/docs/features/input-declaration-system.md +16 -0
- data/docs/features/s-expression-printer.md +2 -2
- data/lib/kumi/analyzer.rb +34 -12
- data/lib/kumi/compiler.rb +2 -12
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
- data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
- data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
- data/lib/kumi/core/analyzer/passes/input_collector.rb +123 -101
- data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
- data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
- data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
- data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
- data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
- data/lib/kumi/core/analyzer/plans.rb +52 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
- data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
- data/lib/kumi/core/compiler/access_builder.rb +36 -0
- data/lib/kumi/core/compiler/access_planner.rb +219 -0
- data/lib/kumi/core/compiler/accessors/base.rb +69 -0
- data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
- data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
- data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
- data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
- data/lib/kumi/core/compiler_base.rb +2 -2
- data/lib/kumi/core/error_reporter.rb +6 -5
- data/lib/kumi/core/errors.rb +4 -0
- data/lib/kumi/core/explain.rb +157 -205
- data/lib/kumi/core/export/node_builders.rb +2 -2
- data/lib/kumi/core/export/node_serializers.rb +1 -1
- data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
- data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
- data/lib/kumi/core/function_registry/function_builder.rb +142 -55
- data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
- data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
- data/lib/kumi/core/function_registry.rb +126 -108
- data/lib/kumi/core/input/validator.rb +1 -1
- data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
- data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
- data/lib/kumi/core/ir/execution_engine.rb +50 -0
- data/lib/kumi/core/ir.rb +58 -0
- data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
- data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
- data/lib/kumi/core/ruby_parser/input_builder.rb +30 -9
- data/lib/kumi/core/ruby_parser/parser.rb +1 -1
- data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
- data/lib/kumi/core/types/validator.rb +1 -1
- data/lib/kumi/registry.rb +14 -79
- data/lib/kumi/runtime/executable.rb +213 -0
- data/lib/kumi/schema.rb +14 -3
- data/lib/kumi/schema_metadata.rb +2 -2
- data/lib/kumi/support/ir_dump.rb +491 -0
- data/lib/kumi/support/s_expression_printer.rb +1 -1
- data/lib/kumi/syntax/location.rb +5 -0
- data/lib/kumi/syntax/node.rb +0 -1
- data/lib/kumi/syntax/root.rb +2 -2
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +6 -15
- metadata +37 -19
- data/lib/kumi/core/cascade_executor_builder.rb +0 -132
- data/lib/kumi/core/compiled_schema.rb +0 -43
- data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
- data/lib/kumi/core/compiler/function_invoker.rb +0 -55
- data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
- data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
- data/lib/kumi/core/evaluation_wrapper.rb +0 -40
- data/lib/kumi/core/nested_structure_utils.rb +0 -78
- data/lib/kumi/core/schema_instance.rb +0 -115
- data/lib/kumi/core/vectorized_function_builder.rb +0 -88
- data/lib/kumi/js/compiler.rb +0 -878
- data/lib/kumi/js/function_registry.rb +0 -333
- data/migrate_to_core_iterative.rb +0 -938
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module IR
|
6
|
+
module ExecutionEngine
|
7
|
+
# Pure combinators for data transformation.
#
# Values are plain hashes: a scalar is { k: :scalar, v: value } and a vector is
# { k: :vec, scope: [...], rows: [{ v:, idx: }, ...], has_idx: }. Every vector
# returned from here is built through Values.vec.
module Combinators
  # Broadcast scalar over vec (scalar→vec only).
  #
  # @param s [Hash] scalar value {:k => :scalar, :v => value}
  # @param v [Hash] vector value {:k => :vec, :scope => [...], :rows => [...]}
  # @return [Hash] vector with v's scope holding s's value once per row of v;
  #   a row keeps its :idx when the matching row of v has one
  # @raise [RuntimeError] when s is not a scalar or v is not a vec
  def self.broadcast_scalar(s, v)
    raise "First arg must be scalar" unless s[:k] == :scalar
    raise "Second arg must be vec" unless v[:k] == :vec

    rows = v[:rows].map do |r|
      r.key?(:idx) ? { v: s[:v], idx: r[:idx] } : { v: s[:v] }
    end

    Values.vec(v[:scope], rows, v[:has_idx])
  end

  # Positional zip for same-scope vecs.
  #
  # A single vec is returned untouched, so its rows keep their bare values;
  # with two or more vecs each output row's :v is the Array of the inputs'
  # values at that position, and :idx (when present) comes from the first vec.
  #
  # @param vecs [Array<Hash>] vectors to zip together
  # @return [Hash] zipped vector
  # @raise [RuntimeError] when an argument is not a vec, or scopes or row counts differ
  def self.zip_same_scope(*vecs)
    raise "All arguments must be vecs" unless vecs.all? { |v| v[:k] == :vec }
    raise "All vecs must have same scope" unless vecs.map { |v| v[:scope] }.uniq.size == 1
    raise "All vecs must have same row count" unless vecs.map { |v| v[:rows].size }.uniq.size == 1
    return vecs.first if vecs.length == 1

    first_vec, *others = vecs
    zipped_rows = first_vec[:rows].zip(*others.map { |v| v[:rows] }).map do |row_group|
      result_row = { v: row_group.map { |r| r[:v] } }
      result_row[:idx] = row_group.first[:idx] if row_group.first.key?(:idx)
      result_row
    end

    Values.vec(first_vec[:scope], zipped_rows, first_vec[:has_idx])
  end

  # Prefix-index alignment for rank expansion/broadcasting.
  #
  # Every target row looks up the source value whose index equals the leading
  # part of the target row's index. Output rows keep the target's :idx.
  #
  # @param tgt [Hash] target vector (defines output structure)
  # @param src [Hash] source vector (values to align)
  # @param to_scope [Array] target scope
  # @param require_unique [Boolean] enforce unique prefixes; when false a later
  #   source row with the same prefix overwrites an earlier one
  # @param on_missing [Symbol] :error or :nil policy
  # @return [Hash] aligned vector
  # @raise [RuntimeError] when either input is not an indexed vec, the source
  #   rank exceeds the target scope length, a prefix is duplicated under
  #   require_unique, or a prefix is missing under the :error policy
  def self.align_to(tgt, src, to_scope:, require_unique: false, on_missing: :error)
    raise "align_to expects vecs with indices" unless [tgt, src].all? { |v| v[:k] == :vec && v[:has_idx] }

    to_rank = to_scope.length
    # An empty source has no first row to measure; use its recorded rank (0 if
    # absent) so the on_missing policy decides the outcome instead of a nil error.
    first_src_row = src[:rows].first
    src_rank = first_src_row ? first_src_row[:idx].length : (src[:rank] || 0)
    raise "scope not prefix-compatible: #{src_rank} > #{to_rank}" unless src_rank <= to_rank

    # Build prefix->value hash
    by_prefix = {}
    src[:rows].each do |r|
      k = r[:idx].first(src_rank)
      raise "non-unique prefix for align_to: #{k.inspect}" if require_unique && by_prefix.key?(k)

      by_prefix[k] = r[:v]
    end

    # Map target rows through alignment
    rows = tgt[:rows].map do |r|
      k = r[:idx].first(src_rank)
      if by_prefix.key?(k)
        { v: by_prefix[k], idx: r[:idx] }
      else
        case on_missing
        when :nil then { v: nil, idx: r[:idx] }
        when :error then raise "missing prefix #{k.inspect} in align_to"
        else raise "unknown on_missing policy: #{on_missing}"
        end
      end
    end

    Values.vec(to_scope, rows, true)
  end

  # Build hierarchical groups for lift operation.
  #
  # rows: [{ v: ..., idx: [i0,i1,...] }, ...] with lexicographically sorted :idx
  #
  # At depth 0 the values are returned flat (or the first value alone when the
  # first row has no index left). At greater depths consecutive rows sharing
  # the same leading index become one nested group, built recursively from the
  # remaining index components.
  #
  # @param rows [Array<Hash>] rows with indices
  # @param depth [Integer] nesting depth
  # @return [Array, Object] nested array structure, or a bare value at depth 0
  #   when the first row's :idx is nil or empty
  # @raise [ArgumentError] when depth is negative and rows is not empty
  def self.group_rows(rows, depth = 0)
    return [] if rows.empty?
    raise ArgumentError, "depth < 0" if depth.negative?

    if depth.zero?
      first_idx = rows.first[:idx]
      return rows.first[:v] if first_idx.nil? || first_idx.empty?

      return rows.map { |r| r[:v] }
    end

    # Rows arrive sorted, so each group is a run of neighbours with equal heads.
    rows.chunk_while { |a, b| a[:idx].first == b[:idx].first }.map do |run|
      tail = run.map { |r| { v: r[:v], idx: r[:idx][1..] } }
      group_rows(tail, depth - 1)
    end
  end
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
@@ -0,0 +1,336 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module IR
|
6
|
+
module ExecutionEngine
|
7
|
+
# Interpreter for IR modules - thin layer that delegates to combinators.
#
# Each declaration runs against its own slot list. The ops that push append
# exactly one entry (a value, or nil for guard/store ops and for ops skipped
# by a guard) so later ops can refer to earlier results by slot index.
#
# Environment switches read here:
# - ASSERT_VM_SLOTS=1 raises when the slot count differs from the op index.
# - DEBUG_VM_ARGS (any value) prints per-op debug lines to stdout.
module Interpreter
  # Op tags whose execution appends a value to the slot list.
  PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
  # Op tags that manipulate interpreter state instead of producing a value.
  NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze

  # Executes every declaration of +ir_module+ in order and collects stored values.
  #
  # @param ir_module [#decls] each decl responds to #name and #ops; each op to
  #   #tag, #attrs and #args
  # @param ctx [Hash] :target (optional) ends the run as soon as that name is
  #   stored; :input (or "input") is handed to the accessors
  # @param accessors [#fetch] plan_id => callable taking the input
  # @param registry [Hash] function name => entry responding to #fn
  # @return [Hash] stored name => value
  # @raise [ArgumentError] when registry is nil or not a Hash
  # @raise [RuntimeError] any StandardError raised while executing an op,
  #   re-raised with the declaration name, op index and op details prepended
  # @raise [NotImplementedError] for a :join op
  def self.run(ir_module, ctx, accessors:, registry:)
    raise ArgumentError, "Registry cannot be nil" if registry.nil?
    raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)

    outputs = {}
    target = ctx[:target]

    ir_module.decls.each do |decl|
      slots = []
      guard_stack = [true] # reset per decl

      decl.ops.each_with_index do |op, op_index|
        if ENV["ASSERT_VM_SLOTS"] == "1" && slots.length != op_index
          raise "slot drift: have=#{slots.length} expect=#{op_index} at #{decl.name}@op#{op_index} #{op.tag}"
        end

        # Guard ops always run, even while an enclosing guard is off.
        case op.tag
        when :guard_push
          guard_stack << guard_value(op, slots, guard_stack)
          slots << nil # keep slot_id == op_index
          next
        when :guard_pop
          guard_stack.pop
          slots << nil
          next
        end

        # Skip body when guarded off, but keep indices aligned
        unless guard_stack.last
          slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
          next
        end

        case op.tag
        when :assign
          dst = op.attrs[:dst]
          src = op.attrs[:src]
          raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length

          slots[dst] = slots[src]
        when :const
          result = Values.scalar(op.attrs[:value])
          debug { "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" }
          slots << result
        when :load_input
          slots << op_load_input(op, ctx, accessors)
        when :ref
          slots << op_ref(op, outputs)
        when :array
          slots << op_array(op, slots)
        when :map
          slots << op_map(op, slots, registry)
        when :switch
          slots << op_switch(op, slots)
        when :store
          name = op.attrs[:name]
          src = op.args[0] || raise("store: missing source slot")
          outputs[name] = fetch_slot(slots, src, "Store operation '#{name}'", "source slot")
          slots << nil # keep slot_id == op_index invariant
          return outputs if target && name == target
        when :reduce
          slots << op_reduce(op, slots, registry)
        when :lift
          slots << op_lift(op, slots)
        when :align_to
          slots << op_align_to(op, slots)
        when :join
          raise NotImplementedError, "Join not implemented yet"
        else
          raise "Unknown operation: #{op.tag}"
        end
      rescue StandardError => e
        # Use the loop index: ops are Structs compared by value, so searching
        # decl.ops for this op could report an earlier, equal op instead.
        context_info = ["slots.length=#{slots.length}"]
        context_info << "non_nil_slots=#{slots.compact.length}" if slots.any?(&:nil?)
        context_info << "op_attrs=#{op.attrs.inspect}" if op.attrs && !op.attrs.empty?
        context_info << "op_args=#{op.args.inspect}" if op.args && !op.args.empty?

        raise "#{decl.name}@op#{op_index} #{op.tag} (#{context_info.join(', ')}): #{e.message}"
      end
    end

    outputs
  end

  # Prints the block's string to stdout when DEBUG_VM_ARGS is set; the string
  # is only built in that case.
  def self.debug
    puts yield if ENV["DEBUG_VM_ARGS"]
  end

  # Returns slots[idx], raising a descriptive error when idx is past the end of
  # the list or the slot holds nil. +oob_noun+ overrides the noun used in the
  # out-of-bounds message.
  def self.fetch_slot(slots, idx, label, noun, oob_noun: noun)
    raise "#{label}: #{oob_noun} #{idx} out of bounds (slots.length=#{slots.length})" if idx >= slots.length

    value = slots[idx]
    return value unless value.nil?

    raise "#{label}: #{noun} #{idx} is nil " \
          "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
  end

  # Returns the callable registered under fn_name, raising when it is missing.
  def self.lookup_fn(registry, fn_name)
    entry = registry[fn_name] || raise("Function #{fn_name} not found in registry")
    entry.fn
  end

  # Computes the entry pushed on the guard stack for a :guard_push op.
  def self.guard_value(op, slots, guard_stack)
    cond_slot = op.attrs[:cond_slot]
    raise "guard_push: cond slot OOB" if cond_slot >= slots.length

    cond = slots[cond_slot]
    case cond[:k]
    when :scalar
      guard_stack.last && !!cond[:v]
    when :vec
      # vector mask: push the mask value itself; truthiness handled inside ops
      cond
    else
      false
    end
  end

  # :load_input - calls the accessor for plan_id and wraps the raw result as a
  # scalar, an indexed vec (raw yields [value, idx] pairs) or a plain vec.
  def self.op_load_input(op, ctx, accessors)
    plan_id = op.attrs[:plan_id]
    scope = op.attrs[:scope] || []
    raw = accessors.fetch(plan_id).call(ctx[:input] || ctx["input"])
    debug { "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect}" }

    if op.attrs[:is_scalar]
      Values.scalar(raw)
    elsif op.attrs[:has_idx]
      Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
    else
      Values.vec(scope, raw.map { |v| { v: v } }, false)
    end
  end

  # :ref - returns the value previously stored under the op's name.
  def self.op_ref(op, outputs)
    name = op.attrs[:name]
    value = outputs.fetch(name) { raise "Missing output for reference: #{name}" }
    debug do
      summary = value[:k] == :scalar ? "scalar(#{value[:v].inspect})" : "#{value[:k]}(#{value[:rows]&.size || 0} rows)"
      "DEBUG Ref #{name}: #{summary}"
    end
    value
  end

  # :array - collects the argument slots into one Array per row (or a single
  # scalar Array when every argument is scalar).
  def self.op_array(op, slots)
    parts = op.args.map { |i| fetch_slot(slots, i, "Array operation", "slot", oob_noun: "slot index") }
    return Values.scalar(parts.map { |p| p[:v] }) if parts.all? { |p| p[:k] == :scalar }

    base = parts.find { |p| p[:k] == :vec } || raise("Array literal needs a vec carrier")
    # Preserve original order: broadcast scalars in-place
    arg_vecs = parts.map { |p| p[:k] == :scalar ? Combinators.broadcast_scalar(p, base) : p }
    # All vectors must share scope
    raise "Cross-scope array literal" unless arg_vecs.map { |v| v[:scope] }.uniq.size <= 1

    zipped = Combinators.zip_same_scope(*arg_vecs)
    # A lone vec comes back unzipped, so its row value is the element itself and
    # must be wrapped explicitly (Array() would flatten arrays and drop nil).
    single = arg_vecs.length == 1
    rows = zipped[:rows].map do |row|
      vals = single ? [row[:v]] : row[:v]
      row.key?(:idx) ? { v: vals, idx: row[:idx] } : { v: vals }
    end
    Values.vec(base[:scope], rows, base[:has_idx])
  end

  # :map - calls the registered function once for all-scalar arguments, or once
  # per row when at least one argument is a vec (scalars are broadcast).
  def self.op_map(op, slots, registry)
    fn_name = op.attrs[:fn]
    fn = lookup_fn(registry, fn_name)
    debug { "DEBUG Map #{fn_name}: args=#{op.args.inspect}" }

    args = op.args.map do |slot_idx|
      fetch_slot(slots, slot_idx, "Map operation #{fn_name}", "slot", oob_noun: "slot index")
    end

    if args.all? { |a| a[:k] == :scalar }
      scalar_args = args.map { |a| a[:v] }
      debug { "DEBUG Scalar call #{fn_name}: args=#{scalar_args.inspect}" }
      return Values.scalar(fn.call(*scalar_args))
    end

    base = args.find { |a| a[:k] == :vec } || raise("Map needs a vec carrier")
    debug { "DEBUG Vec call #{fn_name}: base=#{base.inspect}" }
    # Preserve original order: broadcast scalars in-place
    arg_vecs = args.map { |a| a[:k] == :scalar ? Combinators.broadcast_scalar(a, base) : a }
    debug { "DEBUG Vec call #{fn_name}: arg_vecs=#{arg_vecs.inspect}" }
    scopes = arg_vecs.map { |v| v[:scope] }.uniq
    debug { "DEBUG Vec call #{fn_name}: scopes=#{scopes.inspect}" }
    raise "Cross-scope Map without Join" unless scopes.size <= 1

    zipped = Combinators.zip_same_scope(*arg_vecs)
    debug { "DEBUG Vec call #{fn_name}: zipped rows=#{zipped[:rows].inspect}" }

    # A lone vec comes back unzipped: its row value is the single argument, not
    # an argument list, so wrap it rather than splatting it.
    single = arg_vecs.length == 1
    rows = zipped[:rows].map do |row|
      row_args = single ? [row[:v]] : row[:v]
      result = fn.call(*row_args)
      row.key?(:idx) ? { v: result, idx: row[:idx] } : { v: result }
    end
    debug { "DEBUG Vec call #{fn_name}: result rows=#{rows.inspect}" }

    Values.vec(base[:scope], rows, base[:has_idx])
  end

  # :switch - returns the result slot of the first case whose scalar condition
  # is truthy, or the default slot when none matches.
  def self.op_switch(op, slots)
    chosen = op.attrs[:cases].find do |(cond_slot, _)|
      cond = fetch_slot(slots, cond_slot, "Switch operation", "condition slot")
      # TODO: Proper vectorized cascade handling (vec conditions never match)
      cond[:k] == :scalar ? !!cond[:v] : false
    end

    result_slot = chosen ? chosen[1] : op.attrs[:default]
    fetch_slot(slots, result_slot, "Switch operation", "result slot")
  end

  # :reduce - folds a vec with the registered function: to one scalar when
  # result_scope is empty, otherwise to one row per leading-index group.
  def self.op_reduce(op, slots, registry)
    fn = lookup_fn(registry, op.attrs[:fn])
    src = fetch_slot(slots, op.args[0], "Reduce operation", "source slot")
    raise "Reduce expects Vec" unless src[:k] == :vec

    result_scope = Array(op.attrs[:result_scope] || [])

    # === GLOBAL REDUCE === (accepts either ravel or indexed input)
    return Values.scalar(fn.call(src[:rows].map { |r| r[:v] })) if result_scope.empty?

    # === GROUPED REDUCE === (must have indices to group by prefix keys)
    unless src[:has_idx]
      raise "Grouped reduce requires indexed input (got ravel) for #{op.attrs[:fn]} at #{result_scope.inspect}"
    end

    group_len = result_scope.length
    # group_by keeps keys in first-seen order, preserving the source order of groups.
    grouped = src[:rows].group_by { |row| Array(row[:idx]).first(group_len) }
    out_rows = grouped.map { |key, rows| { v: fn.call(rows.map { |r| r[:v] }), idx: key } }

    Values.vec(result_scope, out_rows, true)
  end

  # :lift - turns a vec into a scalar holding nested Ruby arrays. The nesting
  # depth is the smaller of to_scope's length and the vec's rank.
  def self.op_lift(op, slots)
    vec = fetch_slot(slots, op.args[0], "Lift operation", "source slot")
    to_scope = op.attrs[:to_scope] || []
    depth = [to_scope.length, vec[:rank] || vec[:rows].first&.dig(:idx)&.length || 0].min
    Values.scalar(Combinators.group_rows(vec[:rows], depth))
  end

  # :align_to - aligns the source slot (args[1]) to the target slot (args[0]).
  def self.op_align_to(op, slots)
    tgt = fetch_slot(slots, op.args[0], "AlignTo operation", "target slot")
    src = fetch_slot(slots, op.args[1], "AlignTo operation", "source slot")

    Combinators.align_to(tgt, src,
                         to_scope: op.attrs[:to_scope] || [],
                         require_unique: op.attrs[:require_unique] || false,
                         on_missing: op.attrs[:on_missing] || :error)
  end

  private_class_method :debug, :fetch_slot, :lookup_fn, :guard_value,
                       :op_load_input, :op_ref, :op_array, :op_map,
                       :op_switch, :op_reduce, :op_lift, :op_align_to
end
|
333
|
+
end
|
334
|
+
end
|
335
|
+
end
|
336
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module IR
|
6
|
+
module ExecutionEngine
|
7
|
+
# Value constructors and helpers for VM data representation.
#
# Scalars and vectors are plain hashes tagged by :k (:scalar or :vec).
module Values
  class << self
    # Create a scalar value.
    #
    # @param v [Object] the wrapped value
    # @return [Hash] { k: :scalar, v: v }
    def scalar(v)
      { k: :scalar, v: v }
    end

    # Create a vector with scope and rows.
    #
    # With has_idx the rank is the :idx length of the first row as given
    # (0 for no rows) and the rows are returned sorted by :idx; without it the
    # rows are kept as passed and the rank is 0.
    #
    # @param scope [Array] scope stored on the vector
    # @param rows [Array<Hash>] rows of the form { v: } or { v:, idx: }
    # @param has_idx [Boolean] whether the rows carry :idx
    # @return [Hash] { k: :vec, scope:, rows:, has_idx:, rank: }
    def vec(scope, rows, has_idx)
      return { k: :vec, scope: scope, rows: rows, has_idx: has_idx, rank: 0 } unless has_idx

      rank = rows.empty? ? 0 : rows.first[:idx].length
      # TODO: > Make sure this is not costly
      # raise if rows.any? { |r| r[:idx].length != rank }
      ordered = rows.sort_by { |r| r[:idx] } # one-time sort

      { k: :vec, scope: scope, rows: ordered, has_idx: has_idx, rank: rank }
    end

    # Create a row with optional index.
    #
    # @param v [Object] row value
    # @param idx [Object, nil] index, coerced with Array(); omitted when nil or false
    # @return [Hash] { v: } or { v:, idx: [...] }
    def row(v, idx = nil)
      return { v: v } unless idx

      { v: v, idx: Array(idx) }
    end

    # Check if value is scalar.
    def scalar?(val)
      val[:k] == :scalar
    end

    # Check if value is vector.
    def vec?(val)
      val[:k] == :vec
    end
  end
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module IR
|
6
|
+
# ExecutionEngine interpreter for IR execution
|
7
|
+
#
|
8
|
+
# ARCHITECTURE:
|
9
|
+
# - Values:
|
10
|
+
# * Scalar(v) → { k: :scalar, v: v }
|
11
|
+
# * Vec(scope, rows, has_idx) → { k: :vec, scope: [:axis, ...], rows: [{ v:, idx:[...] }, ...], has_idx: true/false }
|
12
|
+
# - Rank = idx length; scope length is the logical axes carried by the vector
|
13
|
+
#
|
14
|
+
# - Combinators (pure, stateless, delegate to Executor):
|
15
|
+
# * broadcast_scalar(scalar, vec) → replicate scalar across vec rows (preserves idx/scope)
|
16
|
+
# * zip_same_scope(vec1, vec2, ...) → positional zip for equal scope & equal row count
|
17
|
+
# * align_to(tgt_vec, src_vec, to_scope) → expand src by prefix indices to match a higher-rank scope
|
18
|
+
# * group_rows(rows, depth) → stable grouping by idx prefix to nested Ruby arrays
|
19
|
+
#
|
20
|
+
# - Executor:
|
21
|
+
# * Executes IR ops in order; delegates to combinators; maintains a slot stack
|
22
|
+
# * No structural inference; trusts IR attributes (scope, has_idx, is_scalar)
|
23
|
+
#
|
24
|
+
# OP SEMANTICS (subset):
|
25
|
+
# - const(value) → push Scalar(value)
|
26
|
+
# - ref(name) → push previous slot by stored name (twins allowed: :name__vec)
|
27
|
+
# - load_input(plan_id, attrs) → call accessor; return Scalar or Vec according to attrs/mode
|
28
|
+
# - map(fn, argc, *args) → elementwise or scalar call; auto alignment already handled by IR
|
29
|
+
# - reduce(fn, axis, ...) → reduce one vector arg; returns Scalar when result_scope is empty, otherwise a Vec over result_scope
|
30
|
+
# - align_to(to_scope, a, b) → align b to a’s to_scope (prefix-compat only)
|
31
|
+
# - array(count, *args) → collect args into a Scalar(Array); if any arg is a Vec, scalars are broadcast and the result is a Vec whose rows hold Arrays
|
32
|
+
# - lift(to_scope, slot) → require Vec(has_idx), group rows with `group_rows` to nested Scalar
|
33
|
+
# - store(name, slot) → bind slot to name in env (used for :name and :name__vec twins)
|
34
|
+
#
|
35
|
+
# PRINCIPLES:
|
36
|
+
# - Mechanical execution only; “smarts” live in LowerToIR.
|
37
|
+
# - Never sniff Ruby types to guess shapes.
|
38
|
+
# - Errors early and clearly if invariants are violated (e.g., align_to expects vecs with indices).
|
39
|
+
#
|
40
|
+
# DEBUGGING:
|
41
|
+
# - DEBUG_VM_ARGS=1 prints per-op execution and arguments.
|
42
|
+
# - DEBUG_GROUP_ROWS=1 prints grouping decisions during Lift.
|
43
|
+
module ExecutionEngine
  class << self
    # Entry point of the engine: hands every argument to Interpreter.run
    # unchanged and returns whatever it returns.
    #
    # @param ir_module [Object] IR module to execute
    # @param ctx [Object] execution context
    # @param accessors [Object] passed through as the accessors: keyword
    # @param registry [Object] passed through as the registry: keyword
    def run(ir_module, ctx, accessors:, registry:)
      Interpreter.run(
        ir_module,
        ctx,
        accessors: accessors,
        registry: registry
      )
    end
  end
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/lib/kumi/core/ir.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module IR
  # One IR instruction with a tag, an attrs value and an args value.
  # Built from keywords; the instance is frozen on creation.
  Op = Struct.new(:tag, :attrs, :args, keyword_init: true) do
    def initialize(**fields)
      super(**fields)
      freeze
    end
  end

  # A named declaration holding a list of ops.
  # Built from keywords; each op (when ops is given) and the instance itself
  # are frozen on creation.
  Decl = Struct.new(:name, :kind, :shape, :ops, keyword_init: true) do
    def initialize(**fields)
      super(**fields)
      ops.each(&:freeze) unless ops.nil?
      freeze
    end
  end

  # The top-level container of inputs and declarations.
  # Built from keywords; each decl (when decls is given) and the instance
  # itself are frozen on creation.
  Module = Struct.new(:inputs, :decls, keyword_init: true) do
    def initialize(**fields)
      super(**fields)
      decls.each(&:freeze) unless decls.nil?
      freeze
    end
  end
end
|
27
|
+
|
28
|
+
# Constructors for IR::Op values, one per op tag.
module IR::Ops
  class << self
    # :const op carrying +v+ as attrs[:value].
    def Const(v)
      IR::Op.new(tag: :const, attrs: { value: v }, args: [])
    end

    # :load_input op for the given plan id; scope, is_scalar and has_idx are
    # stored in attrs.
    def LoadInput(plan_id, scope: [], is_scalar: false, has_idx: false)
      attrs = { plan_id: plan_id, scope: scope, is_scalar: is_scalar, has_idx: has_idx }
      IR::Op.new(tag: :load_input, attrs: attrs, args: [])
    end

    # :ref op naming a previously stored value.
    def Ref(name)
      IR::Op.new(tag: :ref, attrs: { name: name }, args: [])
    end

    # :map op applying +fn+ to the given slots; +argc+ is recorded in attrs.
    def Map(fn, argc, *slots)
      IR::Op.new(tag: :map, attrs: { fn: fn, argc: argc }, args: slots)
    end

    # :array op over the given slots; +count+ is recorded in attrs.
    def Array(count, *slots)
      IR::Op.new(tag: :array, attrs: { count: count }, args: slots)
    end

    # :switch op with its cases and default stored in attrs.
    def Switch(cases, default)
      IR::Op.new(tag: :switch, attrs: { cases: cases, default: default }, args: [])
    end

    # :guard_push op reading its condition from +cond_slot+.
    def GuardPush(cond_slot)
      IR::Op.new(tag: :guard_push, attrs: { cond_slot: cond_slot }, args: [])
    end

    # :guard_pop op (no attrs, no args).
    def GuardPop
      IR::Op.new(tag: :guard_pop, attrs: {}, args: [])
    end

    # :assign op with destination and source slots stored in attrs.
    def Assign(dst:, src:)
      IR::Op.new(tag: :assign, attrs: { dst: dst, src: src }, args: [])
    end

    # :store op binding +slot+ to +name+.
    def Store(name, slot)
      IR::Op.new(tag: :store, attrs: { name: name }, args: [slot])
    end

    # :lift op for +slot+ with the target scope stored in attrs.
    def Lift(to_scope, slot)
      IR::Op.new(tag: :lift, attrs: { to_scope: to_scope }, args: [slot])
    end

    # :join op over the given slots.
    def Join(*slots)
      IR::Op.new(tag: :join, attrs: {}, args: slots)
    end

    # Up-sample `source` to the scope (and order) of `target` by index-prefix.
    # Policies: :error | :nil for missing; require_unique: true enforces 1:1 on prefix.
    # A non-Array +to_scope+ is wrapped in a one-element Array.
    def AlignTo(target_slot, source_slot, to_scope:, on_missing: :error, require_unique: true)
      to_scope = [to_scope] unless to_scope.is_a?(::Array)
      attrs = { to_scope: to_scope, on_missing: on_missing, require_unique: require_unique }

      IR::Op.new(tag: :align_to, attrs: attrs, args: [target_slot, source_slot])
    end

    # :reduce op for +slot+; fn, axis, result_scope and flatten are stored in attrs.
    def Reduce(fn, axis, result_scope, flatten, slot)
      attrs = { fn: fn, axis: axis, result_scope: result_scope, flatten: flatten }
      IR::Op.new(tag: :reduce, attrs: attrs, args: [slot])
    end
  end
end
|
57
|
+
end
|
58
|
+
end
|
@@ -4,12 +4,12 @@ module Kumi
|
|
4
4
|
module Core
|
5
5
|
module RubyParser
|
6
6
|
class BuildContext
|
7
|
-
attr_reader :inputs, :
|
7
|
+
attr_reader :inputs, :values, :traits
|
8
8
|
attr_accessor :current_location
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
@inputs = []
|
12
|
-
@
|
12
|
+
@values = []
|
13
13
|
@traits = []
|
14
14
|
@input_block_defined = false
|
15
15
|
end
|
@@ -20,18 +20,6 @@ module Kumi
|
|
20
20
|
def to_ast_node
|
21
21
|
Kumi::Syntax::DeclarationReference.new(@name, loc: @context.current_location)
|
22
22
|
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
def method_missing(method_name, *args, &block)
|
27
|
-
# All operators are handled by ProxyRefinement methods
|
28
|
-
# Field access should use input.field.subfield syntax, not bare identifiers
|
29
|
-
super
|
30
|
-
end
|
31
|
-
|
32
|
-
def respond_to_missing?(_method_name, _include_private = false)
|
33
|
-
true
|
34
|
-
end
|
35
23
|
end
|
36
24
|
end
|
37
25
|
end
|