kumi 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/CHANGELOG.md +23 -0
  4. data/CLAUDE.md +7 -231
  5. data/README.md +5 -5
  6. data/docs/SYNTAX.md +66 -0
  7. data/docs/VECTOR_SEMANTICS.md +286 -0
  8. data/docs/features/hierarchical-broadcasting.md +67 -1
  9. data/docs/features/input-declaration-system.md +16 -0
  10. data/docs/features/s-expression-printer.md +2 -2
  11. data/lib/kumi/analyzer.rb +34 -12
  12. data/lib/kumi/compiler.rb +2 -12
  13. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
  14. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  15. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  16. data/lib/kumi/core/analyzer/passes/input_collector.rb +123 -101
  17. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  18. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  19. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  20. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  21. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
  22. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  23. data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
  24. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  25. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  26. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
  27. data/lib/kumi/core/analyzer/plans.rb +52 -0
  28. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  29. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  30. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  31. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  32. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  33. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  34. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  35. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  36. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  37. data/lib/kumi/core/compiler_base.rb +2 -2
  38. data/lib/kumi/core/error_reporter.rb +6 -5
  39. data/lib/kumi/core/errors.rb +4 -0
  40. data/lib/kumi/core/explain.rb +157 -205
  41. data/lib/kumi/core/export/node_builders.rb +2 -2
  42. data/lib/kumi/core/export/node_serializers.rb +1 -1
  43. data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
  44. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  45. data/lib/kumi/core/function_registry/function_builder.rb +142 -55
  46. data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
  47. data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
  48. data/lib/kumi/core/function_registry.rb +126 -108
  49. data/lib/kumi/core/input/validator.rb +1 -1
  50. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  51. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  52. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  53. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  54. data/lib/kumi/core/ir.rb +58 -0
  55. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  56. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  57. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
  58. data/lib/kumi/core/ruby_parser/input_builder.rb +30 -9
  59. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  60. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  61. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  62. data/lib/kumi/core/types/validator.rb +1 -1
  63. data/lib/kumi/registry.rb +14 -79
  64. data/lib/kumi/runtime/executable.rb +213 -0
  65. data/lib/kumi/schema.rb +14 -3
  66. data/lib/kumi/schema_metadata.rb +2 -2
  67. data/lib/kumi/support/ir_dump.rb +491 -0
  68. data/lib/kumi/support/s_expression_printer.rb +1 -1
  69. data/lib/kumi/syntax/location.rb +5 -0
  70. data/lib/kumi/syntax/node.rb +0 -1
  71. data/lib/kumi/syntax/root.rb +2 -2
  72. data/lib/kumi/version.rb +1 -1
  73. data/lib/kumi.rb +6 -15
  74. metadata +37 -19
  75. data/lib/kumi/core/cascade_executor_builder.rb +0 -132
  76. data/lib/kumi/core/compiled_schema.rb +0 -43
  77. data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
  78. data/lib/kumi/core/compiler/function_invoker.rb +0 -55
  79. data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
  80. data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
  81. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  82. data/lib/kumi/core/nested_structure_utils.rb +0 -78
  83. data/lib/kumi/core/schema_instance.rb +0 -115
  84. data/lib/kumi/core/vectorized_function_builder.rb +0 -88
  85. data/lib/kumi/js/compiler.rb +0 -878
  86. data/lib/kumi/js/function_registry.rb +0 -333
  87. data/migrate_to_core_iterative.rb +0 -938
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Pure combinators for data transformation.
        #
        # All values are plain hashes:
        #   scalar -> { k: :scalar, v: value }
        #   vec    -> { k: :vec, scope: [...], rows: [{ v:, idx: [...] }, ...], has_idx: true/false }
        # Result vectors are always constructed through Values.vec.
        module Combinators
          # Broadcast scalar over vec (scalar→vec only).
          # Every row of the result carries the scalar's value; a row's :idx is
          # copied over when the carrier row has one.
          #
          # @param s [Hash] scalar value {:k => :scalar, :v => value}
          # @param v [Hash] vector value {:k => :vec, :scope => [...], :rows => [...]}
          # @return [Hash] broadcasted vector with v's scope and has_idx flag
          # @raise [RuntimeError] if s is not a scalar or v is not a vec
          def self.broadcast_scalar(s, v)
            raise "First arg must be scalar" unless s[:k] == :scalar
            raise "Second arg must be vec" unless v[:k] == :vec

            rows = v[:rows].map do |r|
              r.key?(:idx) ? { v: s[:v], idx: r[:idx] } : { v: s[:v] }
            end

            Values.vec(v[:scope], rows, v[:has_idx])
          end

          # Positional zip for same-scope vecs.
          # Row i of the result holds an array with the i-th value of every input;
          # its :idx (if any) is taken from the first vec's row.
          #
          # @param vecs [Array<Hash>] vectors to zip together
          # @return [Hash] zipped vector (the single input itself when only one is given)
          # @raise [RuntimeError] if an argument is not a vec, or scopes / row counts differ
          def self.zip_same_scope(*vecs)
            raise "All arguments must be vecs" unless vecs.all? { |v| v[:k] == :vec }
            raise "All vecs must have same scope" unless vecs.map { |v| v[:scope] }.uniq.size == 1
            raise "All vecs must have same row count" unless vecs.map { |v| v[:rows].size }.uniq.size == 1
            return vecs.first if vecs.length == 1

            first_vec = vecs.first
            zipped_rows = first_vec[:rows].zip(*vecs[1..].map { |v| v[:rows] }).map do |row_group|
              combined_values = row_group.map { |r| r[:v] }
              result_row = { v: combined_values }
              result_row[:idx] = row_group.first[:idx] if row_group.first.key?(:idx)
              result_row
            end

            Values.vec(first_vec[:scope], zipped_rows, first_vec[:has_idx])
          end

          # Prefix-index alignment for rank expansion/broadcasting.
          # Each target row looks up the source value whose index equals the
          # leading `src_rank` components of the target row's index.
          #
          # @param tgt [Hash] target vector (defines output structure)
          # @param src [Hash] source vector (values to align)
          # @param to_scope [Array] target scope
          # @param require_unique [Boolean] enforce unique prefixes
          # @param on_missing [Symbol] :error or :nil policy
          # @return [Hash] aligned vector with scope `to_scope` and has_idx = true
          # @raise [RuntimeError] on non-indexed inputs, incompatible ranks, duplicate
          #   prefixes (when require_unique), missing prefixes (policy :error) or an
          #   unknown on_missing policy
          def self.align_to(tgt, src, to_scope:, require_unique: false, on_missing: :error)
            raise "align_to expects vecs with indices" unless [tgt, src].all? { |v| v[:k] == :vec && v[:has_idx] }

            to_rank = to_scope.length
            # An empty source has no row to read the rank from; treat it as rank 0 so
            # that the on_missing policy decides the outcome instead of a nil error.
            first_src_row = src[:rows].first
            src_rank = first_src_row ? first_src_row[:idx].length : 0
            raise "scope not prefix-compatible: #{src_rank} > #{to_rank}" unless src_rank <= to_rank

            # Build prefix->value hash
            h = {}
            src[:rows].each do |r|
              k = r[:idx].first(src_rank)
              raise "non-unique prefix for align_to: #{k.inspect}" if require_unique && h.key?(k)

              h[k] = r[:v]
            end

            # Map target rows through alignment
            rows = tgt[:rows].map do |r|
              k = r[:idx].first(src_rank)
              if h.key?(k)
                { v: h[k], idx: r[:idx] }
              else
                case on_missing
                when :nil then { v: nil, idx: r[:idx] }
                when :error then raise "missing prefix #{k.inspect} in align_to"
                else raise "unknown on_missing policy: #{on_missing}"
                end
              end
            end

            Values.vec(to_scope, rows, true)
          end

          # Build hierarchical groups for lift operation.
          # Consecutive rows sharing the same leading index component form one
          # group; grouping recurses on the remaining index components.
          #
          # @param rows [Array<Hash>] rows: [{ v: ..., idx: [i0,i1,...] }, ...]
          #   with lexicographically sorted :idx
          # @param depth [Integer] nesting depth
          # @return [Array, Object] nested array structure; at depth 0 the bare value
          #   of the first row when that row has no (or an empty) index
          # @raise [ArgumentError] if depth is negative
          def self.group_rows(rows, depth = 0)
            return [] if rows.empty?
            raise ArgumentError, "depth < 0" if depth < 0

            if depth == 0
              return rows.first[:v] if rows.first[:idx].nil? || rows.first[:idx].empty?

              return rows.map { |r| r[:v] }
            end

            # Rows are sorted, so equal heads are adjacent.
            rows.chunk_while { |a, b| a[:idx].first == b[:idx].first }.map do |group|
              tail = group.map { |r| { v: r[:v], idx: r[:idx][1..] } }
              group_rows(tail, depth - 1)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,336 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Interpreter for IR modules - thin layer that delegates to combinators.
        module Interpreter
          # Tags whose execution appends a result value to the slot list.
          PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
          # Tags that compute no value of their own.
          NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze

          # Execute every declaration of an IR module in order.
          #
          # Each declaration gets a fresh slot list and guard stack. Values stored by
          # `store` ops are collected in the returned hash and can be read back by
          # later `ref` ops.
          #
          # @param ir_module [#decls] module whose decls respond to #name and #ops;
          #   each op responds to #tag, #attrs and #args
          # @param ctx [Hash] execution context; reads :target (optional name at which
          #   to stop early) and :input / "input" (data handed to the accessors)
          # @param accessors [Hash] plan_id => callable taking the input data
          # @param registry [Hash] function name => entry responding to #fn
          # @return [Hash] name => stored value (scalar or vec hash)
          # @raise [ArgumentError] if registry is nil or not a Hash
          # @raise [RuntimeError] for any failure while executing an op; the message is
          #   prefixed with "<decl name>@op<index> <tag>" and slot/attr context
          def self.run(ir_module, ctx, accessors:, registry:)
            # Validate registry is properly initialized
            raise ArgumentError, "Registry cannot be nil" if registry.nil?
            raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)

            outputs = {}
            target = ctx[:target]

            ir_module.decls.each do |decl|
              slots = []
              guard_stack = [true] # reset per decl

              decl.ops.each_with_index do |op, op_index|
                if ENV["ASSERT_VM_SLOTS"] == "1"
                  expected = op_index
                  unless slots.length == expected
                    raise "slot drift: have=#{slots.length} expect=#{expected} at #{decl.name}@op#{op_index} #{op.tag}"
                  end
                end

                # Guard ops are processed even inside a guarded-off region so that
                # pushes and pops stay balanced.
                case op.tag
                when :guard_push
                  cond_slot = op.attrs[:cond_slot]
                  raise "guard_push: cond slot OOB" if cond_slot >= slots.length

                  c = slots[cond_slot]

                  pushed =
                    if !guard_stack.last
                      # Enclosing guard is off: the condition slot may have been skipped
                      # (nil), and nothing nested may switch execution back on.
                      false
                    elsif c.nil?
                      raise "guard_push: cond slot #{cond_slot} is nil"
                    else
                      case c[:k]
                      when :scalar then !!c[:v]
                      # vector mask: push the mask value itself (truthy)
                      when :vec then c
                      else false
                      end
                    end

                  guard_stack << pushed
                  slots << nil # keep slot_id == op_index
                  next

                when :guard_pop
                  guard_stack.pop
                  slots << nil
                  next
                end

                # Skip body when guarded off, but keep indices aligned
                unless guard_stack.last
                  slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
                  next
                end

                case op.tag

                when :assign
                  dst = op.attrs[:dst]
                  src = op.attrs[:src]
                  raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length

                  # NOTE(review): unlike store/guard ops, an executed assign appends no
                  # placeholder slot, while a skipped assign does (see the guarded-off
                  # branch above) - confirm which one the IR lowering expects.
                  slots[dst] = slots[src]

                when :const
                  result = Values.scalar(op.attrs[:value])
                  puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
                  slots << result

                when :load_input
                  plan_id = op.attrs[:plan_id]
                  scope = op.attrs[:scope] || []
                  scalar = op.attrs[:is_scalar]
                  indexed = op.attrs[:has_idx]
                  raw = accessors.fetch(plan_id).call(ctx[:input] || ctx["input"])

                  puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect}" if ENV["DEBUG_VM_ARGS"]
                  slots << if scalar
                             Values.scalar(raw)
                           elsif indexed
                             # indexed accessors yield [value, index] pairs
                             Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
                           else
                             Values.vec(scope, raw.map { |v| { v: v } }, false)
                           end

                when :ref
                  name = op.attrs[:name]
                  referenced_value = outputs.fetch(name) { raise "Missing output for reference: #{name}" }
                  if ENV["DEBUG_VM_ARGS"]
                    puts "DEBUG Ref #{name}: #{referenced_value[:k] == :scalar ? "scalar(#{referenced_value[:v].inspect})" : "#{referenced_value[:k]}(#{referenced_value[:rows]&.size || 0} rows)"}"
                  end
                  slots << referenced_value

                when :array
                  # Validate slot indices before accessing
                  op.args.each do |slot_idx|
                    if slot_idx >= slots.length
                      raise "Array operation: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
                    elsif slots[slot_idx].nil?
                      raise "Array operation: slot #{slot_idx} is nil " \
                            "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                    end
                  end

                  parts = op.args.map { |i| slots[i] }
                  if parts.all? { |p| p[:k] == :scalar }
                    slots << Values.scalar(parts.map { |p| p[:v] })
                  else
                    base = parts.find { |p| p[:k] == :vec } or raise "Array literal needs a vec carrier"
                    # Preserve original order: broadcast scalars in-place
                    arg_vecs = parts.map { |p| p[:k] == :scalar ? Combinators.broadcast_scalar(p, base) : p }
                    # All vectors must share scope
                    scopes = arg_vecs.map { |v| v[:scope] }.uniq
                    raise "Cross-scope array literal" unless scopes.size <= 1

                    zipped = Combinators.zip_same_scope(*arg_vecs)
                    rows = zipped[:rows].map do |row|
                      vals = Array(row[:v])
                      row.key?(:idx) ? { v: vals, idx: row[:idx] } : { v: vals }
                    end
                    slots << Values.vec(base[:scope], rows, base[:has_idx])
                  end

                when :map
                  fn_name = op.attrs[:fn]
                  fn_entry = registry[fn_name] or raise "Function #{fn_name} not found in registry"
                  fn = fn_entry.fn
                  puts "DEBUG Map #{fn_name}: args=#{op.args.inspect}" if ENV["DEBUG_VM_ARGS"]

                  # Validate slot indices before accessing
                  op.args.each do |slot_idx|
                    if slot_idx >= slots.length
                      raise "Map operation #{fn_name}: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
                    elsif slots[slot_idx].nil?
                      raise "Map operation #{fn_name}: slot #{slot_idx} is nil " \
                            "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                    end
                  end

                  args = op.args.map { |slot_idx| slots[slot_idx] }

                  if args.all? { |a| a[:k] == :scalar }
                    puts "DEBUG Scalar call #{fn_name}: args=#{args.map { |a| a[:v] }.inspect}" if ENV["DEBUG_VM_ARGS"]
                    scalar_args = args.map { |a| a[:v] }
                    result = fn.call(*scalar_args)
                    slots << Values.scalar(result)
                  else
                    base = args.find { |a| a[:k] == :vec } or raise "Map needs a vec carrier"
                    puts "DEBUG Vec call #{fn_name}: base=#{base.inspect}" if ENV["DEBUG_VM_ARGS"]
                    # Preserve original order: broadcast scalars in-place
                    arg_vecs = args.map { |a| a[:k] == :scalar ? Combinators.broadcast_scalar(a, base) : a }
                    puts "DEBUG Vec call #{fn_name}: arg_vecs=#{arg_vecs.inspect}" if ENV["DEBUG_VM_ARGS"]
                    scopes = arg_vecs.map { |v| v[:scope] }.uniq
                    puts "DEBUG Vec call #{fn_name}: scopes=#{scopes.inspect}" if ENV["DEBUG_VM_ARGS"]
                    raise "Cross-scope Map without Join" unless scopes.size <= 1

                    zipped = Combinators.zip_same_scope(*arg_vecs)

                    puts "DEBUG Vec call #{fn_name}: zipped rows=#{zipped[:rows].inspect}" if ENV["DEBUG_VM_ARGS"]
                    rows = zipped[:rows].map do |row|
                      row_args = Array(row[:v])
                      vr = fn.call(*row_args)
                      row.key?(:idx) ? { v: vr, idx: row[:idx] } : { v: vr }
                    end
                    puts "DEBUG Vec call #{fn_name}: result rows=#{rows.inspect}" if ENV["DEBUG_VM_ARGS"]

                    slots << Values.vec(base[:scope], rows, base[:has_idx])
                  end

                when :switch
                  # First case whose (scalar) condition is truthy wins.
                  chosen = op.attrs[:cases].find do |(cond_slot, _)|
                    if cond_slot >= slots.length
                      raise "Switch operation: condition slot #{cond_slot} out of bounds (slots.length=#{slots.length})"
                    elsif slots[cond_slot].nil?
                      raise "Switch operation: condition slot #{cond_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                    end

                    c = slots[cond_slot]
                    if c[:k] == :scalar
                      !!c[:v]
                    else
                      # TODO: Proper vectorized cascade handling
                      false
                    end
                  end
                  result_slot = chosen ? chosen[1] : op.attrs[:default]
                  if result_slot >= slots.length
                    raise "Switch operation: result slot #{result_slot} out of bounds (slots.length=#{slots.length})"
                  elsif slots[result_slot].nil?
                    raise "Switch operation: result slot #{result_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  slots << slots[result_slot]

                when :store
                  name = op.attrs[:name]
                  src = op.args[0] or raise "store: missing source slot"
                  if src >= slots.length
                    raise "Store operation '#{name}': source slot #{src} out of bounds (slots.length=#{slots.length})"
                  elsif slots[src].nil?
                    raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  outputs[name] = slots[src]

                  # keep slot_id == op_index invariant
                  slots << nil

                  # Stop as soon as the requested target has been stored.
                  return outputs if target && name == target

                when :reduce
                  fn_entry = registry[op.attrs[:fn]] or raise "Function #{op.attrs[:fn]} not found in registry"
                  fn = fn_entry.fn

                  # Same slot validation as the other ops, instead of failing on nil.
                  src_slot = op.args[0] or raise "reduce: missing source slot"
                  if src_slot >= slots.length
                    raise "Reduce operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
                  elsif slots[src_slot].nil?
                    raise "Reduce operation: source slot #{src_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  src = slots[src_slot]
                  raise "Reduce expects Vec" unless src[:k] == :vec

                  result_scope = Array(op.attrs[:result_scope] || [])

                  if result_scope.empty?
                    # === GLOBAL REDUCE ===
                    # Accept either ravel or indexed.
                    vals = src[:rows].map { |r| r[:v] }
                    slots << Values.scalar(fn.call(vals))
                  else
                    # === GROUPED REDUCE ===
                    # Must have indices to group by prefix keys.
                    unless src[:has_idx]
                      raise "Grouped reduce requires indexed input (got ravel) for #{op.attrs[:fn]} at #{result_scope.inspect}"
                    end

                    group_len = result_scope.length

                    # Preserve stable source order so zips with other @result_scope vecs line up.
                    groups = {} # { key(Array<Integer>) => Array<value> }
                    order = []  # Array<key> in first-seen order

                    src[:rows].each do |row|
                      key = Array(row[:idx]).first(group_len)
                      unless groups.key?(key)
                        groups[key] = []
                        order << key
                      end
                      groups[key] << row[:v]
                    end

                    out_rows = order.map { |key| { v: fn.call(groups[key]), idx: key } }

                    slots << Values.vec(result_scope, out_rows, true)
                  end

                when :lift
                  src_slot = op.args[0]
                  if src_slot >= slots.length
                    raise "Lift operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
                  elsif slots[src_slot].nil?
                    raise "Lift operation: source slot #{src_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  v = slots[src_slot]
                  to_scope = op.attrs[:to_scope] || []
                  # Never nest deeper than the indices actually available.
                  depth = [to_scope.length, v[:rank] || v[:rows].first&.dig(:idx)&.length || 0].min
                  slots << Values.scalar(Combinators.group_rows(v[:rows], depth))

                when :align_to
                  tgt_slot = op.args[0]
                  src_slot = op.args[1]

                  if tgt_slot >= slots.length
                    raise "AlignTo operation: target slot #{tgt_slot} out of bounds (slots.length=#{slots.length})"
                  elsif slots[tgt_slot].nil?
                    raise "AlignTo operation: target slot #{tgt_slot} is nil " \
                          "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  if src_slot >= slots.length
                    raise "AlignTo operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
                  elsif slots[src_slot].nil?
                    raise "AlignTo operation: source slot #{src_slot} is nil " \
                          "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
                  end

                  tgt = slots[tgt_slot]
                  src = slots[src_slot]

                  to_scope = op.attrs[:to_scope] || []
                  require_unique = op.attrs[:require_unique] || false
                  on_missing = op.attrs[:on_missing] || :error

                  aligned = Combinators.align_to(tgt, src, to_scope: to_scope,
                                                           require_unique: require_unique,
                                                           on_missing: on_missing)
                  slots << aligned

                when :join
                  raise NotImplementedError, "Join not implemented yet"

                else
                  raise "Unknown operation: #{op.tag}"
                end
              rescue StandardError => e
                # op_index comes from each_with_index; looking the op up again with
                # Array#index would return the first op that is == to it, which is the
                # wrong position when the declaration contains equal ops.
                context_info = []
                context_info << "slots.length=#{slots.length}"
                context_info << "non_nil_slots=#{slots.compact.length}" if slots.any?(&:nil?)
                context_info << "op_attrs=#{op.attrs.inspect}" if op.attrs && !op.attrs.empty?
                context_info << "op_args=#{op.args.inspect}" if op.args && !op.args.empty?

                context_str = context_info.empty? ? "" : " (#{context_info.join(', ')})"
                raise "#{decl.name}@op#{op_index} #{op.tag}#{context_str}: #{e.message}"
              end
            end

            outputs
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module IR
      module ExecutionEngine
        # Constructors and predicates for the hash-based values the VM passes around.
        module Values
          class << self
            # Wrap a plain Ruby value as a scalar.
            #
            # @param v [Object] wrapped value
            # @return [Hash] { k: :scalar, v: v }
            def scalar(v)
              { k: :scalar, v: v }
            end

            # Build a vector value.
            #
            # With has_idx set, rows are sorted by their :idx and the rank is the
            # index length of the first (pre-sort) row, or 0 when there are no rows.
            # Without has_idx, rows are kept as given and the rank is 0.
            #
            # @param scope [Array] axes carried by the vector
            # @param rows [Array<Hash>] rows shaped { v: } or { v:, idx: }
            # @param has_idx [Boolean] whether rows carry an :idx
            # @return [Hash] { k: :vec, scope:, rows:, has_idx:, rank: }
            def vec(scope, rows, has_idx)
              return { k: :vec, scope: scope, rows: rows, has_idx: has_idx, rank: 0 } unless has_idx

              index_len = rows.empty? ? 0 : rows.first[:idx].length
              # TODO: check (cheaply) that every row's idx has the same length
              ordered = rows.sort_by { |entry| entry[:idx] } # one-time sort

              { k: :vec, scope: scope, rows: ordered, has_idx: has_idx, rank: index_len }
            end

            # Build a single row; the index is included (coerced with Array()) only
            # when one is given.
            #
            # @param v [Object] row value
            # @param idx [Object, nil] optional index
            # @return [Hash] { v: } or { v:, idx: [...] }
            def row(v, idx = nil)
              if idx
                { v: v, idx: Array(idx) }
              else
                { v: v }
              end
            end

            # @param val [Hash] VM value
            # @return [Boolean] true when val is tagged :scalar
            def scalar?(val)
              val[:k] == :scalar
            end

            # @param val [Hash] VM value
            # @return [Boolean] true when val is tagged :vec
            def vec?(val)
              val[:k] == :vec
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module IR
      # Execution engine for lowered IR.
      #
      # VALUES (plain hashes):
      # - Scalar(v)                 → { k: :scalar, v: v }
      # - Vec(scope, rows, has_idx) → { k: :vec, scope: [:axis, ...], rows: [{ v:, idx: [...] }, ...], has_idx: true/false }
      # - Rank is the idx length; scope lists the logical axes carried by the vector.
      #
      # COMBINATORS (pure, stateless):
      # - broadcast_scalar(scalar, vec)        → replicate scalar across vec rows (preserves idx/scope)
      # - zip_same_scope(vec1, vec2, ...)      → positional zip for equal scope & equal row count
      # - align_to(tgt_vec, src_vec, to_scope) → expand src by prefix indices to match a higher-rank scope
      # - group_rows(rows, depth)              → group by idx prefix into nested Ruby arrays
      #
      # INTERPRETER:
      # - Executes the ops of each declaration in order, appending results to a slot list.
      # - Trusts IR attributes (scope, has_idx, is_scalar) when shaping loaded inputs.
      #
      # OP SEMANTICS (subset):
      # - const(value)                → push Scalar(value)
      # - ref(name)                   → push the value previously stored under name
      # - load_input(plan_id, attrs)  → call accessor; push Scalar or Vec according to attrs
      # - map(fn, *args)              → scalar call when all args are scalars, else row-wise over same-scope vecs
      # - reduce(fn, ...)             → reduce one vector arg; Scalar when result_scope is empty,
      #                                 otherwise an indexed Vec grouped by idx prefix
      # - align_to(to_scope, a, b)    → align b to a's rows by index prefix
      # - array(*args)                → Scalar(Array) when all args are scalars, else a Vec of arrays
      # - lift(to_scope, slot)        → group rows with `group_rows` into a nested Scalar
      # - store(name, slot)           → bind the slot's value to name in the outputs
      #
      # DEBUGGING:
      # - Setting DEBUG_VM_ARGS prints per-op execution and arguments.
      # - ASSERT_VM_SLOTS=1 checks that the slot count equals the op index before each op.
      # - NOTE(review): DEBUG_GROUP_ROWS used to be listed here; confirm whether anything still reads it.
      module ExecutionEngine
        class << self
          # Run an IR module by handing every argument on to Interpreter.run.
          #
          # @param ir_module [Object] IR module to execute
          # @param ctx [Hash] execution context
          # @param accessors [Hash] input accessors keyed by plan id
          # @param registry [Hash] function registry
          # @return [Object] whatever Interpreter.run returns
          def run(ir_module, ctx, accessors:, registry:)
            Interpreter.run(ir_module, ctx, accessors: accessors, registry: registry)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module Kumi
  module Core
    module IR
      # One IR instruction: a tag, an attribute hash and a list of argument slots.
      # The struct itself is frozen on construction.
      Op = Struct.new(:tag, :attrs, :args, keyword_init: true) do
        def initialize(**fields)
          super
          freeze
        end
      end

      # One declaration: name, kind, shape and its ops. Each op and the struct
      # itself are frozen on construction.
      Decl = Struct.new(:name, :kind, :shape, :ops, keyword_init: true) do
        def initialize(**fields)
          super
          ops&.each { |op| op.freeze }
          freeze
        end
      end

      # A whole IR module: inputs plus declarations. Each declaration and the
      # struct itself are frozen on construction.
      Module = Struct.new(:inputs, :decls, keyword_init: true) do
        def initialize(**fields)
          super
          decls&.each { |decl| decl.freeze }
          freeze
        end
      end
    end

    # Factory methods building IR::Op instances, one per op tag.
    module IR::Ops
      # @return [IR::Op] :const op carrying `v`
      def self.Const(v)
        IR::Op.new(tag: :const, attrs: { value: v }, args: [])
      end

      # @return [IR::Op] :load_input op for the accessor plan `plan_id`
      def self.LoadInput(plan_id, scope: [], is_scalar: false, has_idx: false)
        attrs = { plan_id: plan_id, scope: scope, is_scalar: is_scalar, has_idx: has_idx }
        IR::Op.new(tag: :load_input, attrs: attrs, args: [])
      end

      # @return [IR::Op] :ref op naming a stored value
      def self.Ref(name)
        IR::Op.new(tag: :ref, attrs: { name: name }, args: [])
      end

      # @return [IR::Op] :map op applying `fn` to the given slots
      def self.Map(fn, argc, *slots)
        IR::Op.new(tag: :map, attrs: { fn: fn, argc: argc }, args: slots)
      end

      # @return [IR::Op] :array op collecting the given slots
      def self.Array(count, *slots)
        IR::Op.new(tag: :array, attrs: { count: count }, args: slots)
      end

      # @return [IR::Op] :switch op with its cases and default
      def self.Switch(cases, default)
        IR::Op.new(tag: :switch, attrs: { cases: cases, default: default }, args: [])
      end

      # @return [IR::Op] :guard_push op reading its condition from `cond_slot`
      def self.GuardPush(cond_slot)
        IR::Op.new(tag: :guard_push, attrs: { cond_slot: cond_slot }, args: [])
      end

      # @return [IR::Op] :guard_pop op
      def self.GuardPop
        IR::Op.new(tag: :guard_pop, attrs: {}, args: [])
      end

      # @return [IR::Op] :assign op copying slot `src` into slot `dst`
      def self.Assign(dst:, src:)
        IR::Op.new(tag: :assign, attrs: { dst: dst, src: src }, args: [])
      end

      # @return [IR::Op] :store op binding `slot` to `name`
      def self.Store(name, slot)
        IR::Op.new(tag: :store, attrs: { name: name }, args: [slot])
      end

      # @return [IR::Op] :lift op for `slot`
      def self.Lift(to_scope, slot)
        IR::Op.new(tag: :lift, attrs: { to_scope: to_scope }, args: [slot])
      end

      # @return [IR::Op] :join op over the given slots
      def self.Join(*slots)
        IR::Op.new(tag: :join, attrs: {}, args: slots)
      end

      # Up-sample `source` to the scope (and order) of `target` by index-prefix.
      # Policies: :error | :nil for missing; require_unique: true enforces 1:1 on prefix.
      # A non-array `to_scope` is wrapped in a one-element array.
      #
      # @return [IR::Op] :align_to op with args [target_slot, source_slot]
      def self.AlignTo(target_slot, source_slot, to_scope:, on_missing: :error, require_unique: true)
        scope_array = to_scope.is_a?(::Array) ? to_scope : [to_scope]
        attrs = { to_scope: scope_array, on_missing: on_missing, require_unique: require_unique }
        IR::Op.new(tag: :align_to, attrs: attrs, args: [target_slot, source_slot])
      end

      # @return [IR::Op] :reduce op applying `fn` to `slot`
      def self.Reduce(fn, axis, result_scope, flatten, slot)
        attrs = { fn: fn, axis: axis, result_scope: result_scope, flatten: flatten }
        IR::Op.new(tag: :reduce, attrs: attrs, args: [slot])
      end
    end
  end
end
@@ -4,12 +4,12 @@ module Kumi
4
4
  module Core
5
5
  module RubyParser
6
6
  class BuildContext
7
- attr_reader :inputs, :attributes, :traits
7
+ attr_reader :inputs, :values, :traits
8
8
  attr_accessor :current_location
9
9
 
10
10
  def initialize
11
11
  @inputs = []
12
- @attributes = []
12
+ @values = []
13
13
  @traits = []
14
14
  @input_block_defined = false
15
15
  end
@@ -20,18 +20,6 @@ module Kumi
20
20
  def to_ast_node
21
21
  Kumi::Syntax::DeclarationReference.new(@name, loc: @context.current_location)
22
22
  end
23
-
24
- private
25
-
26
- def method_missing(method_name, *args, &block)
27
- # All operators are handled by ProxyRefinement methods
28
- # Field access should use input.field.subfield syntax, not bare identifiers
29
- super
30
- end
31
-
32
- def respond_to_missing?(_method_name, _include_private = false)
33
- true
34
- end
35
23
  end
36
24
  end
37
25
  end