kumi 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/README.md +59 -115
- data/lib/kumi/core/analyzer/passes/output_schema_pass.rb +6 -4
- data/lib/kumi/core/functions/function_spec.rb +16 -0
- data/lib/kumi/core/functions/loader.rb +2 -2
- data/lib/kumi/version.rb +1 -1
- metadata +2 -11
- data/lib/kumi/compiler.rb +0 -21
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +0 -816
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +0 -907
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +0 -349
- data/lib/kumi/core/analyzer/passes/type_checker.rb +0 -179
- data/lib/kumi/core/analyzer/passes/type_inferencer_pass.rb +0 -234
- data/lib/kumi/core/compiler_base.rb +0 -137
- data/lib/kumi/core/explain.rb +0 -254
- data/lib/kumi/core/functions/model.rb +0 -10
- data/lib/kumi/schema_metadata.rb +0 -524
@@ -1,816 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Kumi
|
4
|
-
module Core
|
5
|
-
module Analyzer
|
6
|
-
module Passes
|
7
|
-
# Detects which operations should be broadcast over arrays
|
8
|
-
# DEPENDENCIES: :input_metadata, :declarations
|
9
|
-
# PRODUCES: :broadcasts
|
10
|
-
class BroadcastDetector < PassBase
|
11
|
-
# Classifies every trait and value declaration as vectorized, reduction or
# scalar, and stores the collected metadata under the :broadcasts state key.
#
# Traits are analyzed before values so that a value referring to a trait
# already finds the trait's vectorization status in +vectorized_values+.
#
# @param errors [Array] error sink handed down to the analysis helpers
# @return whatever +state.with(:broadcasts, ...)+ returns
def run(errors)
  input_meta = get_state(:input_metadata) || {}
  declarations = get_state(:declarations) || {}

  array_fields = find_array_fields(input_meta)
  nested_paths = build_nested_paths_metadata(input_meta)

  # Everything collected here is read later by the compiler.
  broadcasts = {
    array_fields: array_fields,
    vectorized_operations: {},
    reduction_operations: {},
    nested_paths: nested_paths,
    flattening_declarations: {}, # declarations whose argument must be flattened
    cascade_strategies: {},      # pre-computed cascade processing strategies
    compilation_metadata: {}     # pre-computed compilation decisions
  }

  # name => { vectorized: Boolean, array_source: ... }, consulted while analyzing later declarations
  vectorized_values = {}

  trait_entries = declarations.select { |_n, d| d.is_a?(Kumi::Syntax::TraitDeclaration) }.to_a
  value_entries = declarations.select { |_n, d| d.is_a?(Kumi::Syntax::ValueDeclaration) }.to_a

  [*trait_entries, *value_entries].each do |name, decl|
    analysis = analyze_value_vectorization(name, decl.expression, array_fields, nested_paths,
                                           vectorized_values, errors, declarations)

    puts "#{name}: #{analysis[:type]} #{format_broadcast_info(analysis)}" if ENV["DEBUG_BROADCAST_CLEAN"]

    info = analysis[:info]
    if analysis[:type] == :vectorized
      broadcasts[:vectorized_operations][name] = info

      # Cascades carry a processing strategy; keep it separately for easy compiler access.
      strategy = info[:processing_strategy]
      broadcasts[:cascade_strategies][name] = strategy if strategy

      # Remember the originating array so later dimension checks can compare sources.
      vectorized_values[name] = { vectorized: true, array_source: extract_array_source(info, array_fields) }
    elsif analysis[:type] == :reduction
      broadcasts[:reduction_operations][name] = info
      broadcasts[:flattening_declarations][name] = info
      # A reduction yields a scalar, so dependants must not treat it as vectorized.
      vectorized_values[name] = { vectorized: false }
    end

    broadcasts[:compilation_metadata][name] =
      compute_compilation_metadata(name, decl, broadcasts, vectorized_values, array_fields)
  end

  state.with(:broadcasts, broadcasts.freeze)
end
|
72
|
-
|
73
|
-
private
|
74
|
-
|
75
|
-
# Infers the array scope (list of path segments) an argument lives in.
#
# * Input element reference whose full path is registered in +nested_paths+:
#   every segment whose path prefix (up to and including that segment) has a
#   truthy entry in +nested_paths+.
# * Any other input element reference: the segments that are keys of +array_fields+.
# * Call expression: the longest scope among its arguments (first one wins on ties).
# * Anything else: an empty scope.
#
# @return [Array] path segments
def infer_argument_scope(arg, array_fields, nested_paths)
  case arg
  when Kumi::Syntax::InputElementReference
    path = arg.path
    return path.select { |segment| array_fields.key?(segment) } unless nested_paths.key?(path)

    path.each_with_index
        .select { |_segment, index| nested_paths[path[0..index]] }
        .map(&:first)
  when Kumi::Syntax::CallExpression
    # Fold over the arguments, replacing the best scope only when a strictly longer one shows up.
    arg.args.reduce([]) do |deepest, nested_arg|
      candidate = infer_argument_scope(nested_arg, array_fields, nested_paths)
      candidate.length > deepest.length ? candidate : deepest
    end
  else
    []
  end
end
|
96
|
-
|
97
|
-
# Renders a one-line, human-readable summary of an analysis result.
# Used only for the DEBUG_BROADCAST_CLEAN trace output.
#
# @param result [Hash] analysis result with :type and (optionally) :info
# @return [String]
def format_broadcast_info(result)
  kind = result[:type]
  return "→ scalar" if kind == :scalar

  details = result[:info]
  if kind == :vectorized
    dotted_path = details[:path]&.join('.')
    "→ #{details[:source]} (path: #{dotted_path})"
  elsif kind == :reduction
    # Show only the unqualified class name of the reduced argument.
    short_class = details[:argument]&.class&.name&.split('::')&.last
    "→ fn:#{details[:function]} (arg: #{short_class})"
  else
    "→ #{details}"
  end
end
|
111
|
-
|
112
|
-
# Pre-computes the compilation decisions for a single declaration from the
# metadata collected so far.
#
# @param name declaration name used as key into +compiler_metadata+
# @param compiler_metadata [Hash] needs :vectorized_operations,
#   :flattening_declarations, :cascade_strategies and :nested_paths
# @return [Hash] with :operation_mode, :is_vectorized, :vectorization_context,
#   :cascade_info and :function_call_strategy
def compute_compilation_metadata(name, _decl, compiler_metadata, _vectorized_values, _array_fields)
  vectorized_info = compiler_metadata[:vectorized_operations][name]
  flattening_info = compiler_metadata[:flattening_declarations][name]

  vectorization_context = {}
  cascade_info = {}

  if vectorized_info
    vectorization_context = {
      has_vectorized_args: true,
      needs_broadcasting: true,
      array_structure_depth: estimate_array_depth(vectorized_info, compiler_metadata[:nested_paths])
    }

    # Cascades additionally record how they have to be processed.
    if vectorized_info[:source] == :cascade_with_vectorized_conditions_or_results
      strategy = compiler_metadata[:cascade_strategies][name]
      cascade_info = {
        is_vectorized: true,
        processing_mode: strategy&.dig(:mode) || :hierarchical,
        needs_hierarchical_processing: needs_hierarchical_processing?(strategy)
      }
    end
  end

  function_call_strategy =
    if flattening_info
      {
        flattening_required: true,
        flatten_argument_indices: flattening_info[:flatten_argument_indices] || [0],
        result_structure: :scalar
      }
    else
      {}
    end

  {
    operation_mode: flattening_info ? :flatten : :broadcast, # :broadcast is the default mode
    is_vectorized: vectorized_info ? true : false,
    vectorization_context: vectorization_context,
    cascade_info: cascade_info,
    function_call_strategy: function_call_strategy
  }
end
|
158
|
-
|
159
|
-
# Estimates how many array levels a vectorized operation spans.
#
# Only :nested_array_access carries a usable path; its depth is read from
# +nested_paths+. Every other source (including :array_field_access), and a
# missing depth entry, count as depth 1.
#
# @return [Integer]
def estimate_array_depth(vectorized_info, nested_paths)
  return 1 unless vectorized_info[:source] == :nested_array_access

  nested_paths[vectorized_info[:path]]&.dig(:array_depth) || 1
end
|
170
|
-
|
171
|
-
# Tells whether a cascade strategy requires hierarchical processing, i.e.
# whether its :mode is :nested_array or :deep_nested_array.
#
# @param strategy [Hash, nil]
# @return [Boolean] false when no strategy is given
def needs_hierarchical_processing?(strategy)
  return false unless strategy

  %i[nested_array deep_nested_array].include?(strategy[:mode])
end
|
181
|
-
|
182
|
-
# Collects the top-level inputs that are arrays with declared children.
#
# @param input_meta [Hash] input name => metadata hash (:type, :children)
# @return [Hash] name => { element_fields: [...], element_types: { child => type } };
#   children without a :type are reported as :any
def find_array_fields(input_meta)
  input_meta.each_with_object({}) do |(name, meta), fields|
    children = meta[:children]
    next unless meta[:type] == :array && children

    fields[name] = {
      element_fields: children.keys,
      element_types: children.transform_values { |child| child[:type] || :any }
    }
  end
end
|
194
|
-
|
195
|
-
# Builds the path => metadata table by walking every root input with
# collect_nested_paths, starting at array depth 0 and with no parent access mode.
#
# @param input_meta [Hash] root input name => metadata
# @return [Hash] path (Array of segments) => path metadata
def build_nested_paths_metadata(input_meta)
  input_meta.each_with_object({}) do |(root_name, root_meta), paths|
    collect_nested_paths(paths, [root_name], root_meta, 0, nil)
  end
end
|
205
|
-
|
206
|
-
# Recursively walks one input field and registers, in +nested_paths+, every
# path that sits at or below an array.
#
# An array field bumps the depth by one and sets the access mode passed to
# everything beneath it (its own :access_mode, defaulting to :field).
# Non-array fields pass depth and access mode through unchanged.
#
# @param nested_paths [Hash] accumulator, mutated in place
# @param current_path [Array] path of the field being visited
# @param current_meta [Hash] metadata of that field (:type, :children, :access_mode)
# @param array_depth [Integer] number of arrays enclosing this field
# @param parent_access_mode access mode inherited from the nearest enclosing array
def collect_nested_paths(nested_paths, current_path, current_meta, array_depth, parent_access_mode = nil)
  is_array = current_meta[:type] == :array
  depth = is_array ? array_depth + 1 : array_depth
  access_mode = is_array ? (current_meta[:access_mode] || :field) : parent_access_mode
  inside_array = depth.positive?
  children = current_meta[:children]

  if children
    children.each do |child_name, child_meta|
      child_path = [*current_path, child_name]

      # Register the child itself before descending into its own children.
      nested_paths[child_path] = build_path_metadata(child_path, child_meta, depth, access_mode) if inside_array

      collect_nested_paths(nested_paths, child_path, child_meta, depth, access_mode)
    end
  elsif inside_array
    # Leaf below (or being) an array.
    nested_paths[current_path] = build_path_metadata(current_path, current_meta, depth, access_mode)
  end
end
|
233
|
-
|
234
|
-
# Builds the metadata record stored for one array-related path.
#
# @param field_meta [Hash] metadata of the field; :type defaults to :any
# @param array_depth [Integer] number of enclosing arrays
# @param parent_access_mode access mode of the enclosing array field
# @return [Hash]
def build_path_metadata(_path, field_meta, array_depth, parent_access_mode = nil)
  structure = array_depth > 1 ? :nested_array : :array
  element_type = field_meta[:type] || :any

  {
    array_depth: array_depth,
    element_type: element_type,
    operation_mode: :broadcast, # default; aggregations may override it
    result_structure: structure,
    access_mode: parent_access_mode
  }
end
|
243
|
-
|
244
|
-
# Classifies an expression as :vectorized, :reduction or :scalar.
#
# * Input element references are vectorized when their path is registered in
#   +nested_paths+ or their root is an array field.
# * Declaration references are vectorized when the referenced declaration was
#   already recorded as vectorized.
# * Call and cascade expressions are delegated to their dedicated analyzers.
# * Everything else is scalar.
#
# @return [Hash] { type: ..., info: ... } (scalar results carry no :info)
def analyze_value_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  case expr
  when Kumi::Syntax::InputElementReference
    path = expr.path
    # The nested path table takes precedence because it also covers nested arrays.
    if nested_paths.key?(path)
      return { type: :vectorized,
               info: { source: :nested_array_access, path: path, nested_metadata: nested_paths[path] } }
    end
    return { type: :vectorized, info: { source: :array_field_access, path: path } } if array_fields.key?(path.first)

    { type: :scalar }
  when Kumi::Syntax::DeclarationReference
    known = vectorized_values[expr.name]
    return { type: :scalar } unless known && known[:vectorized]

    { type: :vectorized, info: { source: :vectorized_declaration, name: expr.name } }
  when Kumi::Syntax::CallExpression
    analyze_call_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
  when Kumi::Syntax::CascadeExpression
    analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
  else
    { type: :scalar }
  end
end
|
275
|
-
|
276
|
-
# Classifies a function call as :vectorized, :reduction or :scalar.
#
# Decision order:
#   1. +cascade_and+ is vectorized iff one of its declaration-reference
#      arguments is a vectorized declaration.
#   2. Reducers produce a :reduction when at least one argument is vectorized,
#      otherwise they are scalar.
#   3. Structure functions are scalar when an argument is a pure reducer call
#      (reducer but not structure function) or nothing is vectorized; otherwise
#      they are vectorized.
#   4. Any other call with a vectorized argument is a vectorized map, unless its
#      vectorized arguments come from different array sources, which is
#      reported as a semantic error and treated as scalar.
#   5. Everything else is scalar.
#
# @return [Hash] { type: ..., info: ... } (scalar results carry no :info)
def analyze_call_vectorization(_name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  fn = expr.fn_name
  registry_entry = Kumi::Registry.entry(fn)
  reducer = registry_entry&.reducer
  structure = registry_entry&.structure_function

  # Analyze every argument exactly once and remember which positions are vectorized.
  infos = expr.args.map do |argument|
    analyze_argument_vectorization(argument, array_fields, nested_paths, vectorized_values, definitions)
  end
  positions = infos.each_with_index.select { |info, _index| info[:vectorized] }.map(&:last)
  any_vectorized = !positions.empty?

  if fn == :cascade_and
    trait_ref = expr.args.find do |argument|
      argument.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[argument.name]&.[](:vectorized)
    end
    return { type: :scalar } unless trait_ref

    return { type: :vectorized,
             info: { source: :cascade_condition_with_vectorized_trait, trait: trait_ref.name } }
  end

  if reducer
    # A reducer over purely scalar input has nothing to collapse.
    return { type: :scalar } unless any_vectorized

    first = positions.first
    return {
      type: :reduction,
      info: {
        function: fn,
        source: infos[first][:source],
        argument: expr.args[first], # AST of the first vectorized argument
        flatten_argument_indices: positions.dup
      }
    }
  end

  if structure
    # size(sum(x)): the inner pure reducer collapses first, so the outer call is scalar.
    # Functions that are both reducer and structure function do not count as pure reducers.
    wraps_pure_reducer = expr.args.any? do |argument|
      next false unless argument.is_a?(Kumi::Syntax::CallExpression)

      inner = Kumi::Registry.entry(argument.fn_name)
      inner&.reducer && !inner&.structure_function
    end
    return { type: :scalar } if wraps_pure_reducer
    return { type: :scalar } unless any_vectorized

    leading = infos[positions.first]
    return {
      type: :vectorized,
      info: {
        operation: fn,
        source: leading[:source],
        parent_scope: leading[:parent_scope] || leading[:source], # falls back to the source when no parent is recorded
        vectorized_args: positions.each_with_object({}) { |index, flags| flags[index] = true }
      }
    }
  end

  return { type: :scalar } unless any_vectorized

  # Vectorized arguments must all originate from the same array.
  sources = infos.values_at(*positions).map { |info| info[:array_source] }.compact.uniq
  if sources.size > 1
    message = build_dimension_mismatch_error(expr, infos, array_fields, sources)
    report_error(errors, message, location: expr.loc, type: :semantic)
    return { type: :scalar } # fail safe so the mismatch does not cascade into more errors
  end

  {
    type: :vectorized,
    info: {
      operation: fn,
      source: infos[positions.first][:source],
      vectorized_args: positions.each_with_object({}) { |index, flags| flags[index] = true }
    }
  }
end
|
378
|
-
|
379
|
-
# Delegates to Kumi::Registry to ask whether +fn_name+ is registered as a
# structure function (one that works on the container rather than being
# broadcast over its elements).
#
# @param fn_name function name to look up
# @return whatever Kumi::Registry.structure_function? returns
def structure_function?(fn_name)
  registry = Kumi::Registry
  registry.structure_function?(fn_name)
end
|
383
|
-
|
384
|
-
# Determines whether a single call argument is vectorized and, where known,
# which array it originates from.
#
# Nested call arguments are analyzed recursively with a throw-away error
# list, so problems inside them are not reported from here.
#
# @return [Hash] always has :vectorized; may have :source and :array_source
def analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions = nil)
  case arg
  when Kumi::Syntax::InputElementReference
    root = arg.path.first
    # The nested path table is consulted first; array_fields is the older fallback.
    return { vectorized: true, source: :nested_array_field, array_source: root } if nested_paths.key?(arg.path)
    return { vectorized: true, source: :array_field, array_source: root } if array_fields.key?(root)

    { vectorized: false }
  when Kumi::Syntax::DeclarationReference
    known = vectorized_values[arg.name]
    return { vectorized: false } unless known && known[:vectorized]

    { vectorized: true, source: :vectorized_value, array_source: known[:array_source] }
  when Kumi::Syntax::CallExpression
    nested = analyze_value_vectorization(nil, arg, array_fields, nested_paths, vectorized_values, [], definitions)

    if nested[:type] == :reduction
      reduced = nested[:info] && nested[:info][:argument]
      return { vectorized: false, source: :scalar_from_reduction } unless reduced

      # A reduction over more than one array level is treated as keeping the outer level.
      scope = infer_argument_scope(reduced, array_fields, nested_paths)
      if scope.length > 1
        { vectorized: true, source: :grouped_reduction, array_source: scope.first }
      else
        { vectorized: false, source: :scalar_from_reduction }
      end
    elsif nested[:type] == :vectorized
      { vectorized: true, source: :expression }
    else
      { vectorized: false, source: :scalar }
    end
  else
    { vectorized: false }
  end
end
|
439
|
-
|
440
|
-
# Returns the root array a vectorized result originates from, when it can be
# read directly off the info hash.
#
# Only :array_field_access results are resolved (first segment of :path).
# Every other source, including :cascade_condition_with_vectorized_trait,
# yields nil.
# TODO: Could be enhanced to trace through trait dependencies for cascades.
#
# @return the first path segment or nil
def extract_array_source(info, _array_fields)
  return nil unless info[:source] == :array_field_access

  info[:path]&.first
end
|
449
|
-
|
450
|
-
# Classifies a cascade expression and validates that the arrays used by its
# conditions and results are dimensionally compatible.
#
# For each case the result and the condition are analyzed; a vectorized one
# makes the whole cascade vectorized and contributes a (source, dimension)
# pair. +cascade_and+ conditions are expanded: each declaration-reference
# argument is traced separately and the arguments are checked against each
# other. Conflicts are reported through report_cascade_dimension_mismatch and
# make the cascade scalar so no follow-up errors are produced.
#
# Set the DEBUG_CASCADE environment variable to trace the analysis on stdout.
#
# @return [Hash] { type: :scalar } or { type: :vectorized, info: {...} }
def analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  debug = ENV["DEBUG_CASCADE"]

  cond_sources = []
  cond_dims = []
  res_sources = []
  res_dims = []
  vectorized = false

  if debug
    puts "DEBUG: analyze_cascade_vectorization for #{name}"
    puts " Expression: #{expr.inspect}"
    puts " Cases: #{expr.cases.length}"
  end

  expr.cases.each do |branch|
    # --- result side ---
    result_info = analyze_value_vectorization(nil, branch.result, array_fields, nested_paths,
                                              vectorized_values, errors, definitions)
    if result_info[:type] == :vectorized
      vectorized = true
      src, dim = trace_dimensional_source(branch.result, result_info, vectorized_values, array_fields, definitions)
      res_sources << src
      res_dims << dim
    end

    # --- condition side ---
    condition = branch.condition
    condition_info = analyze_value_vectorization(nil, condition, array_fields, nested_paths,
                                                 vectorized_values, errors, definitions)
    next unless condition_info[:type] == :vectorized

    vectorized = true
    is_call = condition.is_a?(Kumi::Syntax::CallExpression)

    if debug
      puts " Checking condition type: #{condition.class}"
      puts " Condition fn_name: #{condition.fn_name}" if is_call
    end

    unless is_call && condition.fn_name == :cascade_and
      src, dim = trace_dimensional_source(condition, condition_info, vectorized_values, array_fields, definitions)
      cond_sources << src
      cond_dims << dim
      next
    end

    # cascade_and: trace each referenced trait on its own and compare them.
    puts " -> ENTERING CASCADE_AND SPECIAL HANDLING" if debug
    and_sources = []
    and_dims = []

    puts " cascade_and args: #{condition.args.map(&:class)}" if debug

    condition.args.each do |arg|
      puts " Processing arg: #{arg.inspect}" if debug
      next unless arg.is_a?(Kumi::Syntax::DeclarationReference)

      puts " Looking up declaration: #{arg.name}" if debug
      decl = definitions[arg.name] if definitions
      if decl
        puts " Found declaration, tracing source..." if debug
        arg_source, arg_dimension = trace_dimensional_source(decl.expression, condition_info, vectorized_values,
                                                             array_fields, definitions)
        puts " Traced source: #{arg_source}, dimension: #{arg_dimension}" if debug
        and_sources << arg_source
        and_dims << arg_dimension
      elsif debug
        puts " Declaration not found: #{arg.name}"
      end
    end

    uniq_sources = and_sources.uniq
    uniq_dims = and_dims.uniq

    if debug
      puts " cascade_sources: #{and_sources.inspect}"
      puts " cascade_dimensions: #{and_dims.inspect}"
      puts " unique_sources: #{uniq_sources.inspect}"
      puts " unique_dimensions: #{uniq_dims.inspect}"
    end

    # A conflict is either several known sources or dimensions that are not hierarchically related.
    has_source_conflict = uniq_sources.length > 1 && uniq_sources.none? { |s| s.to_s.include?("unknown") }
    has_dimension_conflict = uniq_dims.length > 1 && !valid_hierarchical_broadcasting?(uniq_dims)

    if debug
      puts " has_source_conflict: #{has_source_conflict}"
      puts " has_dimension_conflict: #{has_dimension_conflict}"
      puts " valid_hierarchical_broadcasting?: #{valid_hierarchical_broadcasting?(uniq_dims)}" if uniq_dims.length > 1
    end

    if has_source_conflict || has_dimension_conflict
      if debug
        puts " -> FOUND CASCADE_AND DIMENSIONAL CONFLICT:"
        puts " Sources: #{uniq_sources.inspect}"
        puts " Dimensions: #{uniq_dims.inspect}"
        puts " Source conflict: #{has_source_conflict}"
        puts " Dimension conflict: #{has_dimension_conflict}"
      end
      report_cascade_dimension_mismatch(errors, expr, uniq_sources, uniq_dims)
      return { type: :scalar }
    end

    cond_sources.concat(and_sources)
    cond_dims.concat(and_dims)
  end

  return { type: :scalar } unless vectorized

  all_sources = (cond_sources + res_sources).compact.uniq
  all_dims = (cond_dims + res_dims).compact.uniq

  if debug
    puts " is_vectorized: true"
    puts " condition_sources: #{cond_sources.inspect}"
    puts " result_sources: #{res_sources.inspect}"
    puts " condition_dimensions: #{cond_dims.inspect}"
    puts " result_dimensions: #{res_dims.inspect}"
    puts " all_sources: #{all_sources.inspect}"
    puts " all_dimensions: #{all_dims.inspect}"
  end

  # Deliberately lenient: placeholder sources ("unknown", "operation") never count as a mismatch.
  definite_sources = all_sources.reject { |s| s.to_s.match?(/unknown|operation/) }

  if debug
    puts " definite_sources: #{definite_sources.inspect}"
    puts " definite_sources.length: #{definite_sources.length}"
  end

  if definite_sources.length > 1
    hierarchical = valid_hierarchical_broadcasting?(all_dims)
    puts " valid_hierarchical_broadcasting?: #{hierarchical}" if debug
    unless hierarchical
      puts " -> REPORTING DIMENSIONAL MISMATCH" if debug
      report_cascade_dimension_mismatch(errors, expr, definite_sources, all_dims)
      return { type: :scalar } # treat as scalar to prevent further errors
    end
  end

  primary = all_dims.first
  processing_strategy = compute_cascade_processing_strategy(primary, nested_paths)

  {
    type: :vectorized,
    info: {
      source: :cascade_with_vectorized_conditions_or_results,
      dimensional_requirements: {
        conditions: { sources: cond_sources.uniq, dimensions: cond_dims.uniq },
        results: { sources: res_sources.uniq, dimensions: res_dims.uniq }
      },
      primary_dimension: primary,
      nested_paths: extract_nested_paths_from_dimensions(primary, nested_paths),
      processing_strategy: processing_strategy
    }
  }
end
|
614
|
-
|
615
|
-
# Traces an expression back to the array it iterates over.
#
# * Input element reference: root segment and full path.
# * Declaration reference: the recorded :array_source if there is one,
#   otherwise the referenced declaration's expression is traced recursively.
# * Call expression: the first vectorized argument is traced. Arguments are
#   probed with an empty nested-path table here, so only +array_fields+ and
#   already-known vectorized declarations count.
# * Placeholder symbols are returned when nothing can be determined.
#
# @param info passed through unchanged to recursive calls
# @return [Array] pair of [source, dimension]
def trace_dimensional_source(expr, info, vectorized_values, array_fields, definitions = nil)
  case expr
  when Kumi::Syntax::InputElementReference
    [expr.path.first, expr.path]
  when Kumi::Syntax::DeclarationReference
    known = vectorized_values[expr.name]
    return [:unknown_declaration, [:unknown_declaration]] unless known

    recorded_source = known[:array_source]
    return [recorded_source, [recorded_source]] if recorded_source

    # No recorded source: follow the declaration's own expression.
    target = definitions[expr.name] if definitions
    return [:unknown_vectorized_operation, [:unknown_vectorized_operation]] unless target

    trace_dimensional_source(target.expression, info, vectorized_values, array_fields, definitions)
  when Kumi::Syntax::CallExpression
    vectorized_arg = expr.args.find do |candidate|
      analyze_argument_vectorization(candidate, array_fields, {}, vectorized_values, definitions)[:vectorized]
    end
    return [:operation_unknown, [:operation_unknown]] unless vectorized_arg

    trace_dimensional_source(vectorized_arg, info, vectorized_values, array_fields, definitions)
  else
    [:unknown_expr, [:unknown_expr]]
  end
end
|
658
|
-
|
659
|
-
# Looks up the nested-path metadata for a dimension.
#
# @param dimension expected to be a path (Array); anything else yields nil
# @param nested_paths [Hash] path => metadata
# @return the metadata stored for +dimension+, or nil
def extract_nested_paths_from_dimensions(dimension, nested_paths)
  dimension.is_a?(Array) ? nested_paths[dimension] : nil
end
|
664
|
-
|
665
|
-
# Check if dimensions represent valid hierarchical broadcasting (parent-to-child)
# Example: [:regions, :offices, :teams] can broadcast to [:regions, :offices, :teams, :employees]
#
# Each dimension is a field path. Its structural path is the path without the
# final field name (single-segment paths are kept as they are), so
# [:regions, :offices, :teams, :performance_score] has the structural path
# [:regions, :offices, :teams].
#
# The dimensions are compatible when
#   * there is at most one of them, or
#   * all structural paths are identical (same level), or
#   * all structural paths share one root and form a single parent -> child
#     chain, i.e. sorted by length each path is a prefix of the next one.
#
# The chain must hold for ALL structural paths. Sibling branches such as
# [:r, :offices] and [:r, :teams] are rejected even when a common ancestor
# like [:r] is also present.
#
# Set the DEBUG_CASCADE environment variable to trace the decision on stdout.
#
# @param dimensions [Array<Array>] list of field paths
# @return [Boolean]
def valid_hierarchical_broadcasting?(dimensions)
  debug = ENV["DEBUG_CASCADE"]
  puts " DEBUG valid_hierarchical_broadcasting?: dimensions=#{dimensions.inspect}" if debug

  return true if dimensions.length <= 1

  # Drop the final field name so only the enclosing structure is compared.
  structural_paths = dimensions.map { |dim| dim.length > 1 ? dim[0..-2] : dim }.uniq
  puts " structural_paths: #{structural_paths.inspect}" if debug

  roots = structural_paths.map(&:first).uniq
  puts " root_groups: #{roots.inspect}" if debug

  # All dimensions must come from the same root.
  if roots.length > 1
    puts " -> REJECT: Multiple roots" if debug
    return false
  end

  if structural_paths.length == 1
    puts " -> ACCEPT: All dimensions at same structural level" if debug
    return true
  end

  sorted_paths = structural_paths.sort_by(&:length)
  puts " sorted structural paths: #{sorted_paths.inspect}" if debug

  # Prefix-of is transitive, so checking neighbours in length order covers every pair.
  # Two distinct paths of equal length end up adjacent and can never be prefixes of each other.
  sorted_paths.each_cons(2) do |shorter, longer|
    if longer[0, shorter.length] == shorter
      puts " Found parent-child relationship: #{shorter.inspect} → #{longer.inspect}" if debug
      next
    end

    puts " -> REJECT: #{shorter.inspect} and #{longer.inspect} are sibling branches" if debug
    return false
  end

  puts " -> ACCEPT: All dimensions compatible" if debug
  true
end
|
738
|
-
|
739
|
-
# Describes how a cascade should be processed for a given primary dimension.
#
# primary_dimension - dimension path (must respond to #length); when it is
#                     nil/false the cascade is treated as scalar.
# nested_paths      - lookup keyed by dimension path; the entry for
#                     primary_dimension is passed through unchanged.
#
# Returns { mode: :scalar } when there is no primary dimension, otherwise a
# Hash with :mode, :structure_depth, :dimension_path, :element_processing
# and :nested_path_info.
def compute_cascade_processing_strategy(primary_dimension, nested_paths)
  return { mode: :scalar } unless primary_dimension

  # The nesting depth is simply the number of segments in the dimension path.
  depth = primary_dimension.length

  # Depth 0-1 is a single-level array, 2-4 is nested, anything else is deep.
  mode =
    if [0, 1].include?(depth)
      :simple_array
    elsif [2, 3, 4].include?(depth)
      :nested_array
    else
      :deep_nested_array
    end

  {
    mode: mode,
    structure_depth: depth,
    dimension_path: primary_dimension,
    element_processing: :cascade_conditional_logic,
    nested_path_info: nested_paths[primary_dimension]
  }
end
|
766
|
-
|
767
|
-
# Records a semantic error for a cascade whose conditions/results mix arrays
# with incompatible dimensions.
#
# errors     - error collection handed on to report_error
# expr       - offending expression; its #loc is used as the error location
# sources    - array sources involved (joined with ", " in the message)
# dimensions - dimension paths involved (each rendered via #inspect)
#
# When the DEBUG_CASCADE environment variable is set, the inputs and the
# result of valid_hierarchical_broadcasting? are printed to stdout first.
def report_cascade_dimension_mismatch(errors, expr, sources, dimensions)
  if ENV["DEBUG_CASCADE"]
    puts "DEBUG: Dimensional analysis details:"
    puts " Sources: #{sources.inspect}"
    puts " Dimensions: #{dimensions.inspect}"
    puts " Valid hierarchical? #{valid_hierarchical_broadcasting?(dimensions)}"
  end

  source_list = sources.join(", ")
  dimension_list = dimensions.map(&:inspect).join(", ")

  message = "Cascade dimensional mismatch: Cannot mix arrays from different sources (#{source_list}) " \
            "with dimensions (#{dimension_list}) in cascade conditions and results."
  report_error(errors, message, location: expr.loc, type: :semantic)
end
|
777
|
-
|
778
|
-
# Composes the error text for an operation whose vectorized operands come
# from different array sources.
#
# _expr              - unused
# arg_infos          - per-argument Hashes; only entries with a truthy
#                      :vectorized are listed, and :array_source is read
# array_fields       - array field metadata keyed by array source
# vectorized_sources - source names listed in the summary sentence
#
# Returns a single String: summary, problem header, one line per vectorized
# operand whose source is present in array_fields, then the explanation.
def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
  header = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "

  vectorized = arg_infos.select { |info| info[:vectorized] }

  # Operand numbers count positions among the vectorized arguments, so an
  # operand skipped here still consumes its number.
  operand_lines = vectorized.each_with_index.map do |info, position|
    source = info[:array_source]
    next unless source && array_fields[source]

    resolved_type = determine_array_type(source, array_fields)
    " - Operand #{position + 1} resolves to #{resolved_type} from array '#{source}'\n"
  end.compact

  footer = "Direct operations on arrays from different sources is ambiguous and not supported. " \
           "Vectorized operations can only work on fields from the same array input."

  [header, "Problem: Multiple operands are arrays from different sources:\n", *operand_lines, footer].join
end
|
799
|
-
|
800
|
-
# Renders a human-readable type description for an array source, for use in
# error messages.
#
# array_source - key into array_fields
# array_fields - array field metadata; each entry may carry :element_types,
#                a Hash whose values are the element types
#
# Returns "array(<type>)" when every element shares one type,
# "array(mixed)" when several distinct types are present, and "array(any)"
# when nothing is known: the source is missing from array_fields, the entry
# has no :element_types, or :element_types is empty.
def determine_array_type(array_source, array_fields)
  field_info = array_fields[array_source]
  # Guard against an unknown source instead of raising NoMethodError on nil.
  element_types = field_info && field_info[:element_types]
  return "array(any)" unless element_types

  # For nested arrays (like items.name where items is an array), this represents array(element_type)
  distinct_types = element_types.values.uniq

  case distinct_types.length
  when 0 then "array(any)" # no element types recorded, so nothing to call "mixed"
  when 1 then "array(#{distinct_types.first})"
  else "array(mixed)"
  end
end
|
812
|
-
end
|
813
|
-
end
|
814
|
-
end
|
815
|
-
end
|
816
|
-
end
|