kumi 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,816 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Core
5
- module Analyzer
6
- module Passes
7
- # Detects which operations should be broadcast over arrays
8
- # DEPENDENCIES: :input_metadata, :declarations
9
- # PRODUCES: :broadcasts
10
- class BroadcastDetector < PassBase
# Entry point of the pass. Classifies every trait and value declaration as
# vectorized, reduction, or neither, and stores the collected compiler
# metadata under the :broadcasts state key.
#
# @param errors [Array] error collector handed down to the analysis helpers
# @return the result of `state.with(:broadcasts, <frozen metadata hash>)`
def run(errors)
  input_meta = get_state(:input_metadata) || {}
  definitions = get_state(:declarations) || {}

  array_fields = find_array_fields(input_meta)
  nested_paths = build_nested_paths_metadata(input_meta)

  # The sub-hashes are filled in while walking the declarations below.
  vectorized_ops = {}
  reduction_ops = {}
  flattening = {}
  cascade_strategies = {}
  compilation = {}
  compiler_metadata = {
    array_fields: array_fields,
    vectorized_operations: vectorized_ops,
    reduction_operations: reduction_ops,
    nested_paths: nested_paths,
    flattening_declarations: flattening,
    cascade_strategies: cascade_strategies,
    compilation_metadata: compilation
  }

  # Remembers, per declaration name, whether it is vectorized (used when a
  # later declaration references it).
  vectorized_values = {}

  # Traits are analyzed before values so values can depend on them.
  trait_decls = definitions.select { |_n, d| d.is_a?(Kumi::Syntax::TraitDeclaration) }
  value_decls = definitions.select { |_n, d| d.is_a?(Kumi::Syntax::ValueDeclaration) }
  ordered = trait_decls.to_a.concat(value_decls.to_a)

  ordered.each do |name, decl|
    result = analyze_value_vectorization(name, decl.expression, array_fields, nested_paths,
                                         vectorized_values, errors, definitions)

    puts "#{name}: #{result[:type]} #{format_broadcast_info(result)}" if ENV["DEBUG_BROADCAST_CLEAN"]

    info = result[:info]
    if result[:type] == :vectorized
      vectorized_ops[name] = info

      # Cascades carry a processing strategy; keep it in its own table too.
      strategy = info[:processing_strategy]
      cascade_strategies[name] = strategy if strategy

      vectorized_values[name] = { vectorized: true, array_source: extract_array_source(info, array_fields) }
    elsif result[:type] == :reduction
      reduction_ops[name] = info
      flattening[name] = info
      # A reduction is recorded as non-vectorized for later references.
      vectorized_values[name] = { vectorized: false }
    end

    compilation[name] = compute_compilation_metadata(name, decl, compiler_metadata, vectorized_values, array_fields)
  end

  state.with(:broadcasts, compiler_metadata.freeze)
end
72
-
73
- private
74
-
# Returns the list of path segments that make up the array scope of +arg+.
#
# * Input element reference whose full path is a key of +nested_paths+:
#   the segments whose cumulative prefix has a truthy +nested_paths+ entry.
# * Any other input element reference: the segments that are keys of
#   +array_fields+.
# * Call expression: the longest scope found among its arguments (the first
#   one wins on ties).
# * Anything else: an empty array.
def infer_argument_scope(arg, array_fields, nested_paths)
  if arg.is_a?(Kumi::Syntax::InputElementReference)
    path = arg.path
    return path.select { |segment| array_fields.key?(segment) } unless nested_paths.key?(path)

    path.each_with_index
        .select { |_segment, idx| nested_paths[path[0..idx]] }
        .map(&:first)
  elsif arg.is_a?(Kumi::Syntax::CallExpression)
    arg.args.reduce([]) do |deepest, nested_arg|
      candidate = infer_argument_scope(nested_arg, array_fields, nested_paths)
      candidate.length > deepest.length ? candidate : deepest
    end
  else
    []
  end
end
96
-
# Renders a one-line, human readable summary of an analysis result hash
# (used for the DEBUG_BROADCAST_CLEAN trace output).
#
# @param result [Hash] hash with a :type key and, for non-scalars, an :info hash
# @return [String]
def format_broadcast_info(result)
  kind = result[:type]
  return "→ scalar" if kind == :scalar

  details = result[:info]
  if kind == :vectorized
    "→ #{details[:source]} (path: #{details[:path]&.join('.')})"
  elsif kind == :reduction
    # Only the unqualified class name of the argument node is shown.
    arg_kind = details[:argument]&.class&.name&.split('::')&.last
    "→ fn:#{details[:function]} (arg: #{arg_kind})"
  else
    "→ #{details}"
  end
end
111
-
# Builds the per-declaration compilation metadata hash from what has been
# recorded in +compiler_metadata+ so far.
#
# @param name key of the declaration inside the compiler metadata tables
# @param compiler_metadata [Hash] reads :vectorized_operations, :nested_paths,
#   :cascade_strategies and :flattening_declarations
# @return [Hash] with keys :operation_mode, :is_vectorized,
#   :vectorization_context, :cascade_info and :function_call_strategy
def compute_compilation_metadata(name, _decl, compiler_metadata, _vectorized_values, _array_fields)
  vectorization_context = {}
  cascade_info = {}

  vectorized_info = compiler_metadata[:vectorized_operations][name]
  if vectorized_info
    vectorization_context = {
      has_vectorized_args: true,
      needs_broadcasting: true,
      array_structure_depth: estimate_array_depth(vectorized_info, compiler_metadata[:nested_paths])
    }

    # Cascades additionally expose how they should be processed.
    if vectorized_info[:source] == :cascade_with_vectorized_conditions_or_results
      strategy = compiler_metadata[:cascade_strategies][name]
      cascade_info = {
        is_vectorized: true,
        processing_mode: strategy&.dig(:mode) || :hierarchical,
        needs_hierarchical_processing: needs_hierarchical_processing?(strategy)
      }
    end
  end

  function_call_strategy = {}
  flattening_info = compiler_metadata[:flattening_declarations][name]
  if flattening_info
    function_call_strategy = {
      flattening_required: true,
      flatten_argument_indices: flattening_info[:flatten_argument_indices] || [0],
      result_structure: :scalar
    }
  end

  {
    operation_mode: flattening_info ? :flatten : :broadcast,
    is_vectorized: vectorized_info ? true : false,
    vectorization_context: vectorization_context,
    cascade_info: cascade_info,
    function_call_strategy: function_call_strategy
  }
end
158
-
# Returns the array depth to assume for a vectorized operation.
#
# Only a :nested_array_access source consults +nested_paths+ (looking up the
# :array_depth stored for the operation's path); every other source, and a
# missing entry, yields 1.
def estimate_array_depth(vectorized_info, nested_paths)
  return 1 unless vectorized_info[:source] == :nested_array_access

  nested_paths[vectorized_info[:path]]&.dig(:array_depth) || 1
end
170
-
# True when the cascade strategy's :mode is :nested_array or
# :deep_nested_array; false for any other mode or when no strategy is given.
def needs_hierarchical_processing?(strategy)
  return false unless strategy

  %i[nested_array deep_nested_array].include?(strategy[:mode])
end
181
-
# Collects the top-level inputs that are arrays with declared children.
#
# @param input_meta [Hash] field name => metadata hash (:type, :children)
# @return [Hash] field name => { element_fields: [...], element_types: {...} };
#   a child without a :type is reported as :any
def find_array_fields(input_meta)
  input_meta.each_with_object({}) do |(field_name, field_meta), found|
    children = field_meta[:children]
    next unless field_meta[:type] == :array && children

    found[field_name] = {
      element_fields: children.keys,
      element_types: children.transform_values { |child| child[:type] || :any }
    }
  end
end
194
-
# Walks every root input field and gathers path metadata into a single hash
# by delegating to collect_nested_paths (starting at array depth 0 with no
# parent access mode).
#
# @param input_meta [Hash] root field name => metadata hash
# @return [Hash] the hash populated by collect_nested_paths
def build_nested_paths_metadata(input_meta)
  input_meta.each_with_object({}) do |(root_name, root_meta), collected|
    collect_nested_paths(collected, [root_name], root_meta, 0, nil)
  end
end
205
-
# Recursively records metadata for every path that lies inside at least one
# array, writing into +nested_paths+ (mutated in place).
#
# @param nested_paths [Hash] accumulator, path array => metadata
# @param current_path [Array] path of the field being visited
# @param current_meta [Hash] metadata of that field (:type, :children, :access_mode)
# @param array_depth [Integer] number of enclosing arrays seen so far
# @param parent_access_mode access mode inherited from the nearest enclosing array
def collect_nested_paths(nested_paths, current_path, current_meta, array_depth, parent_access_mode = nil)
  access_mode = parent_access_mode
  if current_meta[:type] == :array
    # Entering an array: one level deeper, and its access mode (default
    # :field) applies to everything below it.
    array_depth += 1
    access_mode = current_meta[:access_mode] || :field
  end

  inside_array = array_depth.positive?
  children = current_meta[:children]

  if children
    children.each do |child_name, child_meta|
      child_path = [*current_path, child_name]

      nested_paths[child_path] = build_path_metadata(child_path, child_meta, array_depth, access_mode) if inside_array

      collect_nested_paths(nested_paths, child_path, child_meta, array_depth, access_mode)
    end
  elsif inside_array
    # Leaf field reached inside an array.
    nested_paths[current_path] = build_path_metadata(current_path, current_meta, array_depth, access_mode)
  end
end
233
-
# Assembles the metadata hash stored for a single path.
#
# @param field_meta [Hash] metadata of the field; a missing :type becomes :any
# @param array_depth [Integer] depth > 1 is reported as :nested_array, otherwise :array
# @param parent_access_mode stored as-is under :access_mode
# @return [Hash]
def build_path_metadata(_path, field_meta, array_depth, parent_access_mode = nil)
  element_type = field_meta[:type] || :any
  shape = array_depth > 1 ? :nested_array : :array

  {
    array_depth: array_depth,
    element_type: element_type,
    operation_mode: :broadcast,
    result_structure: shape,
    access_mode: parent_access_mode
  }
end
243
-
# Classifies an expression node.
#
# * Input element reference: vectorized when its path is a key of
#   +nested_paths+ (source :nested_array_access) or when its first segment is
#   a key of +array_fields+ (source :array_field_access); scalar otherwise.
# * Declaration reference: vectorized when +vectorized_values+ marks the
#   referenced name as vectorized; scalar otherwise.
# * Call / cascade expressions: delegated to the dedicated analyzers.
# * Any other node: scalar.
#
# @return [Hash] { type: ..., info: ... } ({ type: :scalar } has no :info)
def analyze_value_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  if expr.is_a?(Kumi::Syntax::InputElementReference)
    path = expr.path
    if nested_paths.key?(path)
      { type: :vectorized, info: { source: :nested_array_access, path: path, nested_metadata: nested_paths[path] } }
    elsif array_fields.key?(path.first)
      { type: :vectorized, info: { source: :array_field_access, path: path } }
    else
      { type: :scalar }
    end
  elsif expr.is_a?(Kumi::Syntax::DeclarationReference)
    known = vectorized_values[expr.name]
    return { type: :scalar } unless known && known[:vectorized]

    { type: :vectorized, info: { source: :vectorized_declaration, name: expr.name } }
  elsif expr.is_a?(Kumi::Syntax::CallExpression)
    analyze_call_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
  elsif expr.is_a?(Kumi::Syntax::CascadeExpression)
    analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
  else
    { type: :scalar }
  end
end
275
-
# Classifies a function call as :scalar, :vectorized or :reduction.
#
# The registry entry of the function (Kumi::Registry.entry) says whether it
# is a reducer and/or a structure function. Every argument is analyzed once
# up front; the decision then proceeds in this order:
#
# 1. :cascade_and    - vectorized iff one argument is a reference to a
#                      declaration already marked vectorized.
# 2. reducers        - :reduction when at least one argument is vectorized,
#                      otherwise scalar.
# 3. structure fns   - scalar when an argument is a call to a pure reducer
#                      (reducer that is not also a structure function) or when
#                      nothing is vectorized; otherwise vectorized.
# 4. everything else - vectorized when any argument is vectorized. If the
#                      vectorized arguments come from more than one array
#                      source an error is reported and the call is treated as
#                      scalar.
#
# @return [Hash] { type: ..., info: ... }
def analyze_call_vectorization(_name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  fn = expr.fn_name
  entry = Kumi::Registry.entry(fn)
  is_reducer = entry&.reducer
  is_structure = entry&.structure_function

  arg_infos = expr.args.map do |arg|
    analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
  end
  vec_idx = arg_infos.each_index.select { |i| arg_infos[i][:vectorized] }

  if fn == :cascade_and
    vectorized_trait = expr.args.find do |arg|
      arg.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[arg.name]&.[](:vectorized)
    end
    return { type: :scalar } unless vectorized_trait

    return { type: :vectorized,
             info: { source: :cascade_condition_with_vectorized_trait, trait: vectorized_trait.name } }
  end

  if is_reducer
    return { type: :scalar } if vec_idx.empty?

    first_vec = vec_idx.first
    return {
      type: :reduction,
      info: {
        function: fn,
        source: arg_infos[first_vec][:source],
        argument: expr.args[first_vec], # AST of the first vectorized argument
        flatten_argument_indices: vec_idx.dup
      }
    }
  end

  if is_structure
    # e.g. size(sum(x)): the inner pure reducer collapses first, so the outer
    # call is scalar. Functions that are both reducer and structure function
    # do not count as pure reducers here.
    wraps_pure_reducer = expr.args.any? do |a|
      next false unless a.is_a?(Kumi::Syntax::CallExpression)

      inner = Kumi::Registry.entry(a.fn_name)
      inner&.reducer && !inner&.structure_function
    end
    return { type: :scalar } if wraps_pure_reducer
    return { type: :scalar } if vec_idx.empty?

    src_info = arg_infos[vec_idx.first]
    return {
      type: :vectorized,
      info: {
        operation: fn,
        source: src_info[:source],
        parent_scope: src_info[:parent_scope] || src_info[:source],
        vectorized_args: vec_idx.to_h { |i| [i, true] }
      }
    }
  end

  return { type: :scalar } if vec_idx.empty?

  # All vectorized operands must originate from the same array source.
  sources = vec_idx.map { |i| arg_infos[i][:array_source] }.compact.uniq
  if sources.size > 1
    message = build_dimension_mismatch_error(expr, arg_infos, array_fields, sources)
    report_error(errors, message, location: expr.loc, type: :semantic)
    return { type: :scalar } # fail safe to prevent cascading errors
  end

  {
    type: :vectorized,
    info: {
      operation: fn,
      source: arg_infos[vec_idx.first][:source],
      vectorized_args: vec_idx.to_h { |i| [i, true] }
    }
  }
end
378
-
# Asks the function registry whether +fn_name+ is a structure function
# (thin delegation to Kumi::Registry.structure_function?).
def structure_function?(fn_name)
  registry = Kumi::Registry
  registry.structure_function?(fn_name)
end
383
-
# Describes a single call argument: whether it is vectorized, where the
# vectorization comes from (:source) and, when known, the root array it
# belongs to (:array_source).
#
# NOTE(review): nested call arguments are analyzed with a throw-away error
# list, so errors reported while analyzing them are not propagated to the
# caller - confirm this is intentional.
#
# @return [Hash] always contains :vectorized; :source / :array_source depend
#   on the branch taken
def analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions = nil)
  if arg.is_a?(Kumi::Syntax::InputElementReference)
    root = arg.path.first
    # nested_paths takes precedence; array_fields is the older fallback.
    if nested_paths.key?(arg.path)
      { vectorized: true, source: :nested_array_field, array_source: root }
    elsif array_fields.key?(root)
      { vectorized: true, source: :array_field, array_source: root }
    else
      { vectorized: false }
    end
  elsif arg.is_a?(Kumi::Syntax::DeclarationReference)
    known = vectorized_values[arg.name]
    return { vectorized: false } unless known && known[:vectorized]

    { vectorized: true, source: :vectorized_value, array_source: known[:array_source] }
  elsif arg.is_a?(Kumi::Syntax::CallExpression)
    nested = analyze_value_vectorization(nil, arg, array_fields, nested_paths, vectorized_values, [], definitions)

    if nested[:type] == :reduction
      # A reduction whose argument spans more than one array level is
      # treated as still vectorized over the outermost level.
      info = nested[:info]
      reduced_arg = info && info[:argument]
      scope = reduced_arg ? infer_argument_scope(reduced_arg, array_fields, nested_paths) : []

      if scope.length > 1
        { vectorized: true, source: :grouped_reduction, array_source: scope.first }
      else
        { vectorized: false, source: :scalar_from_reduction }
      end
    elsif nested[:type] == :vectorized
      { vectorized: true, source: :expression }
    else
      { vectorized: false, source: :scalar }
    end
  else
    { vectorized: false }
  end
end
439
-
# Returns the root array name for a vectorized result, when it can be read
# directly from +info+.
#
# Only :array_field_access results carry it (first segment of :path). Every
# other source - including :cascade_condition_with_vectorized_trait - yields nil.
# TODO: Could be enhanced to trace through trait dependencies for cascades.
def extract_array_source(info, _array_fields)
  return nil unless info[:source] == :array_field_access

  info[:path]&.first
end
449
-
# Classifies a cascade expression and, when it is vectorized, validates that
# the array sources/dimensions used by its conditions and results can be
# combined.
#
# For each case the result and the condition are analyzed. Vectorized ones
# contribute a (source, dimension) pair obtained from trace_dimensional_source.
# Conditions that are :cascade_and calls are traced per referenced
# declaration and checked for conflicts among themselves first.
#
# Returns { type: :scalar } when nothing is vectorized or when a dimensional
# mismatch was reported; otherwise a :vectorized result whose info carries
# the dimensional requirements and the cascade processing strategy.
#
# Set DEBUG_CASCADE in the environment to trace the analysis on stdout.
def analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  debug = ENV["DEBUG_CASCADE"]

  condition_sources = []
  condition_dimensions = []
  result_sources = []
  result_dimensions = []
  is_vectorized = false

  if debug
    puts "DEBUG: analyze_cascade_vectorization for #{name}"
    puts "  Expression: #{expr.inspect}"
    puts "  Cases: #{expr.cases.length}"
  end

  expr.cases.each do |case_expr|
    # --- result side ---
    outcome = case_expr.result
    outcome_info = analyze_value_vectorization(nil, outcome, array_fields, nested_paths, vectorized_values, errors,
                                               definitions)
    if outcome_info[:type] == :vectorized
      is_vectorized = true
      source, dimension = trace_dimensional_source(outcome, outcome_info, vectorized_values, array_fields, definitions)
      result_sources << source
      result_dimensions << dimension
    end

    # --- condition side ---
    condition = case_expr.condition
    condition_info = analyze_value_vectorization(nil, condition, array_fields, nested_paths, vectorized_values, errors,
                                                 definitions)
    next unless condition_info[:type] == :vectorized

    is_vectorized = true
    condition_is_call = condition.is_a?(Kumi::Syntax::CallExpression)

    if debug
      puts "    Checking condition type: #{condition.class}"
      puts "    Condition fn_name: #{condition.fn_name}" if condition_is_call
    end

    unless condition_is_call && condition.fn_name == :cascade_and
      source, dimension = trace_dimensional_source(condition, condition_info, vectorized_values, array_fields,
                                                   definitions)
      condition_sources << source
      condition_dimensions << dimension
      next
    end

    # cascade_and: trace every referenced declaration individually.
    puts "    -> ENTERING CASCADE_AND SPECIAL HANDLING" if debug
    cascade_sources = []
    cascade_dimensions = []

    puts "    cascade_and args: #{condition.args.map(&:class)}" if debug

    condition.args.each do |arg|
      puts "      Processing arg: #{arg.inspect}" if debug
      next unless arg.is_a?(Kumi::Syntax::DeclarationReference)

      puts "      Looking up declaration: #{arg.name}" if debug
      decl = definitions[arg.name] if definitions
      if decl
        puts "      Found declaration, tracing source..." if debug
        arg_source, arg_dimension = trace_dimensional_source(decl.expression, condition_info, vectorized_values,
                                                             array_fields, definitions)
        puts "      Traced source: #{arg_source}, dimension: #{arg_dimension}" if debug
        cascade_sources << arg_source
        cascade_dimensions << arg_dimension
      elsif debug
        puts "      Declaration not found: #{arg.name}"
      end
    end

    unique_sources = cascade_sources.uniq
    unique_dimensions = cascade_dimensions.uniq

    if debug
      puts "    cascade_sources: #{cascade_sources.inspect}"
      puts "    cascade_dimensions: #{cascade_dimensions.inspect}"
      puts "    unique_sources: #{unique_sources.inspect}"
      puts "    unique_dimensions: #{unique_dimensions.inspect}"
    end

    # A conflict is either several distinct (all known) sources, or several
    # dimensions that cannot be combined hierarchically.
    has_source_conflict = unique_sources.length > 1 && unique_sources.none? { |s| s.to_s.include?("unknown") }
    has_dimension_conflict = unique_dimensions.length > 1 && !valid_hierarchical_broadcasting?(unique_dimensions)

    if debug
      puts "    has_source_conflict: #{has_source_conflict}"
      puts "    has_dimension_conflict: #{has_dimension_conflict}"
      if unique_dimensions.length > 1
        puts "    valid_hierarchical_broadcasting?: #{valid_hierarchical_broadcasting?(unique_dimensions)}"
      end
    end

    if has_source_conflict || has_dimension_conflict
      if debug
        puts "    -> FOUND CASCADE_AND DIMENSIONAL CONFLICT:"
        puts "       Sources: #{unique_sources.inspect}"
        puts "       Dimensions: #{unique_dimensions.inspect}"
        puts "       Source conflict: #{has_source_conflict}"
        puts "       Dimension conflict: #{has_dimension_conflict}"
      end
      report_cascade_dimension_mismatch(errors, expr, unique_sources, unique_dimensions)
      return { type: :scalar }
    end

    condition_sources.concat(cascade_sources)
    condition_dimensions.concat(cascade_dimensions)
  end

  return { type: :scalar } unless is_vectorized

  all_sources = (condition_sources + result_sources).compact.uniq
  all_dimensions = (condition_dimensions + result_dimensions).compact.uniq

  if debug
    puts "  is_vectorized: true"
    puts "  condition_sources: #{condition_sources.inspect}"
    puts "  result_sources: #{result_sources.inspect}"
    puts "  condition_dimensions: #{condition_dimensions.inspect}"
    puts "  result_dimensions: #{result_dimensions.inspect}"
    puts "  all_sources: #{all_sources.inspect}"
    puts "  all_dimensions: #{all_dimensions.inspect}"
  end

  # Deliberately lenient: placeholder sources ("unknown"/"operation") never
  # count towards a mismatch.
  definite_sources = all_sources.reject { |s| s.to_s.include?("unknown") || s.to_s.include?("operation") }

  if debug
    puts "  definite_sources: #{definite_sources.inspect}"
    puts "  definite_sources.length: #{definite_sources.length}"
  end

  if definite_sources.length > 1
    is_valid_hierarchical = valid_hierarchical_broadcasting?(all_dimensions)
    puts "  valid_hierarchical_broadcasting?: #{is_valid_hierarchical}" if debug
    unless is_valid_hierarchical
      puts "  -> REPORTING DIMENSIONAL MISMATCH" if debug
      report_cascade_dimension_mismatch(errors, expr, definite_sources, all_dimensions)
      return { type: :scalar } # Treat as scalar to prevent further errors
    end
  end

  primary_dimension = all_dimensions.first
  processing_strategy = compute_cascade_processing_strategy(primary_dimension, nested_paths)

  {
    type: :vectorized,
    info: {
      source: :cascade_with_vectorized_conditions_or_results,
      dimensional_requirements: {
        conditions: { sources: condition_sources.uniq, dimensions: condition_dimensions.uniq },
        results: { sources: result_sources.uniq, dimensions: result_dimensions.uniq }
      },
      primary_dimension: primary_dimension,
      nested_paths: extract_nested_paths_from_dimensions(primary_dimension, nested_paths),
      processing_strategy: processing_strategy
    }
  }
end
614
-
# Resolves an expression to a [source, dimension] pair by inspecting the AST
# node itself.
#
# * Input element reference: [first path segment, full path].
# * Declaration reference: the recorded :array_source when there is one;
#   otherwise the referenced declaration's expression is traced recursively.
#   Placeholder pairs are returned when the declaration is unknown.
# * Call expression: traces the first argument reported as vectorized
#   (argument analysis here runs with an empty nested-paths table).
# * Anything else: [:unknown_expr, [:unknown_expr]].
#
# @param info passed through unchanged to recursive calls
# @return [Array] two-element array [source, dimension]
def trace_dimensional_source(expr, info, vectorized_values, array_fields, definitions = nil)
  if expr.is_a?(Kumi::Syntax::InputElementReference)
    path = expr.path
    [path.first, path]
  elsif expr.is_a?(Kumi::Syntax::DeclarationReference)
    known = vectorized_values[expr.name]
    return [:unknown_declaration, [:unknown_declaration]] unless known

    origin = known[:array_source]
    return [origin, [origin]] if origin

    # No recorded source: follow the declaration's own expression.
    decl = definitions[expr.name] if definitions
    return [:unknown_vectorized_operation, [:unknown_vectorized_operation]] unless decl

    trace_dimensional_source(decl.expression, info, vectorized_values, array_fields, definitions)
  elsif expr.is_a?(Kumi::Syntax::CallExpression)
    vectorized_arg = expr.args.find do |arg|
      analyze_argument_vectorization(arg, array_fields, {}, vectorized_values, definitions)[:vectorized]
    end
    return [:operation_unknown, [:operation_unknown]] unless vectorized_arg

    trace_dimensional_source(vectorized_arg, info, vectorized_values, array_fields, definitions)
  else
    [:unknown_expr, [:unknown_expr]]
  end
end
658
-
# Looks up the nested-path metadata stored for +dimension+; returns nil when
# +dimension+ is not an Array (or has no entry).
def extract_nested_paths_from_dimensions(dimension, nested_paths)
  dimension.is_a?(Array) ? nested_paths[dimension] : nil
end
664
-
# Check if dimensions represent valid hierarchical broadcasting (parent-to-child)
# Example: [:regions, :offices, :teams] can broadcast to [:regions, :offices, :teams, :employees]
#
# Every dimension is first reduced to its structural path, i.e. the path
# without its final field name (single-segment dimensions are kept as they
# are). The set is accepted when
#
# * there is at most one dimension, or
# * all structural paths are identical (same level), or
# * the structural paths form ONE ancestor chain: sorted by length, each
#   path is a prefix of the next.
#
# Sets with different roots, or containing sibling branches, are rejected.
# A sibling branch is rejected even if some other pair in the set happens to
# be parent/child - a single related pair is not enough.
#
# Set DEBUG_CASCADE in the environment to trace the decision on stdout.
#
# @param dimensions [Array<Array>] dimension paths
# @return [Boolean]
def valid_hierarchical_broadcasting?(dimensions)
  debug = ENV["DEBUG_CASCADE"]
  puts "    DEBUG valid_hierarchical_broadcasting?: dimensions=#{dimensions.inspect}" if debug

  return true if dimensions.length <= 1

  # Drop the trailing field name so that e.g. [:regions, :offices, :teams, :performance_score]
  # is compared by its container path [:regions, :offices, :teams].
  structural_paths = dimensions.map { |dim| dim.length > 1 ? dim[0..-2] : dim }.uniq
  puts "    structural_paths: #{structural_paths.inspect}" if debug

  roots = structural_paths.map(&:first).uniq
  puts "    root_groups: #{roots.inspect}" if debug

  if roots.length > 1
    puts "    -> REJECT: Multiple roots" if debug
    return false
  end

  if structural_paths.length == 1
    puts "    -> ACCEPT: All dimensions at same structural level" if debug
    return true
  end

  sorted_paths = structural_paths.sort_by(&:length)
  puts "    sorted structural paths: #{sorted_paths.inspect}" if debug

  # Prefix-of is transitive, so checking neighbours of the length-sorted list
  # is enough to prove the whole set is a single chain. Two distinct paths of
  # equal length can never be prefixes of one another, so siblings fail here.
  has_real_hierarchy = sorted_paths.each_cons(2).all? do |shorter, longer|
    related = longer[0, shorter.length] == shorter
    puts "      Found parent-child relationship: #{shorter.inspect} → #{longer.inspect}" if related && debug
    related
  end

  puts "    has_real_hierarchy: #{has_real_hierarchy}" if debug

  unless has_real_hierarchy
    puts "    -> REJECT: No parent-child relationships found - these are sibling branches" if debug
    return false
  end

  puts "    -> ACCEPT: All dimensions compatible" if debug
  true
end
738
-
# Derives how a vectorized cascade should be processed from the length of
# its primary dimension path.
#
#   length 0-1 -> :simple_array
#   length 2-4 -> :nested_array
#   longer     -> :deep_nested_array
#
# @param primary_dimension [Array, nil] dimension path; nil yields { mode: :scalar }
# @param nested_paths [Hash] looked up with the dimension path for :nested_path_info
# @return [Hash]
def compute_cascade_processing_strategy(primary_dimension, nested_paths)
  return { mode: :scalar } unless primary_dimension

  depth = primary_dimension.length
  mode = if [0, 1].include?(depth)
           :simple_array
         elsif [2, 3, 4].include?(depth)
           :nested_array
         else
           :deep_nested_array
         end

  {
    mode: mode,
    structure_depth: depth,
    dimension_path: primary_dimension,
    element_processing: :cascade_conditional_logic,
    nested_path_info: nested_paths[primary_dimension]
  }
end
766
-
# Reports a semantic error (via report_error, at the cascade's location)
# stating that the cascade mixes arrays from the given sources/dimensions.
#
# Set DEBUG_CASCADE in the environment to also print the inputs on stdout.
def report_cascade_dimension_mismatch(errors, expr, sources, dimensions)
  if ENV["DEBUG_CASCADE"]
    puts "DEBUG: Dimensional analysis details:"
    puts "  Sources: #{sources.inspect}"
    puts "  Dimensions: #{dimensions.inspect}"
    puts "  Valid hierarchical? #{valid_hierarchical_broadcasting?(dimensions)}"
  end

  source_list = sources.join(', ')
  dimension_list = dimensions.map(&:inspect).join(', ')
  message = "Cascade dimensional mismatch: Cannot mix arrays from different sources (#{source_list}) " \
            "with dimensions (#{dimension_list}) in cascade conditions and results."

  report_error(errors, message, location: expr.loc, type: :semantic)
end
777
-
# Builds the error text used when a call mixes vectorized operands that come
# from different array sources.
#
# The message lists each vectorized operand whose array source is known in
# +array_fields+. Operands are numbered by their position in the call's
# argument list (1-based), so "Operand 2" is the call's second argument even
# when the first argument is a scalar.
#
# @param arg_infos [Array<Hash>] one analysis hash per call argument, in call order
# @param array_fields [Hash] array source => field info
# @param vectorized_sources [Array] the distinct array sources involved
# @return [String]
def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
  summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "

  operand_lines = []
  arg_infos.each_with_index do |arg_info, position|
    next unless arg_info[:vectorized]

    array_source = arg_info[:array_source]
    next unless array_source && array_fields[array_source]

    type_desc = determine_array_type(array_source, array_fields)
    operand_lines << "  - Operand #{position + 1} resolves to #{type_desc} from array '#{array_source}'\n"
  end

  problem_desc = "Problem: Multiple operands are arrays from different sources:\n#{operand_lines.join}"

  explanation = "Direct operations on arrays from different sources is ambiguous and not supported. " \
                "Vectorized operations can only work on fields from the same array input."

  "#{summary}#{problem_desc}#{explanation}"
end
799
-
# Describes the element type of an array source for error messages.
#
# @param array_source key into +array_fields+
# @param array_fields [Hash] array source => { element_types: { field => type } }
# @return [String] "array(<type>)" when all element fields share one type,
#   "array(mixed)" when they differ, and "array(any)" when nothing is known
#   (unknown source, missing or empty :element_types)
def determine_array_type(array_source, array_fields)
  # dig tolerates an array source that has no entry in array_fields.
  element_types = array_fields.dig(array_source, :element_types)
  return "array(any)" unless element_types

  distinct_types = element_types.values.uniq
  return "array(any)" if distinct_types.empty?

  distinct_types.length == 1 ? "array(#{distinct_types.first})" : "array(mixed)"
end
812
- end
813
- end
814
- end
815
- end
816
- end