kumi 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +28 -44
  3. data/README.md +188 -108
  4. data/docs/AST.md +8 -1
  5. data/docs/FUNCTIONS.md +52 -8
  6. data/docs/compiler_design_principles.md +86 -0
  7. data/docs/features/README.md +22 -2
  8. data/docs/features/hierarchical-broadcasting.md +349 -0
  9. data/docs/features/javascript-transpiler.md +148 -0
  10. data/docs/features/performance.md +1 -3
  11. data/docs/features/s-expression-printer.md +77 -0
  12. data/docs/schema_metadata.md +7 -7
  13. data/examples/game_of_life.rb +2 -4
  14. data/lib/kumi/analyzer.rb +0 -2
  15. data/lib/kumi/compiler.rb +6 -275
  16. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +600 -42
  17. data/lib/kumi/core/analyzer/passes/input_collector.rb +4 -2
  18. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +27 -0
  19. data/lib/kumi/core/analyzer/passes/type_checker.rb +6 -2
  20. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +90 -46
  21. data/lib/kumi/core/cascade_executor_builder.rb +132 -0
  22. data/lib/kumi/core/compiler/expression_compiler.rb +146 -0
  23. data/lib/kumi/core/compiler/function_invoker.rb +55 -0
  24. data/lib/kumi/core/compiler/path_traversal_compiler.rb +158 -0
  25. data/lib/kumi/core/compiler/reference_compiler.rb +46 -0
  26. data/lib/kumi/core/compiler_base.rb +137 -0
  27. data/lib/kumi/core/explain.rb +2 -2
  28. data/lib/kumi/core/function_registry/collection_functions.rb +86 -3
  29. data/lib/kumi/core/function_registry/function_builder.rb +5 -3
  30. data/lib/kumi/core/function_registry/logical_functions.rb +171 -1
  31. data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
  32. data/lib/kumi/core/function_registry.rb +32 -10
  33. data/lib/kumi/core/nested_structure_utils.rb +78 -0
  34. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +2 -2
  35. data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
  36. data/lib/kumi/core/schema_instance.rb +4 -0
  37. data/lib/kumi/core/vectorized_function_builder.rb +88 -0
  38. data/lib/kumi/errors.rb +2 -0
  39. data/lib/kumi/js/compiler.rb +878 -0
  40. data/lib/kumi/js/function_registry.rb +333 -0
  41. data/lib/kumi/js.rb +23 -0
  42. data/lib/kumi/registry.rb +61 -1
  43. data/lib/kumi/schema.rb +1 -1
  44. data/lib/kumi/support/s_expression_printer.rb +162 -0
  45. data/lib/kumi/syntax/array_expression.rb +6 -6
  46. data/lib/kumi/syntax/call_expression.rb +4 -4
  47. data/lib/kumi/syntax/cascade_expression.rb +4 -4
  48. data/lib/kumi/syntax/case_expression.rb +4 -4
  49. data/lib/kumi/syntax/declaration_reference.rb +4 -4
  50. data/lib/kumi/syntax/hash_expression.rb +4 -4
  51. data/lib/kumi/syntax/input_declaration.rb +6 -5
  52. data/lib/kumi/syntax/input_element_reference.rb +5 -5
  53. data/lib/kumi/syntax/input_reference.rb +5 -5
  54. data/lib/kumi/syntax/literal.rb +4 -4
  55. data/lib/kumi/syntax/node.rb +34 -34
  56. data/lib/kumi/syntax/root.rb +6 -6
  57. data/lib/kumi/syntax/trait_declaration.rb +4 -4
  58. data/lib/kumi/syntax/value_declaration.rb +4 -4
  59. data/lib/kumi/version.rb +1 -1
  60. data/lib/kumi.rb +1 -1
  61. data/scripts/analyze_broadcast_methods.rb +68 -0
  62. data/scripts/analyze_cascade_methods.rb +74 -0
  63. data/scripts/check_broadcasting_coverage.rb +51 -0
  64. data/scripts/find_dead_code.rb +114 -0
  65. metadata +22 -4
  66. data/docs/features/array-broadcasting.md +0 -170
  67. data/lib/kumi/cli.rb +0 -449
  68. data/lib/kumi/core/vectorization_metadata.rb +0 -110
@@ -15,11 +15,18 @@ module Kumi
15
15
  # Find array fields with their element types
16
16
  array_fields = find_array_fields(input_meta)
17
17
 
18
+ # Build nested paths metadata for nested array traversal
19
+ nested_paths = build_nested_paths_metadata(input_meta)
20
+
18
21
  # Build compiler metadata
19
22
  compiler_metadata = {
20
23
  array_fields: array_fields,
21
24
  vectorized_operations: {},
22
- reduction_operations: {}
25
+ reduction_operations: {},
26
+ nested_paths: nested_paths,
27
+ flattening_declarations: {}, # Track which declarations need flattening
28
+ cascade_strategies: {}, # Pre-computed cascade processing strategies
29
+ compilation_metadata: {} # Pre-computed compilation decisions
23
30
  }
24
31
 
25
32
  # Track which values are vectorized for type inference
@@ -30,19 +37,32 @@ module Kumi
30
37
  values = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::ValueDeclaration) }
31
38
 
32
39
  (traits.to_a + values.to_a).each do |name, decl|
33
- result = analyze_value_vectorization(name, decl.expression, array_fields, vectorized_values, errors)
40
+ result = analyze_value_vectorization(name, decl.expression, array_fields, nested_paths, vectorized_values, errors,
41
+ definitions)
34
42
 
35
43
  case result[:type]
36
44
  when :vectorized
37
45
  compiler_metadata[:vectorized_operations][name] = result[:info]
46
+
47
+ # If this is a cascade with processing strategy, store it separately for easy compiler access
48
+ compiler_metadata[:cascade_strategies][name] = result[:info][:processing_strategy] if result[:info][:processing_strategy]
49
+
38
50
  # Store array source information for dimension checking
39
51
  array_source = extract_array_source(result[:info], array_fields)
40
52
  vectorized_values[name] = { vectorized: true, array_source: array_source }
41
53
  when :reduction
42
54
  compiler_metadata[:reduction_operations][name] = result[:info]
55
+ # Mark this specific declaration as needing flattening for its argument
56
+ compiler_metadata[:flattening_declarations][name] = result[:info]
43
57
  # Reduction produces scalar, not vectorized
44
58
  vectorized_values[name] = { vectorized: false }
45
59
  end
60
+
61
+ # Pre-compute compilation metadata for each declaration
62
+ compilation_meta = compute_compilation_metadata(
63
+ name, decl, compiler_metadata, vectorized_values, array_fields
64
+ )
65
+ compiler_metadata[:compilation_metadata][name] = compilation_meta
46
66
  end
47
67
 
48
68
  state.with(:broadcasts, compiler_metadata.freeze)
@@ -50,6 +70,76 @@ module Kumi
50
70
 
51
71
  private
52
72
 
73
+ def compute_compilation_metadata(name, _decl, compiler_metadata, _vectorized_values, _array_fields)
74
+ metadata = {
75
+ operation_mode: :broadcast, # Default mode
76
+ is_vectorized: false,
77
+ vectorization_context: {},
78
+ cascade_info: {},
79
+ function_call_strategy: {}
80
+ }
81
+
82
+ # Check if this declaration is vectorized
83
+ if compiler_metadata[:vectorized_operations][name]
84
+ metadata[:is_vectorized] = true
85
+ vectorized_info = compiler_metadata[:vectorized_operations][name]
86
+
87
+ # Pre-compute vectorization context
88
+ metadata[:vectorization_context] = {
89
+ has_vectorized_args: true,
90
+ needs_broadcasting: true,
91
+ array_structure_depth: estimate_array_depth(vectorized_info, compiler_metadata[:nested_paths])
92
+ }
93
+
94
+ # If this is a cascade, pre-compute cascade processing strategy
95
+ if vectorized_info[:source] == :cascade_with_vectorized_conditions_or_results
96
+ strategy = compiler_metadata[:cascade_strategies][name]
97
+ metadata[:cascade_info] = {
98
+ is_vectorized: true,
99
+ processing_mode: strategy&.dig(:mode) || :hierarchical,
100
+ needs_hierarchical_processing: needs_hierarchical_processing?(strategy)
101
+ }
102
+ end
103
+ end
104
+
105
+ # Check if this declaration needs flattening
106
+ if compiler_metadata[:flattening_declarations][name]
107
+ metadata[:operation_mode] = :flatten
108
+ flattening_info = compiler_metadata[:flattening_declarations][name]
109
+
110
+ metadata[:function_call_strategy] = {
111
+ flattening_required: true,
112
+ flatten_argument_indices: flattening_info[:flatten_argument_indices] || [0],
113
+ result_structure: :scalar
114
+ }
115
+ end
116
+
117
+ metadata
118
+ end
119
+
120
+ def estimate_array_depth(vectorized_info, nested_paths)
121
+ case vectorized_info[:source]
122
+ when :nested_array_access
123
+ path = vectorized_info[:path]
124
+ nested_paths[path]&.dig(:array_depth) || 1
125
+ when :array_field_access
126
+ 1
127
+ else
128
+ 1
129
+ end
130
+ end
131
+
132
+ def needs_hierarchical_processing?(strategy)
133
+ return false unless strategy
134
+
135
+ case strategy[:mode]
136
+ when :nested_array, :deep_nested_array
137
+ true
138
+ else
139
+ false
140
+ end
141
+ end
142
+
53
143
  def find_array_fields(input_meta)
54
144
  result = {}
55
145
  input_meta.each do |name, meta|
@@ -63,10 +153,63 @@ module Kumi
63
153
  result
64
154
  end
65
155
 
66
- def analyze_value_vectorization(name, expr, array_fields, vectorized_values, errors)
156
+ def build_nested_paths_metadata(input_meta)
157
+ nested_paths = {}
158
+
159
+ # Recursively build all possible nested paths from input metadata
160
+ input_meta.each do |root_name, root_meta|
161
+ collect_nested_paths(nested_paths, [root_name], root_meta, 0, nil)
162
+ end
163
+
164
+ nested_paths
165
+ end
166
+
167
+ def collect_nested_paths(nested_paths, current_path, current_meta, array_depth, parent_access_mode = nil)
168
+ # If current field is an array, increment array depth and track its access_mode
169
+ current_access_mode = parent_access_mode
170
+ if current_meta[:type] == :array
171
+ array_depth += 1
172
+ current_access_mode = current_meta[:access_mode] || :object # Default to :object if not specified
173
+ end
174
+
175
+ # If this field has children, recurse into them
176
+ if current_meta[:children]
177
+ current_meta[:children].each do |child_name, child_meta|
178
+ child_path = current_path + [child_name]
179
+
180
+ # Create metadata for this path if it involves arrays
181
+ if array_depth.positive?
182
+ nested_paths[child_path] =
183
+ build_path_metadata(child_path, child_meta, array_depth, current_access_mode)
184
+ end
185
+
186
+ # Recurse into child's children
187
+ collect_nested_paths(nested_paths, child_path, child_meta, array_depth, current_access_mode)
188
+ end
189
+ elsif array_depth.positive?
190
+ # Leaf field - create metadata if it involves arrays
191
+ nested_paths[current_path] = build_path_metadata(current_path, current_meta, array_depth, current_access_mode)
192
+ end
193
+ end
194
+
195
+ def build_path_metadata(_path, field_meta, array_depth, parent_access_mode = nil)
196
+ {
197
+ array_depth: array_depth,
198
+ element_type: field_meta[:type] || :any,
199
+ operation_mode: :broadcast, # Default mode - may be overridden for aggregations
200
+ result_structure: array_depth > 1 ? :nested_array : :array,
201
+ access_mode: parent_access_mode # Access mode of the parent array field
202
+ }
203
+ end
204
+
205
+ def analyze_value_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
67
206
  case expr
68
207
  when Kumi::Syntax::InputElementReference
69
- if array_fields.key?(expr.path.first)
208
+ # Check if this path exists in nested_paths metadata (supports nested arrays)
209
+ if nested_paths.key?(expr.path)
210
+ { type: :vectorized, info: { source: :nested_array_access, path: expr.path, nested_metadata: nested_paths[expr.path] } }
211
+ # Fallback to old array_fields detection for backward compatibility
212
+ elsif array_fields.key?(expr.path.first)
70
213
  { type: :vectorized, info: { source: :array_field_access, path: expr.path } }
71
214
  else
72
215
  { type: :scalar }
@@ -82,43 +225,57 @@ module Kumi
82
225
  end
83
226
 
84
227
  when Kumi::Syntax::CallExpression
85
- analyze_call_vectorization(name, expr, array_fields, vectorized_values, errors)
228
+ analyze_call_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
86
229
 
87
230
  when Kumi::Syntax::CascadeExpression
88
- analyze_cascade_vectorization(name, expr, array_fields, vectorized_values, errors)
231
+ analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
89
232
 
90
233
  else
91
234
  { type: :scalar }
92
235
  end
93
236
  end
94
237
 
95
- def analyze_call_vectorization(_name, expr, array_fields, vectorized_values, errors)
238
+ def analyze_call_vectorization(_name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
96
239
  # Check if this is a reduction function using function registry metadata
97
240
  if Kumi::Registry.reducer?(expr.fn_name)
98
241
  # Only treat as reduction if the argument is actually vectorized
99
- arg_info = analyze_argument_vectorization(expr.args.first, array_fields, vectorized_values)
242
+ arg_info = analyze_argument_vectorization(expr.args.first, array_fields, nested_paths, vectorized_values, definitions)
100
243
  if arg_info[:vectorized]
101
- { type: :reduction, info: { function: expr.fn_name, source: arg_info[:source] } }
244
+ # Pre-compute which argument indices need flattening
245
+ flatten_indices = []
246
+ expr.args.each_with_index do |arg, index|
247
+ arg_vectorization = analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
248
+ flatten_indices << index if arg_vectorization[:vectorized]
249
+ end
250
+
251
+ { type: :reduction, info: {
252
+ function: expr.fn_name,
253
+ source: arg_info[:source],
254
+ argument: expr.args.first,
255
+ flatten_argument_indices: flatten_indices
256
+ } }
102
257
  else
103
258
  # Not a vectorized reduction - just a regular function call
104
259
  { type: :scalar }
105
260
  end
106
261
 
107
262
  else
108
- # Special case: all?, any?, none? functions with vectorized trait arguments should be treated as vectorized
109
- # for cascade condition purposes (they get transformed during compilation)
110
- if %i[all? any? none?].include?(expr.fn_name) && expr.args.length == 1
111
- arg = expr.args.first
112
- if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
113
- trait_ref = arg.elements.first
114
- if trait_ref.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[trait_ref.name]&.[](:vectorized)
115
- return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: trait_ref.name } }
116
- end
263
+
264
+ # Special case: cascade_and takes individual trait arguments
265
+ if expr.fn_name == :cascade_and
266
+ # Check if any of the individual arguments are vectorized traits
267
+ vectorized_trait = expr.args.find do |arg|
268
+ arg.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[arg.name]&.[](:vectorized)
269
+ end
270
+ if vectorized_trait
271
+ return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: vectorized_trait.name } }
117
272
  end
118
273
  end
119
274
 
120
- # ANY function with vectorized arguments becomes vectorized (with broadcasting)
121
- arg_infos = expr.args.map { |arg| analyze_argument_vectorization(arg, array_fields, vectorized_values) }
275
+ # Analyze arguments to determine function behavior
276
+ arg_infos = expr.args.map do |arg|
277
+ analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
278
+ end
122
279
 
123
280
  if arg_infos.any? { |info| info[:vectorized] }
124
281
  # Check for dimension mismatches when multiple arguments are vectorized
@@ -133,21 +290,37 @@ module Kumi
133
290
  return { type: :scalar } # Treat as scalar to prevent further errors
134
291
  end
135
292
 
136
- # This is a vectorized operation - ANY function supports broadcasting
137
- { type: :vectorized, info: {
138
- operation: expr.fn_name,
139
- vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
140
- } }
293
+ # Check if this is a structure function that should work on the array as-is
294
+ if structure_function?(expr.fn_name)
295
+ # Structure functions like size should work on structure as-is (scalar)
296
+ { type: :scalar }
297
+ else
298
+ # This is a vectorized operation - broadcast over elements
299
+ { type: :vectorized, info: {
300
+ operation: expr.fn_name,
301
+ vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
302
+ } }
303
+ end
141
304
  else
305
+ # No vectorized arguments - regular scalar function
142
306
  { type: :scalar }
143
307
  end
144
308
  end
145
309
  end
146
310
 
147
- def analyze_argument_vectorization(arg, array_fields, vectorized_values)
311
+ def structure_function?(fn_name)
312
+ # Check if function is marked as working on structure (not broadcast over elements)
313
+ Kumi::Registry.structure_function?(fn_name)
314
+ end
315
+
316
+ def analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions = nil)
148
317
  case arg
149
318
  when Kumi::Syntax::InputElementReference
150
- if array_fields.key?(arg.path.first)
319
+ # Check nested paths first (supports nested arrays)
320
+ if nested_paths.key?(arg.path)
321
+ { vectorized: true, source: :nested_array_field, array_source: arg.path.first }
322
+ # Fallback to old array_fields detection for backward compatibility
323
+ elsif array_fields.key?(arg.path.first)
151
324
  { vectorized: true, source: :array_field, array_source: arg.path.first }
152
325
  else
153
326
  { vectorized: false }
@@ -165,7 +338,7 @@ module Kumi
165
338
 
166
339
  when Kumi::Syntax::CallExpression
167
340
  # Recursively check
168
- result = analyze_value_vectorization(nil, arg, array_fields, vectorized_values, [])
341
+ result = analyze_value_vectorization(nil, arg, array_fields, nested_paths, vectorized_values, [], definitions)
169
342
  { vectorized: result[:type] == :vectorized, source: :expression }
170
343
 
171
344
  else
@@ -183,30 +356,415 @@ module Kumi
183
356
  end
184
357
  end
185
358
 
186
- def analyze_cascade_vectorization(_name, expr, array_fields, vectorized_values, errors)
187
- # A cascade is vectorized if:
188
- # 1. Any of its result expressions are vectorized, OR
189
- # 2. Any of its conditions reference vectorized values (traits or arrays)
190
- vectorized_results = []
191
- vectorized_conditions = []
359
+ def analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
360
+ # Enhanced cascade analysis with dimensional intelligence
361
+ condition_sources = []
362
+ result_sources = []
363
+ condition_dimensions = []
364
+ result_dimensions = []
365
+ is_vectorized = false
366
+
367
+ if ENV["DEBUG_CASCADE"]
368
+ puts "DEBUG: analyze_cascade_vectorization for #{name}"
369
+ puts " Expression: #{expr.inspect}"
370
+ puts " Cases: #{expr.cases.length}"
371
+ end
192
372
 
193
373
  expr.cases.each do |case_expr|
194
- # Check if result is vectorized
195
- result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, vectorized_values, errors)
196
- vectorized_results << (result_info[:type] == :vectorized)
374
+ # Analyze result expression
375
+ result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, nested_paths, vectorized_values, errors,
376
+ definitions)
377
+ if result_info[:type] == :vectorized
378
+ is_vectorized = true
379
+ source, dimension = trace_dimensional_source(case_expr.result, result_info, vectorized_values, array_fields, definitions)
380
+ result_sources << source
381
+ result_dimensions << dimension
382
+ end
383
+
384
+ # Analyze condition expression
385
+ condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, nested_paths, vectorized_values, errors,
386
+ definitions)
387
+ next unless condition_info[:type] == :vectorized
388
+
389
+ is_vectorized = true
390
+
391
+ # Special handling for cascade_and to check all arguments for dimensional conflicts
392
+ if ENV["DEBUG_CASCADE"]
393
+ puts " Checking condition type: #{case_expr.condition.class}"
394
+ puts " Condition fn_name: #{case_expr.condition.fn_name}" if case_expr.condition.is_a?(Kumi::Syntax::CallExpression)
395
+ end
396
+
397
+ if case_expr.condition.is_a?(Kumi::Syntax::CallExpression) && case_expr.condition.fn_name == :cascade_and
398
+ puts " -> ENTERING CASCADE_AND SPECIAL HANDLING" if ENV["DEBUG_CASCADE"]
399
+ # For cascade_and, check all individual trait references for dimensional conflicts
400
+ cascade_sources = []
401
+ cascade_dimensions = []
402
+
403
+ puts " cascade_and args: #{case_expr.condition.args.map(&:class)}" if ENV["DEBUG_CASCADE"]
404
+
405
+ case_expr.condition.args.each do |arg|
406
+ puts " Processing arg: #{arg.inspect}" if ENV["DEBUG_CASCADE"]
407
+ next unless arg.is_a?(Kumi::Syntax::DeclarationReference)
408
+
409
+ puts " Looking up declaration: #{arg.name}" if ENV["DEBUG_CASCADE"]
410
+ decl = definitions[arg.name] if definitions
411
+ if decl
412
+ puts " Found declaration, tracing source..." if ENV["DEBUG_CASCADE"]
413
+ arg_source, arg_dimension = trace_dimensional_source(decl.expression, condition_info, vectorized_values,
414
+ array_fields, definitions)
415
+ puts " Traced source: #{arg_source}, dimension: #{arg_dimension}" if ENV["DEBUG_CASCADE"]
416
+ cascade_sources << arg_source
417
+ cascade_dimensions << arg_dimension
418
+ elsif ENV["DEBUG_CASCADE"]
419
+ puts " Declaration not found: #{arg.name}"
420
+ end
421
+ end
422
+
423
+ # Check for conflicts between cascade_and arguments
424
+ unique_sources = cascade_sources.uniq
425
+ unique_dimensions = cascade_dimensions.uniq
426
+
427
+ if ENV["DEBUG_CASCADE"]
428
+ puts " cascade_sources: #{cascade_sources.inspect}"
429
+ puts " cascade_dimensions: #{cascade_dimensions.inspect}"
430
+ puts " unique_sources: #{unique_sources.inspect}"
431
+ puts " unique_dimensions: #{unique_dimensions.inspect}"
432
+ end
433
+
434
+ # Check for dimensional conflicts - either different sources OR incompatible dimensions
435
+ has_source_conflict = unique_sources.length > 1 && unique_sources.none? { |s| s.to_s.include?("unknown") }
436
+ has_dimension_conflict = unique_dimensions.length > 1 && !valid_hierarchical_broadcasting?(unique_dimensions)
437
+
438
+ if ENV["DEBUG_CASCADE"]
439
+ puts " has_source_conflict: #{has_source_conflict}"
440
+ puts " has_dimension_conflict: #{has_dimension_conflict}"
441
+ if unique_dimensions.length > 1
442
+ puts " valid_hierarchical_broadcasting?: #{valid_hierarchical_broadcasting?(unique_dimensions)}"
443
+ end
444
+ end
445
+
446
+ if has_source_conflict || has_dimension_conflict
447
+ # Multiple different sources or incompatible dimensions in same cascade_and - this is invalid
448
+ if ENV["DEBUG_CASCADE"]
449
+ puts " -> FOUND CASCADE_AND DIMENSIONAL CONFLICT:"
450
+ puts " Sources: #{unique_sources.inspect}"
451
+ puts " Dimensions: #{unique_dimensions.inspect}"
452
+ puts " Source conflict: #{has_source_conflict}"
453
+ puts " Dimension conflict: #{has_dimension_conflict}"
454
+ end
455
+ report_cascade_dimension_mismatch(errors, expr, unique_sources, unique_dimensions)
456
+ return { type: :scalar }
457
+ end
197
458
 
198
- # Check if condition is vectorized
199
- condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, vectorized_values, errors)
200
- vectorized_conditions << (condition_info[:type] == :vectorized)
459
+ # Use the first valid source as the overall condition source
460
+ condition_sources.concat(cascade_sources)
461
+ condition_dimensions.concat(cascade_dimensions)
462
+ else
463
+ source, dimension = trace_dimensional_source(case_expr.condition, condition_info, vectorized_values, array_fields,
464
+ definitions)
465
+ condition_sources << source
466
+ condition_dimensions << dimension
467
+ end
201
468
  end
202
469
 
203
- if vectorized_results.any? || vectorized_conditions.any?
204
- { type: :vectorized, info: { source: :cascade_with_vectorized_conditions_or_results } }
470
+ if is_vectorized
471
+ # Validate dimensional compatibility
472
+ all_sources = (condition_sources + result_sources).compact.uniq
473
+ all_dimensions = (condition_dimensions + result_dimensions).compact.uniq
474
+
475
+ if ENV["DEBUG_CASCADE"]
476
+ puts " is_vectorized: true"
477
+ puts " condition_sources: #{condition_sources.inspect}"
478
+ puts " result_sources: #{result_sources.inspect}"
479
+ puts " condition_dimensions: #{condition_dimensions.inspect}"
480
+ puts " result_dimensions: #{result_dimensions.inspect}"
481
+ puts " all_sources: #{all_sources.inspect}"
482
+ puts " all_dimensions: #{all_dimensions.inspect}"
483
+ end
484
+
485
+ # For now, be less strict about dimensional validation
486
+ # Only report mismatches for clearly incompatible sources
487
+ definite_sources = all_sources.reject { |s| s.to_s.include?("unknown") || s.to_s.include?("operation") }
488
+
489
+ if ENV["DEBUG_CASCADE"]
490
+ puts " definite_sources: #{definite_sources.inspect}"
491
+ puts " definite_sources.length: #{definite_sources.length}"
492
+ end
493
+
494
+ if definite_sources.length > 1
495
+ # Check if sources are in valid hierarchical relationship (parent-child broadcasting)
496
+ is_valid_hierarchical = valid_hierarchical_broadcasting?(all_dimensions)
497
+ puts " valid_hierarchical_broadcasting?: #{is_valid_hierarchical}" if ENV["DEBUG_CASCADE"]
498
+ unless is_valid_hierarchical
499
+ # Multiple definite dimensional sources - this is a real mismatch
500
+ puts " -> REPORTING DIMENSIONAL MISMATCH" if ENV["DEBUG_CASCADE"]
501
+ report_cascade_dimension_mismatch(errors, expr, definite_sources, all_dimensions)
502
+ return { type: :scalar } # Treat as scalar to prevent further errors
503
+ end
504
+ end
505
+
506
+ # Compute cascade processing strategy based on dimensional analysis
507
+ processing_strategy = compute_cascade_processing_strategy(all_dimensions.first, nested_paths)
508
+
509
+ { type: :vectorized, info: {
510
+ source: :cascade_with_vectorized_conditions_or_results,
511
+ dimensional_requirements: {
512
+ conditions: { sources: condition_sources.uniq, dimensions: condition_dimensions.uniq },
513
+ results: { sources: result_sources.uniq, dimensions: result_dimensions.uniq }
514
+ },
515
+ primary_dimension: all_dimensions.first,
516
+ nested_paths: extract_nested_paths_from_dimensions(all_dimensions.first, nested_paths),
517
+ processing_strategy: processing_strategy
518
+ } }
205
519
  else
206
520
  { type: :scalar }
207
521
  end
208
522
  end
209
523
 
524
+ def trace_dimensional_source(expr, info, vectorized_values, array_fields, definitions = nil)
525
+ # Trace dimensional source by examining the AST node directly
526
+ case expr
527
+ when Kumi::Syntax::InputElementReference
528
+ # Direct array field access
529
+ source = expr.path.first
530
+ dimension = expr.path
531
+ [source, dimension]
532
+ when Kumi::Syntax::DeclarationReference
533
+ # Reference to another declaration - look up its dimensional info
534
+ if vectorized_values[expr.name]
535
+ vectorized_info = vectorized_values[expr.name]
536
+ if vectorized_info[:array_source]
537
+ [vectorized_info[:array_source], [vectorized_info[:array_source]]]
538
+ else
539
+ # Need to trace through the declaration's expression to find the real source
540
+ decl = definitions[expr.name] if definitions
541
+ if decl
542
+ # Recursively trace the declaration's expression
543
+ trace_dimensional_source(decl.expression, info, vectorized_values, array_fields, definitions)
544
+ else
545
+ [:unknown_vectorized_operation, [:unknown_vectorized_operation]]
546
+ end
547
+ end
548
+ else
549
+ [:unknown_declaration, [:unknown_declaration]]
550
+ end
551
+ when Kumi::Syntax::CallExpression
552
+ # For call expressions, trace through the arguments to find dimensional source
553
+ first_vectorized_arg = expr.args.find do |arg|
554
+ arg_info = analyze_argument_vectorization(arg, array_fields, {}, vectorized_values, definitions)
555
+ arg_info[:vectorized]
556
+ end
557
+
558
+ if first_vectorized_arg
559
+ trace_dimensional_source(first_vectorized_arg, info, vectorized_values, array_fields, definitions)
560
+ else
561
+ [:operation_unknown, [:operation_unknown]]
562
+ end
563
+ else
564
+ [:unknown_expr, [:unknown_expr]]
565
+ end
566
+ end
567
+
568
+ def extract_dimensional_info_with_context(info, _array_fields, _nested_paths, vectorized_values)
569
+ case info[:source]
570
+ when :array_field_access, :nested_array_access
571
+ # Direct array field access - use the path
572
+ source = info[:path]&.first
573
+ dimension = info[:path]
574
+ [source, dimension]
575
+ when :vectorized_declaration
576
+ # Reference to another vectorized declaration - look it up
577
+ if info[:name] && vectorized_values[info[:name]]
578
+ vectorized_info = vectorized_values[info[:name]]
579
+ if vectorized_info[:array_source]
580
+ # This declaration references an array field, use that source
581
+ [vectorized_info[:array_source], [vectorized_info[:array_source]]]
582
+ else
583
+ # This is a derived vectorized value, try to trace its source
584
+ [:vectorized_reference, [:vectorized_reference]]
585
+ end
586
+ else
587
+ [:unknown_declaration, [:unknown_declaration]]
588
+ end
589
+ else
590
+ # Operations and other cases - try to extract from operation args
591
+ if info[:operation] && info[:vectorized_args]
592
+ # This is an operation result - trace the vectorized arguments
593
+ # For now, assume operations inherit the dimension of their first vectorized arg
594
+ [:operation_result, [:operation_result]]
595
+ else
596
+ [:unknown, [:unknown]]
597
+ end
598
+ end
599
+ end
600
+
601
+ def extract_dimensional_source(info, _array_fields)
602
+ case info[:source]
603
+ when :array_field_access
604
+ info[:path]&.first
605
+ when :nested_array_access
606
+ info[:path]&.first
607
+ when :vectorized_declaration, :vectorized_value
608
+ # Try to extract from the vectorized value info if available
609
+ if info[:name] && info.dig(:info, :path)
610
+ info[:info][:path].first
611
+ else
612
+ :vectorized_reference
613
+ end
614
+ else
615
+ # For operations and other cases, try to infer from vectorized args
616
+ if info[:vectorized_args]
617
+ # This is likely an operation - we should look at its arguments
618
+ :operation_result
619
+ else
620
+ :unknown
621
+ end
622
+ end
623
+ end
624
+
625
+ def extract_dimensions(info, _array_fields, _nested_paths)
626
+ case info[:source]
627
+ when :array_field_access
628
+ info[:path]
629
+ when :nested_array_access
630
+ info[:path]
631
+ when :vectorized_declaration, :vectorized_value
632
+ # Try to extract from the vectorized value info if available
633
+ if info[:name] && info.dig(:info, :path)
634
+ info[:info][:path]
635
+ else
636
+ [:vectorized_reference]
637
+ end
638
+ else
639
+ # For operations, try to infer from the operation context
640
+ if info[:vectorized_args]
641
+ # This is likely an operation - we should trace its arguments
642
+ [:operation_result]
643
+ else
644
+ [:unknown]
645
+ end
646
+ end
647
+ end
648
+
649
+ def extract_nested_paths_from_dimensions(dimension, nested_paths)
650
+ return nil unless dimension.is_a?(Array)
651
+
652
+ nested_paths[dimension]
653
+ end
654
+
655
# Checks whether a set of dimension paths can take part in one hierarchical
# (parent-to-child) broadcast.
# Example: [:regions, :offices, :teams] can broadcast to [:regions, :offices, :teams, :employees]
#
# The final element of each multi-element dimension is dropped to obtain its
# structural path. The set is accepted when there is at most one dimension,
# when all structural paths are identical, or when they share one root and
# form a single ancestor chain (each path is a prefix of the next longer one).
#
# Set ENV["DEBUG_CASCADE"] to print the decision trace to stdout.
#
# @param dimensions [Array<Array>] dimension paths to compare
# @return [Boolean] true when the dimensions are compatible
def valid_hierarchical_broadcasting?(dimensions)
  debugging = ENV["DEBUG_CASCADE"]
  puts " DEBUG valid_hierarchical_broadcasting?: dimensions=#{dimensions.inspect}" if debugging

  return true if dimensions.length <= 1

  # [:regions, :offices, :teams, :performance_score] -> [:regions, :offices, :teams]
  # Single-element dimensions are kept as they are.
  structural_paths = dimensions.map { |dim| dim.length > 1 ? dim[0..-2] : dim }.uniq
  puts " structural_paths: #{structural_paths.inspect}" if debugging

  # All dimensions must come from the same root (first element).
  roots = structural_paths.map(&:first).uniq
  puts " root_groups: #{roots.inspect}" if debugging
  if roots.length > 1
    puts " -> REJECT: Multiple roots" if debugging
    return false
  end

  if structural_paths.length == 1
    puts " -> ACCEPT: All dimensions at same structural level" if debugging
    return true
  end

  sorted_paths = structural_paths.sort_by(&:length)
  puts " sorted structural paths: #{sorted_paths.inspect}" if debugging

  # Every path must extend the previous one. Accepting as soon as *some* pair
  # is parent and child would let sibling branches through whenever a shared
  # ancestor is also present, e.g. [:r], [:r, :a], [:r, :b]. Paths are unique,
  # so two paths of equal length always fail the prefix test.
  broken_link = sorted_paths.each_cons(2).find do |shorter, longer|
    longer[0, shorter.length] != shorter
  end

  if broken_link
    if debugging
      puts " -> REJECT: #{broken_link[0].inspect} and #{broken_link[1].inspect} " \
           "are sibling branches, not parent and child"
    end
    return false
  end

  puts " -> ACCEPT: All dimensions compatible" if debugging
  true
end
728
+
729
# Builds the processing-strategy descriptor for a cascade from its primary
# dimension path.
#
# @param primary_dimension [Array, nil] dimension path; a falsy value means scalar
# @param nested_paths [#[]] mapping indexed by dimension path
# @return [Hash] { mode: :scalar } when there is no primary dimension,
#   otherwise a hash with :mode, :structure_depth, :dimension_path,
#   :element_processing and :nested_path_info
def compute_cascade_processing_strategy(primary_dimension, nested_paths)
  return { mode: :scalar } unless primary_dimension

  # The path length decides how deep the nested processing has to go.
  depth = primary_dimension.length

  mode =
    case depth
    when 0..1 then :simple_array       # single-level array processing
    when 2..4 then :nested_array       # multi-level nested array processing
    else :deep_nested_array            # very deep nesting (5+ levels)
    end

  {
    mode: mode,
    structure_depth: depth,
    dimension_path: primary_dimension,
    element_processing: :cascade_conditional_logic,
    nested_path_info: nested_paths[primary_dimension]
  }
end
756
+
757
# Records a semantic error for a cascade that mixes incompatible array sources.
#
# When ENV["DEBUG_CASCADE"] is set, the sources, dimensions and the result of
# valid_hierarchical_broadcasting? are printed to stdout first.
#
# @param errors [Object] error collection handed to report_error
# @param expr [#loc] expression whose location is attached to the error
# @param sources [Array] source identifiers, joined into the message
# @param dimensions [Array] dimension paths, inspected into the message
# @return [Object] whatever report_error returns
def report_cascade_dimension_mismatch(errors, expr, sources, dimensions)
  if ENV["DEBUG_CASCADE"]
    puts "DEBUG: Dimensional analysis details:"
    puts " Sources: #{sources.inspect}"
    puts " Dimensions: #{dimensions.inspect}"
    puts " Valid hierarchical? #{valid_hierarchical_broadcasting?(dimensions)}"
  end

  source_list = sources.join(", ")
  dimension_list = dimensions.map(&:inspect).join(", ")

  report_error(
    errors,
    "Cascade dimensional mismatch: Cannot mix arrays from different sources (#{source_list}) " \
    "with dimensions (#{dimension_list}) in cascade conditions and results.",
    location: expr.loc,
    type: :semantic
  )
end
767
+
210
768
  def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
211
769
  # Build detailed error message with type information
212
770
  summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "