kumi 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/CLAUDE.md +18 -258
  4. data/README.md +188 -121
  5. data/docs/AST.md +1 -1
  6. data/docs/FUNCTIONS.md +52 -8
  7. data/docs/VECTOR_SEMANTICS.md +286 -0
  8. data/docs/compiler_design_principles.md +86 -0
  9. data/docs/features/README.md +15 -2
  10. data/docs/features/hierarchical-broadcasting.md +349 -0
  11. data/docs/features/javascript-transpiler.md +148 -0
  12. data/docs/features/performance.md +1 -3
  13. data/docs/features/s-expression-printer.md +2 -2
  14. data/docs/schema_metadata.md +7 -7
  15. data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
  16. data/examples/game_of_life.rb +2 -4
  17. data/lib/kumi/analyzer.rb +34 -14
  18. data/lib/kumi/compiler.rb +4 -283
  19. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
  20. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  21. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  22. data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
  23. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  24. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  25. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  26. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  27. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
  28. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  29. data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
  30. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  31. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  32. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
  33. data/lib/kumi/core/analyzer/plans.rb +52 -0
  34. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  35. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  36. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  37. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  38. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  39. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  40. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  41. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  42. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  43. data/lib/kumi/core/compiler_base.rb +137 -0
  44. data/lib/kumi/core/error_reporter.rb +6 -5
  45. data/lib/kumi/core/errors.rb +4 -0
  46. data/lib/kumi/core/explain.rb +157 -205
  47. data/lib/kumi/core/export/node_builders.rb +2 -2
  48. data/lib/kumi/core/export/node_serializers.rb +1 -1
  49. data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
  50. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  51. data/lib/kumi/core/function_registry/function_builder.rb +142 -53
  52. data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
  53. data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
  54. data/lib/kumi/core/function_registry.rb +138 -98
  55. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  56. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  57. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  58. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  59. data/lib/kumi/core/ir.rb +58 -0
  60. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  61. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  62. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
  63. data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
  64. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  65. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  66. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  67. data/lib/kumi/errors.rb +2 -0
  68. data/lib/kumi/js.rb +23 -0
  69. data/lib/kumi/registry.rb +17 -22
  70. data/lib/kumi/runtime/executable.rb +213 -0
  71. data/lib/kumi/schema.rb +15 -4
  72. data/lib/kumi/schema_metadata.rb +2 -2
  73. data/lib/kumi/support/ir_dump.rb +491 -0
  74. data/lib/kumi/support/s_expression_printer.rb +17 -16
  75. data/lib/kumi/syntax/array_expression.rb +6 -6
  76. data/lib/kumi/syntax/call_expression.rb +4 -4
  77. data/lib/kumi/syntax/cascade_expression.rb +4 -4
  78. data/lib/kumi/syntax/case_expression.rb +4 -4
  79. data/lib/kumi/syntax/declaration_reference.rb +4 -4
  80. data/lib/kumi/syntax/hash_expression.rb +4 -4
  81. data/lib/kumi/syntax/input_declaration.rb +6 -5
  82. data/lib/kumi/syntax/input_element_reference.rb +5 -5
  83. data/lib/kumi/syntax/input_reference.rb +5 -5
  84. data/lib/kumi/syntax/literal.rb +4 -4
  85. data/lib/kumi/syntax/location.rb +5 -0
  86. data/lib/kumi/syntax/node.rb +33 -34
  87. data/lib/kumi/syntax/root.rb +6 -6
  88. data/lib/kumi/syntax/trait_declaration.rb +4 -4
  89. data/lib/kumi/syntax/value_declaration.rb +4 -4
  90. data/lib/kumi/version.rb +1 -1
  91. data/lib/kumi.rb +6 -15
  92. data/scripts/analyze_broadcast_methods.rb +68 -0
  93. data/scripts/analyze_cascade_methods.rb +74 -0
  94. data/scripts/check_broadcasting_coverage.rb +51 -0
  95. data/scripts/find_dead_code.rb +114 -0
  96. metadata +36 -9
  97. data/docs/features/array-broadcasting.md +0 -170
  98. data/lib/kumi/cli.rb +0 -449
  99. data/lib/kumi/core/compiled_schema.rb +0 -43
  100. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  101. data/lib/kumi/core/schema_instance.rb +0 -111
  102. data/lib/kumi/core/vectorization_metadata.rb +0 -110
  103. data/migrate_to_core_iterative.rb +0 -938
@@ -5,21 +5,28 @@ module Kumi
5
5
  module Analyzer
6
6
  module Passes
7
7
  # Detects which operations should be broadcast over arrays
8
- # DEPENDENCIES: :inputs, :declarations
8
+ # DEPENDENCIES: :input_metadata, :declarations
9
9
  # PRODUCES: :broadcasts
10
10
  class BroadcastDetector < PassBase
11
11
  def run(errors)
12
- input_meta = get_state(:inputs) || {}
12
+ input_meta = get_state(:input_metadata) || {}
13
13
  definitions = get_state(:declarations) || {}
14
14
 
15
15
  # Find array fields with their element types
16
16
  array_fields = find_array_fields(input_meta)
17
17
 
18
+ # Build nested paths metadata for nested array traversal
19
+ nested_paths = build_nested_paths_metadata(input_meta)
20
+
18
21
  # Build compiler metadata
19
22
  compiler_metadata = {
20
23
  array_fields: array_fields,
21
24
  vectorized_operations: {},
22
- reduction_operations: {}
25
+ reduction_operations: {},
26
+ nested_paths: nested_paths,
27
+ flattening_declarations: {}, # Track which declarations need flattening
28
+ cascade_strategies: {}, # Pre-computed cascade processing strategies
29
+ compilation_metadata: {} # Pre-computed compilation decisions
23
30
  }
24
31
 
25
32
  # Track which values are vectorized for type inference
@@ -30,19 +37,36 @@ module Kumi
30
37
  values = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::ValueDeclaration) }
31
38
 
32
39
  (traits.to_a + values.to_a).each do |name, decl|
33
- result = analyze_value_vectorization(name, decl.expression, array_fields, vectorized_values, errors)
40
+ result = analyze_value_vectorization(name, decl.expression, array_fields, nested_paths, vectorized_values, errors,
41
+ definitions)
42
+
43
+ if ENV["DEBUG_BROADCAST_CLEAN"]
44
+ puts "#{name}: #{result[:type]} #{format_broadcast_info(result)}"
45
+ end
34
46
 
35
47
  case result[:type]
36
48
  when :vectorized
37
49
  compiler_metadata[:vectorized_operations][name] = result[:info]
50
+
51
+ # If this is a cascade with processing strategy, store it separately for easy compiler access
52
+ compiler_metadata[:cascade_strategies][name] = result[:info][:processing_strategy] if result[:info][:processing_strategy]
53
+
38
54
  # Store array source information for dimension checking
39
55
  array_source = extract_array_source(result[:info], array_fields)
40
56
  vectorized_values[name] = { vectorized: true, array_source: array_source }
41
57
  when :reduction
42
58
  compiler_metadata[:reduction_operations][name] = result[:info]
59
+ # Mark this specific declaration as needing flattening for its argument
60
+ compiler_metadata[:flattening_declarations][name] = result[:info]
43
61
  # Reduction produces scalar, not vectorized
44
62
  vectorized_values[name] = { vectorized: false }
45
63
  end
64
+
65
+ # Pre-compute compilation metadata for each declaration
66
+ compilation_meta = compute_compilation_metadata(
67
+ name, decl, compiler_metadata, vectorized_values, array_fields
68
+ )
69
+ compiler_metadata[:compilation_metadata][name] = compilation_meta
46
70
  end
47
71
 
48
72
  state.with(:broadcasts, compiler_metadata.freeze)
@@ -50,6 +74,113 @@ module Kumi
50
74
 
51
75
  private
52
76
 
77
+ def infer_argument_scope(arg, array_fields, nested_paths)
78
+ case arg
79
+ when Kumi::Syntax::InputElementReference
80
+ if nested_paths.key?(arg.path)
81
+ # Extract scope from path - each array dimension in the path
82
+ arg.path.select.with_index { |_seg, i| nested_paths[arg.path[0..i]] }
83
+ else
84
+ arg.path.select { |seg| array_fields.key?(seg) }
85
+ end
86
+ when Kumi::Syntax::CallExpression
87
+ # For nested calls, find the deepest input reference
88
+ deepest_scope = []
89
+ arg.args.each do |nested_arg|
90
+ scope = infer_argument_scope(nested_arg, array_fields, nested_paths)
91
+ deepest_scope = scope if scope.length > deepest_scope.length
92
+ end
93
+ deepest_scope
94
+ else
95
+ []
96
+ end
97
+ end
98
+
99
+ def format_broadcast_info(result)
100
+ case result[:type]
101
+ when :vectorized
102
+ info = result[:info]
103
+ "→ #{info[:source]} (path: #{info[:path]&.join('.')})"
104
+ when :reduction
105
+ info = result[:info]
106
+ "→ fn:#{info[:function]} (arg: #{info[:argument]&.class&.name&.split('::')&.last})"
107
+ when :scalar
108
+ "→ scalar"
109
+ else
110
+ "→ #{result[:info]}"
111
+ end
112
+ end
113
+
114
+ def compute_compilation_metadata(name, _decl, compiler_metadata, _vectorized_values, _array_fields)
115
+ metadata = {
116
+ operation_mode: :broadcast, # Default mode
117
+ is_vectorized: false,
118
+ vectorization_context: {},
119
+ cascade_info: {},
120
+ function_call_strategy: {}
121
+ }
122
+
123
+ # Check if this declaration is vectorized
124
+ if compiler_metadata[:vectorized_operations][name]
125
+ metadata[:is_vectorized] = true
126
+ vectorized_info = compiler_metadata[:vectorized_operations][name]
127
+
128
+ # Pre-compute vectorization context
129
+ metadata[:vectorization_context] = {
130
+ has_vectorized_args: true,
131
+ needs_broadcasting: true,
132
+ array_structure_depth: estimate_array_depth(vectorized_info, compiler_metadata[:nested_paths])
133
+ }
134
+
135
+ # If this is a cascade, pre-compute cascade processing strategy
136
+ if vectorized_info[:source] == :cascade_with_vectorized_conditions_or_results
137
+ strategy = compiler_metadata[:cascade_strategies][name]
138
+ metadata[:cascade_info] = {
139
+ is_vectorized: true,
140
+ processing_mode: strategy&.dig(:mode) || :hierarchical,
141
+ needs_hierarchical_processing: needs_hierarchical_processing?(strategy)
142
+ }
143
+ end
144
+ end
145
+
146
+ # Check if this declaration needs flattening
147
+ if compiler_metadata[:flattening_declarations][name]
148
+ metadata[:operation_mode] = :flatten
149
+ flattening_info = compiler_metadata[:flattening_declarations][name]
150
+
151
+ metadata[:function_call_strategy] = {
152
+ flattening_required: true,
153
+ flatten_argument_indices: flattening_info[:flatten_argument_indices] || [0],
154
+ result_structure: :scalar
155
+ }
156
+ end
157
+
158
+ metadata
159
+ end
160
+
161
+ def estimate_array_depth(vectorized_info, nested_paths)
162
+ case vectorized_info[:source]
163
+ when :nested_array_access
164
+ path = vectorized_info[:path]
165
+ nested_paths[path]&.dig(:array_depth) || 1
166
+ when :array_field_access
167
+ 1
168
+ else
169
+ 1
170
+ end
171
+ end
172
+
173
+ def needs_hierarchical_processing?(strategy)
174
+ return false unless strategy
175
+
176
+ case strategy[:mode]
177
+ when :nested_array, :deep_nested_array
178
+ true
179
+ else
180
+ false
181
+ end
182
+ end
183
+
53
184
  def find_array_fields(input_meta)
54
185
  result = {}
55
186
  input_meta.each do |name, meta|
@@ -63,10 +194,62 @@ module Kumi
63
194
  result
64
195
  end
65
196
 
66
- def analyze_value_vectorization(name, expr, array_fields, vectorized_values, errors)
197
+ def build_nested_paths_metadata(input_meta)
198
+ nested_paths = {}
199
+
200
+ # Recursively build all possible nested paths from input metadata
201
+ input_meta.each do |root_name, root_meta|
202
+ collect_nested_paths(nested_paths, [root_name], root_meta, 0, nil)
203
+ end
204
+
205
+ nested_paths
206
+ end
207
+
208
+ def collect_nested_paths(nested_paths, current_path, current_meta, array_depth, parent_access_mode = nil)
209
+ # If current field is an array, increment array depth and track its access_mode
210
+ current_access_mode = parent_access_mode
211
+ if current_meta[:type] == :array
212
+ array_depth += 1
213
+ current_access_mode = current_meta[:access_mode] || :field # Default to :field if not specified
214
+ end
215
+
216
+ # If this field has children, recurse into them
217
+ if current_meta[:children]
218
+ current_meta[:children].each do |child_name, child_meta|
219
+ child_path = current_path + [child_name]
220
+
221
+ # Create metadata for this path if it involves arrays
222
+ if array_depth.positive?
223
+ nested_paths[child_path] =
224
+ build_path_metadata(child_path, child_meta, array_depth, current_access_mode)
225
+ end
226
+
227
+ # Recurse into child's children
228
+ collect_nested_paths(nested_paths, child_path, child_meta, array_depth, current_access_mode)
229
+ end
230
+ elsif array_depth.positive?
231
+ # Leaf field - create metadata if it involves arrays
232
+ nested_paths[current_path] = build_path_metadata(current_path, current_meta, array_depth, current_access_mode)
233
+ end
234
+ end
235
+
236
+ def build_path_metadata(_path, field_meta, array_depth, parent_access_mode = nil)
237
+ {
238
+ array_depth: array_depth,
239
+ element_type: field_meta[:type] || :any,
240
+ operation_mode: :broadcast, # Default mode - may be overridden for aggregations
241
+ result_structure: array_depth > 1 ? :nested_array : :array,
242
+ access_mode: parent_access_mode # Access mode of the parent array field
243
+ }
244
+ end
245
+
246
+ def analyze_value_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
67
247
  case expr
68
248
  when Kumi::Syntax::InputElementReference
69
- if array_fields.key?(expr.path.first)
249
+ # Check if this path exists in nested_paths metadata (supports nested arrays)
250
+ if nested_paths.key?(expr.path)
251
+ { type: :vectorized, info: { source: :nested_array_access, path: expr.path, nested_metadata: nested_paths[expr.path] } }
252
+ elsif array_fields.key?(expr.path.first)
70
253
  { type: :vectorized, info: { source: :array_field_access, path: expr.path } }
71
254
  else
72
255
  { type: :scalar }
@@ -82,72 +265,132 @@ module Kumi
82
265
  end
83
266
 
84
267
  when Kumi::Syntax::CallExpression
85
- analyze_call_vectorization(name, expr, array_fields, vectorized_values, errors)
268
+ analyze_call_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
86
269
 
87
270
  when Kumi::Syntax::CascadeExpression
88
- analyze_cascade_vectorization(name, expr, array_fields, vectorized_values, errors)
271
+ analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
89
272
 
90
273
  else
91
274
  { type: :scalar }
92
275
  end
93
276
  end
94
277
 
95
- def analyze_call_vectorization(_name, expr, array_fields, vectorized_values, errors)
96
- # Check if this is a reduction function using function registry metadata
97
- if Kumi::Registry.reducer?(expr.fn_name)
98
- # Only treat as reduction if the argument is actually vectorized
99
- arg_info = analyze_argument_vectorization(expr.args.first, array_fields, vectorized_values)
100
- if arg_info[:vectorized]
101
- { type: :reduction, info: { function: expr.fn_name, source: arg_info[:source] } }
102
- else
103
- # Not a vectorized reduction - just a regular function call
104
- { type: :scalar }
278
+ def analyze_call_vectorization(_name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
279
+ entry = Kumi::Registry.entry(expr.fn_name)
280
+ is_reducer = entry&.reducer
281
+ is_structure = entry&.structure_function
282
+
283
+ # 1) Analyze all args once
284
+ arg_infos = expr.args.map do |arg|
285
+ analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
286
+ end
287
+ vec_idx = arg_infos.each_index.select { |i| arg_infos[i][:vectorized] }
288
+ vec_any = !vec_idx.empty?
289
+
290
+ # 2) Special form: cascade_and (vectorized if any trait arg is vectorized)
291
+ if expr.fn_name == :cascade_and
292
+ vectorized_trait = expr.args.find do |arg|
293
+ arg.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[arg.name]&.[](:vectorized)
294
+ end
295
+ if vectorized_trait
296
+ return { type: :vectorized,
297
+ info: { source: :cascade_condition_with_vectorized_trait, trait: vectorized_trait&.name } }
105
298
  end
106
299
 
107
- else
108
- # Special case: all?, any?, none? functions with vectorized trait arguments should be treated as vectorized
109
- # for cascade condition purposes (they get transformed during compilation)
110
- if %i[all? any? none?].include?(expr.fn_name) && expr.args.length == 1
111
- arg = expr.args.first
112
- if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
113
- trait_ref = arg.elements.first
114
- if trait_ref.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[trait_ref.name]&.[](:vectorized)
115
- return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: trait_ref.name } }
116
- end
300
+ return { type: :scalar }
301
+ end
302
+
303
+ # 3) Reducers: only reduce when the input is actually vectorized
304
+ if is_reducer
305
+ return { type: :scalar } unless vec_any
306
+
307
+ # which args were vectorized?
308
+ flatten_indices = vec_idx.dup
309
+ vectorized_arg_index = vec_idx.first
310
+ argument_ast = expr.args[vectorized_arg_index]
311
+
312
+ src_info = arg_infos[vectorized_arg_index]
313
+
314
+ return {
315
+ type: :reduction,
316
+ info: {
317
+ function: expr.fn_name,
318
+ source: src_info[:source],
319
+ argument: argument_ast, # << keep AST of the vectorized argument
320
+ flatten_argument_indices: flatten_indices
321
+ }
322
+ }
323
+ end
324
+
325
+ # 4) Structure (non-reducer) functions like `size`
326
+ if is_structure
327
+ # If any arg is itself a PURE reducer call (e.g., size(sum(x))), the inner collapses first ⇒ outer is scalar
328
+ # But dual-nature functions (both reducer AND structure) should be treated as structure functions when nested
329
+ return { type: :scalar } if expr.args.any? do |a|
330
+ if a.is_a?(Kumi::Syntax::CallExpression)
331
+ arg_entry = Kumi::Registry.entry(a.fn_name)
332
+ arg_entry&.reducer && !arg_entry&.structure_function # Pure reducer only
333
+ else
334
+ false
117
335
  end
118
336
  end
119
337
 
120
- # ANY function with vectorized arguments becomes vectorized (with broadcasting)
121
- arg_infos = expr.args.map { |arg| analyze_argument_vectorization(arg, array_fields, vectorized_values) }
338
+ # Structure fn over a vectorized element path per-parent vectorization
339
+ return { type: :scalar } unless vec_any
122
340
 
123
- if arg_infos.any? { |info| info[:vectorized] }
124
- # Check for dimension mismatches when multiple arguments are vectorized
125
- vectorized_sources = arg_infos.select { |info| info[:vectorized] }.filter_map { |info| info[:array_source] }.uniq
341
+ src_info = arg_infos[vec_idx.first]
342
+ parent_scope = src_info[:parent_scope] || src_info[:source] # fallback if analyzer encodes parent separately
343
+ return {
344
+ type: :vectorized,
345
+ info: {
346
+ operation: expr.fn_name,
347
+ source: src_info[:source],
348
+ parent_scope: parent_scope,
349
+ vectorized_args: vec_idx.to_h { |i| [i, true] }
350
+ }
351
+ }
126
352
 
127
- if vectorized_sources.length > 1
128
- # Multiple different array sources - this is a dimension mismatch
129
- # Generate enhanced error message with type information
130
- enhanced_message = build_dimension_mismatch_error(expr, arg_infos, array_fields, vectorized_sources)
353
+ # Structure fn over a scalar/materialized container ⇒ scalar
131
354
 
132
- report_error(errors, enhanced_message, location: expr.loc, type: :semantic)
133
- return { type: :scalar } # Treat as scalar to prevent further errors
134
- end
355
+ end
135
356
 
136
- # This is a vectorized operation - ANY function supports broadcasting
137
- { type: :vectorized, info: {
138
- operation: expr.fn_name,
139
- vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
140
- } }
141
- else
142
- { type: :scalar }
357
+ # 5) Generic vectorized map (non-structure, non-reducer)
358
+ if vec_any
359
+ # Dimension / source compatibility check
360
+ sources = vec_idx.map { |i| arg_infos[i][:array_source] }.compact.uniq
361
+ if sources.size > 1
362
+ enhanced_message = build_dimension_mismatch_error(expr, arg_infos, array_fields, sources)
363
+ report_error(errors, enhanced_message, location: expr.loc, type: :semantic)
364
+ return { type: :scalar } # fail safe to prevent cascading errors
143
365
  end
366
+
367
+ return {
368
+ type: :vectorized,
369
+ info: {
370
+ operation: expr.fn_name,
371
+ source: arg_infos[vec_idx.first][:source],
372
+ vectorized_args: vec_idx.to_h { |i| [i, true] }
373
+ }
374
+ }
144
375
  end
376
+
377
+ # 6) Pure scalar
378
+ { type: :scalar }
379
+ end
380
+
381
+ def structure_function?(fn_name)
382
+ # Check if function is marked as working on structure (not broadcast over elements)
383
+ Kumi::Registry.structure_function?(fn_name)
145
384
  end
146
385
 
147
- def analyze_argument_vectorization(arg, array_fields, vectorized_values)
386
+ def analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions = nil)
148
387
  case arg
149
388
  when Kumi::Syntax::InputElementReference
150
- if array_fields.key?(arg.path.first)
389
+ # Check nested paths first (supports nested arrays)
390
+ if nested_paths.key?(arg.path)
391
+ { vectorized: true, source: :nested_array_field, array_source: arg.path.first }
392
+ # Fallback to old array_fields detection for backward compatibility
393
+ elsif array_fields.key?(arg.path.first)
151
394
  { vectorized: true, source: :array_field, array_source: arg.path.first }
152
395
  else
153
396
  { vectorized: false }
@@ -164,9 +407,32 @@ module Kumi
164
407
  end
165
408
 
166
409
  when Kumi::Syntax::CallExpression
167
- # Recursively check
168
- result = analyze_value_vectorization(nil, arg, array_fields, vectorized_values, [])
169
- { vectorized: result[:type] == :vectorized, source: :expression }
410
+ # Recursively check nested call
411
+ result = analyze_value_vectorization(nil, arg, array_fields, nested_paths, vectorized_values, [], definitions)
412
+ # Handle different result types appropriately
413
+ case result[:type]
414
+ when :reduction
415
+ # Reductions can produce vectors if they preserve some dimensions
416
+ # This aligns with lower_to_ir logic for grouped reductions
417
+ info = result[:info]
418
+ if info && info[:argument]
419
+ # Check if the reduction argument has array scope that would be preserved
420
+ arg_scope = infer_argument_scope(info[:argument], array_fields, nested_paths)
421
+ if arg_scope.length > 1
422
+ # Multi-dimensional reduction - likely preserves outer dimension (per-player)
423
+ { vectorized: true, source: :grouped_reduction, array_source: arg_scope.first }
424
+ else
425
+ # Single dimension or scalar reduction
426
+ { vectorized: false, source: :scalar_from_reduction }
427
+ end
428
+ else
429
+ { vectorized: false, source: :scalar_from_reduction }
430
+ end
431
+ when :vectorized
432
+ { vectorized: true, source: :expression }
433
+ else
434
+ { vectorized: false, source: :scalar }
435
+ end
170
436
 
171
437
  else
172
438
  { vectorized: false }
@@ -183,30 +449,415 @@ module Kumi
183
449
  end
184
450
  end
185
451
 
186
- def analyze_cascade_vectorization(_name, expr, array_fields, vectorized_values, errors)
187
- # A cascade is vectorized if:
188
- # 1. Any of its result expressions are vectorized, OR
189
- # 2. Any of its conditions reference vectorized values (traits or arrays)
190
- vectorized_results = []
191
- vectorized_conditions = []
452
+ def analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
453
+ # Enhanced cascade analysis with dimensional intelligence
454
+ condition_sources = []
455
+ result_sources = []
456
+ condition_dimensions = []
457
+ result_dimensions = []
458
+ is_vectorized = false
459
+
460
+ if ENV["DEBUG_CASCADE"]
461
+ puts "DEBUG: analyze_cascade_vectorization for #{name}"
462
+ puts " Expression: #{expr.inspect}"
463
+ puts " Cases: #{expr.cases.length}"
464
+ end
192
465
 
193
466
  expr.cases.each do |case_expr|
194
- # Check if result is vectorized
195
- result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, vectorized_values, errors)
196
- vectorized_results << (result_info[:type] == :vectorized)
467
+ # Analyze result expression
468
+ result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, nested_paths, vectorized_values, errors,
469
+ definitions)
470
+ if result_info[:type] == :vectorized
471
+ is_vectorized = true
472
+ source, dimension = trace_dimensional_source(case_expr.result, result_info, vectorized_values, array_fields, definitions)
473
+ result_sources << source
474
+ result_dimensions << dimension
475
+ end
476
+
477
+ # Analyze condition expression
478
+ condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, nested_paths, vectorized_values, errors,
479
+ definitions)
480
+ next unless condition_info[:type] == :vectorized
481
+
482
+ is_vectorized = true
483
+
484
+ # Special handling for cascade_and to check all arguments for dimensional conflicts
485
+ if ENV["DEBUG_CASCADE"]
486
+ puts " Checking condition type: #{case_expr.condition.class}"
487
+ puts " Condition fn_name: #{case_expr.condition.fn_name}" if case_expr.condition.is_a?(Kumi::Syntax::CallExpression)
488
+ end
489
+
490
+ if case_expr.condition.is_a?(Kumi::Syntax::CallExpression) && case_expr.condition.fn_name == :cascade_and
491
+ puts " -> ENTERING CASCADE_AND SPECIAL HANDLING" if ENV["DEBUG_CASCADE"]
492
+ # For cascade_and, check all individual trait references for dimensional conflicts
493
+ cascade_sources = []
494
+ cascade_dimensions = []
495
+
496
+ puts " cascade_and args: #{case_expr.condition.args.map(&:class)}" if ENV["DEBUG_CASCADE"]
497
+
498
+ case_expr.condition.args.each do |arg|
499
+ puts " Processing arg: #{arg.inspect}" if ENV["DEBUG_CASCADE"]
500
+ next unless arg.is_a?(Kumi::Syntax::DeclarationReference)
501
+
502
+ puts " Looking up declaration: #{arg.name}" if ENV["DEBUG_CASCADE"]
503
+ decl = definitions[arg.name] if definitions
504
+ if decl
505
+ puts " Found declaration, tracing source..." if ENV["DEBUG_CASCADE"]
506
+ arg_source, arg_dimension = trace_dimensional_source(decl.expression, condition_info, vectorized_values,
507
+ array_fields, definitions)
508
+ puts " Traced source: #{arg_source}, dimension: #{arg_dimension}" if ENV["DEBUG_CASCADE"]
509
+ cascade_sources << arg_source
510
+ cascade_dimensions << arg_dimension
511
+ elsif ENV["DEBUG_CASCADE"]
512
+ puts " Declaration not found: #{arg.name}"
513
+ end
514
+ end
197
515
 
198
- # Check if condition is vectorized
199
- condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, vectorized_values, errors)
200
- vectorized_conditions << (condition_info[:type] == :vectorized)
516
+ # Check for conflicts between cascade_and arguments
517
+ unique_sources = cascade_sources.uniq
518
+ unique_dimensions = cascade_dimensions.uniq
519
+
520
+ if ENV["DEBUG_CASCADE"]
521
+ puts " cascade_sources: #{cascade_sources.inspect}"
522
+ puts " cascade_dimensions: #{cascade_dimensions.inspect}"
523
+ puts " unique_sources: #{unique_sources.inspect}"
524
+ puts " unique_dimensions: #{unique_dimensions.inspect}"
525
+ end
526
+
527
+ # Check for dimensional conflicts - either different sources OR incompatible dimensions
528
+ has_source_conflict = unique_sources.length > 1 && unique_sources.none? { |s| s.to_s.include?("unknown") }
529
+ has_dimension_conflict = unique_dimensions.length > 1 && !valid_hierarchical_broadcasting?(unique_dimensions)
530
+
531
+ if ENV["DEBUG_CASCADE"]
532
+ puts " has_source_conflict: #{has_source_conflict}"
533
+ puts " has_dimension_conflict: #{has_dimension_conflict}"
534
+ if unique_dimensions.length > 1
535
+ puts " valid_hierarchical_broadcasting?: #{valid_hierarchical_broadcasting?(unique_dimensions)}"
536
+ end
537
+ end
538
+
539
+ if has_source_conflict || has_dimension_conflict
540
+ # Multiple different sources or incompatible dimensions in same cascade_and - this is invalid
541
+ if ENV["DEBUG_CASCADE"]
542
+ puts " -> FOUND CASCADE_AND DIMENSIONAL CONFLICT:"
543
+ puts " Sources: #{unique_sources.inspect}"
544
+ puts " Dimensions: #{unique_dimensions.inspect}"
545
+ puts " Source conflict: #{has_source_conflict}"
546
+ puts " Dimension conflict: #{has_dimension_conflict}"
547
+ end
548
+ report_cascade_dimension_mismatch(errors, expr, unique_sources, unique_dimensions)
549
+ return { type: :scalar }
550
+ end
551
+
552
+ # Use the first valid source as the overall condition source
553
+ condition_sources.concat(cascade_sources)
554
+ condition_dimensions.concat(cascade_dimensions)
555
+ else
556
+ source, dimension = trace_dimensional_source(case_expr.condition, condition_info, vectorized_values, array_fields,
557
+ definitions)
558
+ condition_sources << source
559
+ condition_dimensions << dimension
560
+ end
201
561
  end
202
562
 
203
- if vectorized_results.any? || vectorized_conditions.any?
204
- { type: :vectorized, info: { source: :cascade_with_vectorized_conditions_or_results } }
563
+ if is_vectorized
564
+ # Validate dimensional compatibility
565
+ all_sources = (condition_sources + result_sources).compact.uniq
566
+ all_dimensions = (condition_dimensions + result_dimensions).compact.uniq
567
+
568
+ if ENV["DEBUG_CASCADE"]
569
+ puts " is_vectorized: true"
570
+ puts " condition_sources: #{condition_sources.inspect}"
571
+ puts " result_sources: #{result_sources.inspect}"
572
+ puts " condition_dimensions: #{condition_dimensions.inspect}"
573
+ puts " result_dimensions: #{result_dimensions.inspect}"
574
+ puts " all_sources: #{all_sources.inspect}"
575
+ puts " all_dimensions: #{all_dimensions.inspect}"
576
+ end
577
+
578
+ # For now, be less strict about dimensional validation
579
+ # Only report mismatches for clearly incompatible sources
580
+ definite_sources = all_sources.reject { |s| s.to_s.include?("unknown") || s.to_s.include?("operation") }
581
+
582
+ if ENV["DEBUG_CASCADE"]
583
+ puts " definite_sources: #{definite_sources.inspect}"
584
+ puts " definite_sources.length: #{definite_sources.length}"
585
+ end
586
+
587
+ if definite_sources.length > 1
588
+ # Check if sources are in valid hierarchical relationship (parent-child broadcasting)
589
+ is_valid_hierarchical = valid_hierarchical_broadcasting?(all_dimensions)
590
+ puts " valid_hierarchical_broadcasting?: #{is_valid_hierarchical}" if ENV["DEBUG_CASCADE"]
591
+ unless is_valid_hierarchical
592
+ # Multiple definite dimensional sources - this is a real mismatch
593
+ puts " -> REPORTING DIMENSIONAL MISMATCH" if ENV["DEBUG_CASCADE"]
594
+ report_cascade_dimension_mismatch(errors, expr, definite_sources, all_dimensions)
595
+ return { type: :scalar } # Treat as scalar to prevent further errors
596
+ end
597
+ end
598
+
599
+ # Compute cascade processing strategy based on dimensional analysis
600
+ processing_strategy = compute_cascade_processing_strategy(all_dimensions.first, nested_paths)
601
+
602
+ { type: :vectorized, info: {
603
+ source: :cascade_with_vectorized_conditions_or_results,
604
+ dimensional_requirements: {
605
+ conditions: { sources: condition_sources.uniq, dimensions: condition_dimensions.uniq },
606
+ results: { sources: result_sources.uniq, dimensions: result_dimensions.uniq }
607
+ },
608
+ primary_dimension: all_dimensions.first,
609
+ nested_paths: extract_nested_paths_from_dimensions(all_dimensions.first, nested_paths),
610
+ processing_strategy: processing_strategy
611
+ } }
205
612
  else
206
613
  { type: :scalar }
207
614
  end
208
615
  end
209
616
 
617
# Walk an AST node to find which array input it ultimately draws from.
#
# Returns a two-element array `[source, dimension]`:
# * input element reference  -> first path segment and the full path
# * declaration reference    -> the recorded :array_source when present,
#                               otherwise the result of tracing the
#                               declaration's own expression (when
#                               `definitions` can supply it)
# * call expression          -> the trace of its first vectorized argument
# Anything that cannot be traced yields a placeholder symbol, once as the
# source and once wrapped in an array as the dimension.
#
# `info` is not inspected here; it is only forwarded to recursive calls.
def trace_dimensional_source(expr, info, vectorized_values, array_fields, definitions = nil)
  case expr
  when Kumi::Syntax::InputElementReference
    [expr.path.first, expr.path]
  when Kumi::Syntax::DeclarationReference
    recorded = vectorized_values[expr.name]
    return [:unknown_declaration, [:unknown_declaration]] unless recorded

    origin = recorded[:array_source]
    return [origin, [origin]] if origin

    # No direct array source recorded: follow the declaration's expression.
    declaration = definitions && definitions[expr.name]
    return [:unknown_vectorized_operation, [:unknown_vectorized_operation]] unless declaration

    trace_dimensional_source(declaration.expression, info, vectorized_values, array_fields, definitions)
  when Kumi::Syntax::CallExpression
    carrier = expr.args.find do |candidate|
      analyze_argument_vectorization(candidate, array_fields, {}, vectorized_values, definitions)[:vectorized]
    end
    return [:operation_unknown, [:operation_unknown]] unless carrier

    trace_dimensional_source(carrier, info, vectorized_values, array_fields, definitions)
  else
    [:unknown_expr, [:unknown_expr]]
  end
end
660
+
661
# Derive a `[source, dimension]` pair from a vectorization info hash.
#
# * :array_field_access / :nested_array_access -> first segment of
#   info[:path] (nil when no path) and the path itself
# * :vectorized_declaration -> the :array_source recorded in
#   `vectorized_values` for info[:name]; :vectorized_reference when the entry
#   has no array source; :unknown_declaration when there is no name or entry
# * anything else -> :operation_result when both info[:operation] and
#   info[:vectorized_args] are set, otherwise :unknown
#
# Placeholder symbols are returned as `[sym, [sym]]`. The two underscored
# parameters are unused.
def extract_dimensional_info_with_context(info, _array_fields, _nested_paths, vectorized_values)
  case info[:source]
  when :array_field_access, :nested_array_access
    [info[:path]&.first, info[:path]]
  when :vectorized_declaration
    referenced = info[:name] && vectorized_values[info[:name]]
    if referenced
      origin = referenced[:array_source] || :vectorized_reference
      [origin, [origin]]
    else
      [:unknown_declaration, [:unknown_declaration]]
    end
  else
    placeholder = info[:operation] && info[:vectorized_args] ? :operation_result : :unknown
    [placeholder, [placeholder]]
  end
end
693
+
694
# Pick the dimensional source symbol out of a vectorization info hash.
#
# Array accesses (plain or nested) yield the first segment of info[:path],
# or nil when there is no path. Vectorized declarations/values yield the
# first segment of info[:info][:path] when both a name and that path are
# present, else :vectorized_reference. Every other kind yields
# :operation_result when info[:vectorized_args] is set, else :unknown.
# `_array_fields` is unused.
def extract_dimensional_source(info, _array_fields)
  case info[:source]
  when :array_field_access, :nested_array_access
    info[:path]&.first
  when :vectorized_declaration, :vectorized_value
    traceable = info[:name] && info.dig(:info, :path)
    traceable ? info[:info][:path].first : :vectorized_reference
  else
    info[:vectorized_args] ? :operation_result : :unknown
  end
end
717
+
718
# Pick the dimension path out of a vectorization info hash.
#
# Array accesses (plain or nested) yield info[:path] unchanged. Vectorized
# declarations/values yield info[:info][:path] when both a name and that
# path are present, else [:vectorized_reference]. Every other kind yields
# [:operation_result] when info[:vectorized_args] is set, else [:unknown].
# The two underscored parameters are unused.
def extract_dimensions(info, _array_fields, _nested_paths)
  case info[:source]
  when :array_field_access, :nested_array_access
    info[:path]
  when :vectorized_declaration, :vectorized_value
    traceable = info[:name] && info.dig(:info, :path)
    traceable ? info[:info][:path] : [:vectorized_reference]
  else
    info[:vectorized_args] ? [:operation_result] : [:unknown]
  end
end
741
+
742
# Look up the `nested_paths` entry keyed by `dimension`.
# Returns nil without touching `nested_paths` when `dimension` is not an Array.
def extract_nested_paths_from_dimensions(dimension, nested_paths)
  nested_paths[dimension] if dimension.is_a?(Array)
end
747
+
748
# Check whether a set of dimension paths can be combined through hierarchical
# (parent-to-child) broadcasting.
# Example: [:regions, :offices, :teams] can broadcast to
# [:regions, :offices, :teams, :employees].
#
# Each dimension is reduced to its "structural path" by dropping the final
# field name (single-element dimensions are kept as they are). The set is
# valid when:
#   * there is at most one dimension, or
#   * all structural paths are identical (same nesting level), or
#   * all structural paths share one root AND, ordered by length, each one is
#     a prefix of the next, i.e. they form a single ancestor chain.
#
# The chain requirement matters: accepting as soon as *any* parent/child pair
# exists would let [:a], [:a, :b], [:a, :c] through even though [:a, :b] and
# [:a, :c] are sibling branches.
#
# Set ENV["DEBUG_CASCADE"] to print the decision trace to stdout.
#
# @param dimensions [Array<Array>] dimension paths
# @return [Boolean]
def valid_hierarchical_broadcasting?(dimensions)
  debug = ENV["DEBUG_CASCADE"]
  puts " DEBUG valid_hierarchical_broadcasting?: dimensions=#{dimensions.inspect}" if debug

  return true if dimensions.length <= 1

  # Drop the trailing field name so that [:regions, :offices, :teams, :performance_score]
  # is compared as [:regions, :offices, :teams].
  structural_paths = dimensions.map { |dim| dim.length > 1 ? dim[0..-2] : dim }.uniq
  puts " structural_paths: #{structural_paths.inspect}" if debug

  roots = structural_paths.map(&:first).uniq
  puts " root_groups: #{roots.inspect}" if debug

  # All dimensions must come from the same root
  if roots.length > 1
    puts " -> REJECT: Multiple roots" if debug
    return false
  end

  if structural_paths.length == 1
    puts " -> ACCEPT: All dimensions at same structural level" if debug
    return true
  end

  sorted_paths = structural_paths.sort_by(&:length)
  puts " sorted structural paths: #{sorted_paths.inspect}" if debug

  # Prefix-of is transitive, so checking neighbours in length order is enough.
  # Two distinct paths of equal length can never be prefixes of one another,
  # so same-depth siblings are caught here as well.
  broken_link = sorted_paths.each_cons(2).find do |shorter, longer|
    longer[0, shorter.length] != shorter
  end

  if broken_link
    if debug
      puts " -> REJECT: #{broken_link[0].inspect} and #{broken_link[1].inspect} " \
           "are not in a parent-child relationship - these are sibling branches"
    end
    return false
  end

  puts " -> ACCEPT: All dimensions compatible" if debug
  true
end
821
+
822
# Build the processing-strategy hash for a cascade from its primary dimension.
#
# Returns `{ mode: :scalar }` when `primary_dimension` is nil/false. Otherwise
# the mode is chosen from the dimension's length:
#   0..1 -> :simple_array, 2..4 -> :nested_array, anything else -> :deep_nested_array
# and the hash also carries the depth, the dimension path, the fixed
# :cascade_conditional_logic element-processing tag, and whatever
# `nested_paths` holds under `primary_dimension`.
def compute_cascade_processing_strategy(primary_dimension, nested_paths)
  return { mode: :scalar } unless primary_dimension

  depth = primary_dimension.length

  mode = case depth
         when 0..1 then :simple_array
         when 2..4 then :nested_array
         else :deep_nested_array
         end

  {
    mode: mode,
    structure_depth: depth,
    dimension_path: primary_dimension,
    element_processing: :cascade_conditional_logic,
    nested_path_info: nested_paths[primary_dimension]
  }
end
849
+
850
# Record a semantic error saying that a cascade mixes arrays from different
# sources. The message lists `sources` joined by ", " and the `inspect` form
# of each entry in `dimensions`; it is reported at `expr.loc` via
# `report_error`. With ENV["DEBUG_CASCADE"] set, the inputs and the outcome of
# `valid_hierarchical_broadcasting?` are printed to stdout first.
def report_cascade_dimension_mismatch(errors, expr, sources, dimensions)
  if ENV["DEBUG_CASCADE"]
    puts "DEBUG: Dimensional analysis details:"
    puts " Sources: #{sources.inspect}"
    puts " Dimensions: #{dimensions.inspect}"
    puts " Valid hierarchical? #{valid_hierarchical_broadcasting?(dimensions)}"
  end

  source_list = sources.join(', ')
  dimension_list = dimensions.map(&:inspect).join(', ')
  message = "Cascade dimensional mismatch: Cannot mix arrays from different sources (#{source_list}) " \
            "with dimensions (#{dimension_list}) in cascade conditions and results."

  report_error(errors, message, location: expr.loc, type: :semantic)
end
860
+
210
861
  def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
211
862
  # Build detailed error message with type information
212
863
  summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "