kumi 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +28 -44
- data/README.md +187 -120
- data/docs/AST.md +1 -1
- data/docs/FUNCTIONS.md +52 -8
- data/docs/compiler_design_principles.md +86 -0
- data/docs/features/README.md +15 -2
- data/docs/features/hierarchical-broadcasting.md +349 -0
- data/docs/features/javascript-transpiler.md +148 -0
- data/docs/features/performance.md +1 -3
- data/docs/schema_metadata.md +7 -7
- data/examples/game_of_life.rb +2 -4
- data/lib/kumi/analyzer.rb +0 -2
- data/lib/kumi/compiler.rb +6 -275
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +600 -42
- data/lib/kumi/core/analyzer/passes/input_collector.rb +4 -2
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +27 -0
- data/lib/kumi/core/analyzer/passes/type_checker.rb +6 -2
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +90 -46
- data/lib/kumi/core/cascade_executor_builder.rb +132 -0
- data/lib/kumi/core/compiler/expression_compiler.rb +146 -0
- data/lib/kumi/core/compiler/function_invoker.rb +55 -0
- data/lib/kumi/core/compiler/path_traversal_compiler.rb +158 -0
- data/lib/kumi/core/compiler/reference_compiler.rb +46 -0
- data/lib/kumi/core/compiler_base.rb +137 -0
- data/lib/kumi/core/explain.rb +2 -2
- data/lib/kumi/core/function_registry/collection_functions.rb +86 -3
- data/lib/kumi/core/function_registry/function_builder.rb +5 -3
- data/lib/kumi/core/function_registry/logical_functions.rb +171 -1
- data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
- data/lib/kumi/core/function_registry.rb +32 -10
- data/lib/kumi/core/nested_structure_utils.rb +78 -0
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
- data/lib/kumi/core/schema_instance.rb +4 -0
- data/lib/kumi/core/vectorized_function_builder.rb +88 -0
- data/lib/kumi/errors.rb +2 -0
- data/lib/kumi/js/compiler.rb +878 -0
- data/lib/kumi/js/function_registry.rb +333 -0
- data/lib/kumi/js.rb +23 -0
- data/lib/kumi/registry.rb +61 -1
- data/lib/kumi/schema.rb +1 -1
- data/lib/kumi/support/s_expression_printer.rb +16 -15
- data/lib/kumi/syntax/array_expression.rb +6 -6
- data/lib/kumi/syntax/call_expression.rb +4 -4
- data/lib/kumi/syntax/cascade_expression.rb +4 -4
- data/lib/kumi/syntax/case_expression.rb +4 -4
- data/lib/kumi/syntax/declaration_reference.rb +4 -4
- data/lib/kumi/syntax/hash_expression.rb +4 -4
- data/lib/kumi/syntax/input_declaration.rb +6 -5
- data/lib/kumi/syntax/input_element_reference.rb +5 -5
- data/lib/kumi/syntax/input_reference.rb +5 -5
- data/lib/kumi/syntax/literal.rb +4 -4
- data/lib/kumi/syntax/node.rb +34 -34
- data/lib/kumi/syntax/root.rb +6 -6
- data/lib/kumi/syntax/trait_declaration.rb +4 -4
- data/lib/kumi/syntax/value_declaration.rb +4 -4
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +1 -1
- data/scripts/analyze_broadcast_methods.rb +68 -0
- data/scripts/analyze_cascade_methods.rb +74 -0
- data/scripts/check_broadcasting_coverage.rb +51 -0
- data/scripts/find_dead_code.rb +114 -0
- metadata +20 -4
- data/docs/features/array-broadcasting.md +0 -170
- data/lib/kumi/cli.rb +0 -449
- data/lib/kumi/core/vectorization_metadata.rb +0 -110
@@ -15,11 +15,18 @@ module Kumi
|
|
15
15
|
# Find array fields with their element types
|
16
16
|
array_fields = find_array_fields(input_meta)
|
17
17
|
|
18
|
+
# Build nested paths metadata for nested array traversal
|
19
|
+
nested_paths = build_nested_paths_metadata(input_meta)
|
20
|
+
|
18
21
|
# Build compiler metadata
|
19
22
|
compiler_metadata = {
|
20
23
|
array_fields: array_fields,
|
21
24
|
vectorized_operations: {},
|
22
|
-
reduction_operations: {}
|
25
|
+
reduction_operations: {},
|
26
|
+
nested_paths: nested_paths,
|
27
|
+
flattening_declarations: {}, # Track which declarations need flattening
|
28
|
+
cascade_strategies: {}, # Pre-computed cascade processing strategies
|
29
|
+
compilation_metadata: {} # Pre-computed compilation decisions
|
23
30
|
}
|
24
31
|
|
25
32
|
# Track which values are vectorized for type inference
|
@@ -30,19 +37,32 @@ module Kumi
|
|
30
37
|
values = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::ValueDeclaration) }
|
31
38
|
|
32
39
|
(traits.to_a + values.to_a).each do |name, decl|
|
33
|
-
result = analyze_value_vectorization(name, decl.expression, array_fields, vectorized_values, errors
|
40
|
+
result = analyze_value_vectorization(name, decl.expression, array_fields, nested_paths, vectorized_values, errors,
|
41
|
+
definitions)
|
34
42
|
|
35
43
|
case result[:type]
|
36
44
|
when :vectorized
|
37
45
|
compiler_metadata[:vectorized_operations][name] = result[:info]
|
46
|
+
|
47
|
+
# If this is a cascade with processing strategy, store it separately for easy compiler access
|
48
|
+
compiler_metadata[:cascade_strategies][name] = result[:info][:processing_strategy] if result[:info][:processing_strategy]
|
49
|
+
|
38
50
|
# Store array source information for dimension checking
|
39
51
|
array_source = extract_array_source(result[:info], array_fields)
|
40
52
|
vectorized_values[name] = { vectorized: true, array_source: array_source }
|
41
53
|
when :reduction
|
42
54
|
compiler_metadata[:reduction_operations][name] = result[:info]
|
55
|
+
# Mark this specific declaration as needing flattening for its argument
|
56
|
+
compiler_metadata[:flattening_declarations][name] = result[:info]
|
43
57
|
# Reduction produces scalar, not vectorized
|
44
58
|
vectorized_values[name] = { vectorized: false }
|
45
59
|
end
|
60
|
+
|
61
|
+
# Pre-compute compilation metadata for each declaration
|
62
|
+
compilation_meta = compute_compilation_metadata(
|
63
|
+
name, decl, compiler_metadata, vectorized_values, array_fields
|
64
|
+
)
|
65
|
+
compiler_metadata[:compilation_metadata][name] = compilation_meta
|
46
66
|
end
|
47
67
|
|
48
68
|
state.with(:broadcasts, compiler_metadata.freeze)
|
@@ -50,6 +70,76 @@ module Kumi
|
|
50
70
|
|
51
71
|
private
|
52
72
|
|
73
|
+
# Assembles the pre-computed compilation hints for one declaration.
#
# Looks +name+ up in three tables of +compiler_metadata+:
#   :vectorized_operations   - an entry marks the declaration as vectorized
#   :cascade_strategies      - read only when the vectorized entry is a cascade
#   :flattening_declarations - an entry switches the operation mode to :flatten
#
# @param name key of the declaration inside the metadata tables
# @param _decl unused, kept for the call signature
# @param compiler_metadata [Hash] metadata accumulated so far by this pass
# @param _vectorized_values unused, kept for the call signature
# @param _array_fields unused, kept for the call signature
# @return [Hash] with keys :operation_mode, :is_vectorized, :vectorization_context,
#   :cascade_info and :function_call_strategy (in that order)
def compute_compilation_metadata(name, _decl, compiler_metadata, _vectorized_values, _array_fields)
  vectorized_info = compiler_metadata[:vectorized_operations][name]

  context = {}
  cascade = {}
  if vectorized_info
    context = {
      has_vectorized_args: true,
      needs_broadcasting: true,
      array_structure_depth: estimate_array_depth(vectorized_info, compiler_metadata[:nested_paths])
    }

    # Only cascades carry a processing strategy; default mode is :hierarchical.
    if vectorized_info[:source] == :cascade_with_vectorized_conditions_or_results
      strategy = compiler_metadata[:cascade_strategies][name]
      cascade = {
        is_vectorized: true,
        processing_mode: strategy&.dig(:mode) || :hierarchical,
        needs_hierarchical_processing: needs_hierarchical_processing?(strategy)
      }
    end
  end

  flattening_info = compiler_metadata[:flattening_declarations][name]
  call_strategy = {}
  if flattening_info
    call_strategy = {
      flattening_required: true,
      # Falls back to flattening only the first argument when no indices were recorded.
      flatten_argument_indices: flattening_info[:flatten_argument_indices] || [0],
      result_structure: :scalar
    }
  end

  {
    operation_mode: flattening_info ? :flatten : :broadcast,
    is_vectorized: vectorized_info ? true : false,
    vectorization_context: context,
    cascade_info: cascade,
    function_call_strategy: call_strategy
  }
end
|
119
|
+
|
120
|
+
# Estimates how many array levels a vectorized declaration spans.
#
# Only :nested_array_access entries carry a path that can be looked up in
# +nested_paths+; every other source (including :array_field_access) is
# treated as a single array level.
#
# @param vectorized_info [Hash] entry from :vectorized_operations; reads :source and :path
# @param nested_paths [Hash, nil] path => metadata hash containing :array_depth
# @return [Integer] recorded :array_depth for the path, or 1 when unknown
def estimate_array_depth(vectorized_info, nested_paths)
  return 1 unless vectorized_info[:source] == :nested_array_access

  # Safe navigation keeps a missing table or missing path from raising.
  nested_paths&.dig(vectorized_info[:path], :array_depth) || 1
end
|
131
|
+
|
132
|
+
# Tells whether a cascade strategy requires hierarchical processing.
#
# @param strategy [Hash, nil] cascade processing strategy; only :mode is read
# @return [Boolean] true when :mode is :nested_array or :deep_nested_array,
#   false for any other mode or when no strategy is given
def needs_hierarchical_processing?(strategy)
  return false unless strategy

  %i[nested_array deep_nested_array].include?(strategy[:mode])
end
|
142
|
+
|
53
143
|
def find_array_fields(input_meta)
|
54
144
|
result = {}
|
55
145
|
input_meta.each do |name, meta|
|
@@ -63,10 +153,63 @@ module Kumi
|
|
63
153
|
result
|
64
154
|
end
|
65
155
|
|
66
|
-
def
|
156
|
+
# Builds the lookup table of array-backed input paths.
#
# Walks every root entry of +input_meta+ with collect_nested_paths, starting
# at array depth 0 and with no inherited access mode.
#
# @param input_meta [Hash] root field name => field metadata
# @return [Hash] path (Array of names) => path metadata, filled in by collect_nested_paths
def build_nested_paths_metadata(input_meta)
  input_meta.each_with_object({}) do |(root_name, root_meta), paths|
    collect_nested_paths(paths, [root_name], root_meta, 0, nil)
  end
end
|
166
|
+
|
167
|
+
# Recursively records metadata for every path that sits at or below an array field.
#
# An array field raises the depth by one and sets the access mode handed to
# its descendants (its own :access_mode, or :object when unset). Children of
# the current field are recorded using the current depth and then visited;
# a field without children is recorded only when the depth is positive.
#
# @param nested_paths [Hash] accumulator, mutated in place
# @param current_path [Array] names leading to the current field
# @param current_meta [Hash] metadata of the current field; reads :type, :access_mode, :children
# @param array_depth [Integer] number of array levels above the current field
# @param parent_access_mode access mode inherited from the nearest enclosing array, if any
def collect_nested_paths(nested_paths, current_path, current_meta, array_depth, parent_access_mode = nil)
  depth = array_depth
  access_mode = parent_access_mode
  if current_meta[:type] == :array
    depth += 1
    access_mode = current_meta[:access_mode] || :object
  end

  children = current_meta[:children]
  if children
    children.each do |child_name, child_meta|
      child_path = current_path + [child_name]

      # Register the child under the depth of the field that contains it ...
      nested_paths[child_path] = build_path_metadata(child_path, child_meta, depth, access_mode) if depth.positive?

      # ... then descend; a childless child re-registers itself with its own depth.
      collect_nested_paths(nested_paths, child_path, child_meta, depth, access_mode)
    end
  elsif depth.positive?
    nested_paths[current_path] = build_path_metadata(current_path, current_meta, depth, access_mode)
  end
end
|
194
|
+
|
195
|
+
# Describes one array-backed path for the compiler.
#
# @param _path unused, kept for the call signature
# @param field_meta [Hash] metadata of the field; only :type is read (defaults to :any)
# @param array_depth [Integer] number of array levels enclosing the field
# @param parent_access_mode access mode of the enclosing array field, stored as :access_mode
# @return [Hash] :array_depth, :element_type, :operation_mode (always :broadcast here),
#   :result_structure (:nested_array when depth > 1, otherwise :array) and :access_mode
def build_path_metadata(_path, field_meta, array_depth, parent_access_mode = nil)
  element_type = field_meta[:type] || :any
  structure = array_depth > 1 ? :nested_array : :array

  {
    array_depth: array_depth,
    element_type: element_type,
    operation_mode: :broadcast,
    result_structure: structure,
    access_mode: parent_access_mode
  }
end
|
204
|
+
|
205
|
+
def analyze_value_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
|
67
206
|
case expr
|
68
207
|
when Kumi::Syntax::InputElementReference
|
69
|
-
if
|
208
|
+
# Check if this path exists in nested_paths metadata (supports nested arrays)
|
209
|
+
if nested_paths.key?(expr.path)
|
210
|
+
{ type: :vectorized, info: { source: :nested_array_access, path: expr.path, nested_metadata: nested_paths[expr.path] } }
|
211
|
+
# Fallback to old array_fields detection for backward compatibility
|
212
|
+
elsif array_fields.key?(expr.path.first)
|
70
213
|
{ type: :vectorized, info: { source: :array_field_access, path: expr.path } }
|
71
214
|
else
|
72
215
|
{ type: :scalar }
|
@@ -82,43 +225,57 @@ module Kumi
|
|
82
225
|
end
|
83
226
|
|
84
227
|
when Kumi::Syntax::CallExpression
|
85
|
-
analyze_call_vectorization(name, expr, array_fields, vectorized_values, errors)
|
228
|
+
analyze_call_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
|
86
229
|
|
87
230
|
when Kumi::Syntax::CascadeExpression
|
88
|
-
analyze_cascade_vectorization(name, expr, array_fields, vectorized_values, errors)
|
231
|
+
analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions)
|
89
232
|
|
90
233
|
else
|
91
234
|
{ type: :scalar }
|
92
235
|
end
|
93
236
|
end
|
94
237
|
|
95
|
-
def analyze_call_vectorization(_name, expr, array_fields, vectorized_values, errors)
|
238
|
+
def analyze_call_vectorization(_name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
|
96
239
|
# Check if this is a reduction function using function registry metadata
|
97
240
|
if Kumi::Registry.reducer?(expr.fn_name)
|
98
241
|
# Only treat as reduction if the argument is actually vectorized
|
99
|
-
arg_info = analyze_argument_vectorization(expr.args.first, array_fields, vectorized_values)
|
242
|
+
arg_info = analyze_argument_vectorization(expr.args.first, array_fields, nested_paths, vectorized_values, definitions)
|
100
243
|
if arg_info[:vectorized]
|
101
|
-
|
244
|
+
# Pre-compute which argument indices need flattening
|
245
|
+
flatten_indices = []
|
246
|
+
expr.args.each_with_index do |arg, index|
|
247
|
+
arg_vectorization = analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
|
248
|
+
flatten_indices << index if arg_vectorization[:vectorized]
|
249
|
+
end
|
250
|
+
|
251
|
+
{ type: :reduction, info: {
|
252
|
+
function: expr.fn_name,
|
253
|
+
source: arg_info[:source],
|
254
|
+
argument: expr.args.first,
|
255
|
+
flatten_argument_indices: flatten_indices
|
256
|
+
} }
|
102
257
|
else
|
103
258
|
# Not a vectorized reduction - just a regular function call
|
104
259
|
{ type: :scalar }
|
105
260
|
end
|
106
261
|
|
107
262
|
else
|
108
|
-
|
109
|
-
#
|
110
|
-
if
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
263
|
+
|
264
|
+
# Special case: cascade_and takes individual trait arguments
|
265
|
+
if expr.fn_name == :cascade_and
|
266
|
+
# Check if any of the individual arguments are vectorized traits
|
267
|
+
vectorized_trait = expr.args.find do |arg|
|
268
|
+
arg.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[arg.name]&.[](:vectorized)
|
269
|
+
end
|
270
|
+
if vectorized_trait
|
271
|
+
return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: vectorized_trait.name } }
|
117
272
|
end
|
118
273
|
end
|
119
274
|
|
120
|
-
#
|
121
|
-
arg_infos = expr.args.map
|
275
|
+
# Analyze arguments to determine function behavior
|
276
|
+
arg_infos = expr.args.map do |arg|
|
277
|
+
analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions)
|
278
|
+
end
|
122
279
|
|
123
280
|
if arg_infos.any? { |info| info[:vectorized] }
|
124
281
|
# Check for dimension mismatches when multiple arguments are vectorized
|
@@ -133,21 +290,37 @@ module Kumi
|
|
133
290
|
return { type: :scalar } # Treat as scalar to prevent further errors
|
134
291
|
end
|
135
292
|
|
136
|
-
#
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
293
|
+
# Check if this is a structure function that should work on the array as-is
|
294
|
+
if structure_function?(expr.fn_name)
|
295
|
+
# Structure functions like size should work on structure as-is (scalar)
|
296
|
+
{ type: :scalar }
|
297
|
+
else
|
298
|
+
# This is a vectorized operation - broadcast over elements
|
299
|
+
{ type: :vectorized, info: {
|
300
|
+
operation: expr.fn_name,
|
301
|
+
vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
|
302
|
+
} }
|
303
|
+
end
|
141
304
|
else
|
305
|
+
# No vectorized arguments - regular scalar function
|
142
306
|
{ type: :scalar }
|
143
307
|
end
|
144
308
|
end
|
145
309
|
end
|
146
310
|
|
147
|
-
def
|
311
|
+
# Asks the function registry whether +fn_name+ is flagged as a structure
# function; callers in this pass treat such calls as scalar instead of
# broadcasting them over elements.
#
# @param fn_name name of the function to look up
# @return whatever Kumi::Registry.structure_function? returns for +fn_name+
def structure_function?(fn_name)
  registry = Kumi::Registry
  registry.structure_function?(fn_name)
end
|
315
|
+
|
316
|
+
def analyze_argument_vectorization(arg, array_fields, nested_paths, vectorized_values, definitions = nil)
|
148
317
|
case arg
|
149
318
|
when Kumi::Syntax::InputElementReference
|
150
|
-
|
319
|
+
# Check nested paths first (supports nested arrays)
|
320
|
+
if nested_paths.key?(arg.path)
|
321
|
+
{ vectorized: true, source: :nested_array_field, array_source: arg.path.first }
|
322
|
+
# Fallback to old array_fields detection for backward compatibility
|
323
|
+
elsif array_fields.key?(arg.path.first)
|
151
324
|
{ vectorized: true, source: :array_field, array_source: arg.path.first }
|
152
325
|
else
|
153
326
|
{ vectorized: false }
|
@@ -165,7 +338,7 @@ module Kumi
|
|
165
338
|
|
166
339
|
when Kumi::Syntax::CallExpression
|
167
340
|
# Recursively check
|
168
|
-
result = analyze_value_vectorization(nil, arg, array_fields, vectorized_values, [])
|
341
|
+
result = analyze_value_vectorization(nil, arg, array_fields, nested_paths, vectorized_values, [], definitions)
|
169
342
|
{ vectorized: result[:type] == :vectorized, source: :expression }
|
170
343
|
|
171
344
|
else
|
@@ -183,30 +356,415 @@ module Kumi
|
|
183
356
|
end
|
184
357
|
end
|
185
358
|
|
186
|
-
def analyze_cascade_vectorization(
|
187
|
-
#
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
359
|
+
# Decides whether a cascade expression is vectorized and, if so, which
# dimensions its conditions and results draw from.
#
# For each case the result and the condition are analysed with
# analyze_value_vectorization. Vectorized parts are traced back to a
# [source, dimension] pair via trace_dimensional_source. A `cascade_and`
# condition is traced argument by argument and rejected immediately when its
# arguments disagree on source or dimension. After all cases are processed,
# more than one "definite" source (not tagged unknown/operation) is only
# accepted when valid_hierarchical_broadcasting? approves the dimensions.
#
# Setting the DEBUG_CASCADE environment variable prints a trace to stdout.
#
# @param name declaration name (used for tracing only)
# @param expr cascade expression; must respond to #cases
# @param array_fields passed through to the analysis helpers
# @param nested_paths [Hash] path => metadata table
# @param vectorized_values [Hash] declarations already classified by this pass
# @param errors error accumulator handed to report_cascade_dimension_mismatch
# @param definitions [Hash, nil] declaration name => declaration
# @return [Hash] `{ type: :scalar }`, or `{ type: :vectorized, info: {...} }`
#   with :source, :dimensional_requirements, :primary_dimension, :nested_paths
#   and :processing_strategy
def analyze_cascade_vectorization(name, expr, array_fields, nested_paths, vectorized_values, errors, definitions = nil)
  # Read the flag once instead of on every trace line.
  debug = ENV["DEBUG_CASCADE"]

  condition_sources = []
  result_sources = []
  condition_dimensions = []
  result_dimensions = []
  is_vectorized = false

  if debug
    puts "DEBUG: analyze_cascade_vectorization for #{name}"
    puts " Expression: #{expr.inspect}"
    puts " Cases: #{expr.cases.length}"
  end

  expr.cases.each do |case_expr|
    # Analyze result expression
    result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, nested_paths, vectorized_values, errors,
                                              definitions)
    if result_info[:type] == :vectorized
      is_vectorized = true
      source, dimension = trace_dimensional_source(case_expr.result, result_info, vectorized_values, array_fields, definitions)
      result_sources << source
      result_dimensions << dimension
    end

    # Analyze condition expression
    condition = case_expr.condition
    condition_info = analyze_value_vectorization(nil, condition, array_fields, nested_paths, vectorized_values, errors,
                                                 definitions)
    next unless condition_info[:type] == :vectorized

    is_vectorized = true
    call_condition = condition.is_a?(Kumi::Syntax::CallExpression)

    if debug
      puts " Checking condition type: #{condition.class}"
      puts " Condition fn_name: #{condition.fn_name}" if call_condition
    end

    unless call_condition && condition.fn_name == :cascade_and
      source, dimension = trace_dimensional_source(condition, condition_info, vectorized_values, array_fields,
                                                   definitions)
      condition_sources << source
      condition_dimensions << dimension
      next
    end

    # cascade_and: every referenced trait must agree on source and dimension.
    puts " -> ENTERING CASCADE_AND SPECIAL HANDLING" if debug
    cascade_sources, cascade_dimensions = trace_cascade_and_argument_sources(
      condition, condition_info, vectorized_values, array_fields, definitions, debug
    )

    unique_sources = cascade_sources.uniq
    unique_dimensions = cascade_dimensions.uniq

    if debug
      puts " cascade_sources: #{cascade_sources.inspect}"
      puts " cascade_dimensions: #{cascade_dimensions.inspect}"
      puts " unique_sources: #{unique_sources.inspect}"
      puts " unique_dimensions: #{unique_dimensions.inspect}"
    end

    # A conflict is either different sources OR incompatible dimensions.
    # Any "unknown" source suppresses the source conflict entirely.
    has_source_conflict = unique_sources.length > 1 && unique_sources.none? { |s| s.to_s.include?("unknown") }
    has_dimension_conflict = unique_dimensions.length > 1 && !valid_hierarchical_broadcasting?(unique_dimensions)

    if debug
      puts " has_source_conflict: #{has_source_conflict}"
      puts " has_dimension_conflict: #{has_dimension_conflict}"
      if unique_dimensions.length > 1
        puts " valid_hierarchical_broadcasting?: #{valid_hierarchical_broadcasting?(unique_dimensions)}"
      end
    end

    if has_source_conflict || has_dimension_conflict
      if debug
        puts " -> FOUND CASCADE_AND DIMENSIONAL CONFLICT:"
        puts " Sources: #{unique_sources.inspect}"
        puts " Dimensions: #{unique_dimensions.inspect}"
        puts " Source conflict: #{has_source_conflict}"
        puts " Dimension conflict: #{has_dimension_conflict}"
      end
      report_cascade_dimension_mismatch(errors, expr, unique_sources, unique_dimensions)
      return { type: :scalar }
    end

    condition_sources.concat(cascade_sources)
    condition_dimensions.concat(cascade_dimensions)
  end

  return { type: :scalar } unless is_vectorized

  # Validate dimensional compatibility
  all_sources = (condition_sources + result_sources).compact.uniq
  all_dimensions = (condition_dimensions + result_dimensions).compact.uniq

  if debug
    puts " is_vectorized: true"
    puts " condition_sources: #{condition_sources.inspect}"
    puts " result_sources: #{result_sources.inspect}"
    puts " condition_dimensions: #{condition_dimensions.inspect}"
    puts " result_dimensions: #{result_dimensions.inspect}"
    puts " all_sources: #{all_sources.inspect}"
    puts " all_dimensions: #{all_dimensions.inspect}"
  end

  # Deliberately lenient: placeholder sources (unknown / operation) never
  # count towards a mismatch.
  definite_sources = all_sources.reject { |s| s.to_s.include?("unknown") || s.to_s.include?("operation") }

  if debug
    puts " definite_sources: #{definite_sources.inspect}"
    puts " definite_sources.length: #{definite_sources.length}"
  end

  if definite_sources.length > 1
    # Several definite sources are fine only for parent-child broadcasting.
    is_valid_hierarchical = valid_hierarchical_broadcasting?(all_dimensions)
    puts " valid_hierarchical_broadcasting?: #{is_valid_hierarchical}" if debug
    unless is_valid_hierarchical
      puts " -> REPORTING DIMENSIONAL MISMATCH" if debug
      report_cascade_dimension_mismatch(errors, expr, definite_sources, all_dimensions)
      return { type: :scalar } # Treat as scalar to prevent further errors
    end
  end

  # The first collected dimension drives the processing strategy.
  primary_dimension = all_dimensions.first
  processing_strategy = compute_cascade_processing_strategy(primary_dimension, nested_paths)

  { type: :vectorized, info: {
    source: :cascade_with_vectorized_conditions_or_results,
    dimensional_requirements: {
      conditions: { sources: condition_sources.uniq, dimensions: condition_dimensions.uniq },
      results: { sources: result_sources.uniq, dimensions: result_dimensions.uniq }
    },
    primary_dimension: primary_dimension,
    nested_paths: extract_nested_paths_from_dimensions(primary_dimension, nested_paths),
    processing_strategy: processing_strategy
  } }
end

# Traces each declaration-reference argument of a cascade_and condition back
# to its [source, dimension] pair. Arguments that are not declaration
# references, or whose declaration cannot be found, are skipped.
#
# @return [Array(Array, Array)] collected sources and dimensions, in argument order
def trace_cascade_and_argument_sources(condition, condition_info, vectorized_values, array_fields, definitions, debug)
  sources = []
  dimensions = []

  puts " cascade_and args: #{condition.args.map(&:class)}" if debug

  condition.args.each do |arg|
    puts " Processing arg: #{arg.inspect}" if debug
    next unless arg.is_a?(Kumi::Syntax::DeclarationReference)

    puts " Looking up declaration: #{arg.name}" if debug
    decl = definitions[arg.name] if definitions
    if decl
      puts " Found declaration, tracing source..." if debug
      arg_source, arg_dimension = trace_dimensional_source(decl.expression, condition_info, vectorized_values,
                                                           array_fields, definitions)
      puts " Traced source: #{arg_source}, dimension: #{arg_dimension}" if debug
      sources << arg_source
      dimensions << arg_dimension
    elsif debug
      puts " Declaration not found: #{arg.name}"
    end
  end

  [sources, dimensions]
end
|
209
523
|
|
524
|
+
# Walks an AST node back to the input it draws its dimension from.
#
# * Input element reference: the root of its path is the source, the whole
#   path is the dimension.
# * Declaration reference: uses the recorded :array_source when present,
#   otherwise follows the declaration's own expression via +definitions+.
# * Call expression: follows the first argument that
#   analyze_argument_vectorization reports as vectorized (checked with an
#   empty nested-paths table).
# Anything that cannot be resolved yields a placeholder symbol for both slots.
#
# @param expr AST node to trace
# @param info passed through unchanged to recursive calls
# @param vectorized_values [Hash] declaration name => classification recorded so far
# @param array_fields passed to analyze_argument_vectorization
# @param definitions [Hash, nil] declaration name => declaration
# @return [Array] two-element array `[source, dimension]`
def trace_dimensional_source(expr, info, vectorized_values, array_fields, definitions = nil)
  case expr
  when Kumi::Syntax::InputElementReference
    [expr.path.first, expr.path]
  when Kumi::Syntax::DeclarationReference
    recorded = vectorized_values[expr.name]
    return [:unknown_declaration, [:unknown_declaration]] unless recorded

    origin = recorded[:array_source]
    return [origin, [origin]] if origin

    # No recorded source: fall back to the declaration's own expression.
    decl = definitions[expr.name] if definitions
    return [:unknown_vectorized_operation, [:unknown_vectorized_operation]] unless decl

    trace_dimensional_source(decl.expression, info, vectorized_values, array_fields, definitions)
  when Kumi::Syntax::CallExpression
    carrier = expr.args.find do |arg|
      analyze_argument_vectorization(arg, array_fields, {}, vectorized_values, definitions)[:vectorized]
    end
    return [:operation_unknown, [:operation_unknown]] unless carrier

    trace_dimensional_source(carrier, info, vectorized_values, array_fields, definitions)
  else
    [:unknown_expr, [:unknown_expr]]
  end
end
|
567
|
+
|
568
|
+
# Derives a `[source, dimension]` pair from a vectorization info hash.
#
# * :array_field_access / :nested_array_access - first path element and the full path
# * :vectorized_declaration - the :array_source recorded for info[:name] in
#   +vectorized_values+, :vectorized_reference when none was recorded, or
#   :unknown_declaration when the name is missing or unrecorded
# * anything else - :operation_result when both :operation and
#   :vectorized_args are set, otherwise :unknown
#
# @param info [Hash] vectorization info; reads :source, :path, :name, :operation, :vectorized_args
# @param _array_fields unused
# @param _nested_paths unused
# @param vectorized_values [Hash] declaration name => classification
# @return [Array] two-element array `[source, dimension]`
def extract_dimensional_info_with_context(info, _array_fields, _nested_paths, vectorized_values)
  kind = info[:source]

  if %i[array_field_access nested_array_access].include?(kind)
    return [info[:path]&.first, info[:path]]
  end

  if kind == :vectorized_declaration
    referenced = info[:name] && vectorized_values[info[:name]]
    return [:unknown_declaration, [:unknown_declaration]] unless referenced

    origin = referenced[:array_source]
    return origin ? [origin, [origin]] : [:vectorized_reference, [:vectorized_reference]]
  end

  # Operation results and everything else carry only a placeholder tag.
  tag = (info[:operation] && info[:vectorized_args]) ? :operation_result : :unknown
  [tag, [tag]]
end
|
600
|
+
|
601
|
+
# Returns the source (root field or placeholder symbol) of a vectorization info hash.
#
# * :array_field_access / :nested_array_access - first element of info[:path] (nil without a path)
# * :vectorized_declaration / :vectorized_value - first element of info[:info][:path]
#   when info[:name] and that path are present, otherwise :vectorized_reference
# * anything else - :operation_result when :vectorized_args is set, otherwise :unknown
#
# @param info [Hash] vectorization info
# @param _array_fields unused
# @return [Object, nil] the source as described above
def extract_dimensional_source(info, _array_fields)
  case info[:source]
  when :array_field_access, :nested_array_access
    # Both access kinds resolve identically, as in extract_dimensional_info_with_context.
    info[:path]&.first
  when :vectorized_declaration, :vectorized_value
    path = info.dig(:info, :path) if info[:name]
    path ? path.first : :vectorized_reference
  else
    info[:vectorized_args] ? :operation_result : :unknown
  end
end
|
624
|
+
|
625
|
+
# Returns the dimension (a path or a one-element placeholder array) of a vectorization info hash.
#
# * :array_field_access / :nested_array_access - info[:path] (nil without a path)
# * :vectorized_declaration / :vectorized_value - info[:info][:path] when
#   info[:name] and that path are present, otherwise [:vectorized_reference]
# * anything else - [:operation_result] when :vectorized_args is set, otherwise [:unknown]
#
# @param info [Hash] vectorization info
# @param _array_fields unused
# @param _nested_paths unused
# @return [Array, nil] the dimension as described above
def extract_dimensions(info, _array_fields, _nested_paths)
  case info[:source]
  when :array_field_access, :nested_array_access
    # Both access kinds resolve identically, as in extract_dimensional_info_with_context.
    info[:path]
  when :vectorized_declaration, :vectorized_value
    path = info.dig(:info, :path) if info[:name]
    path || [:vectorized_reference]
  else
    info[:vectorized_args] ? [:operation_result] : [:unknown]
  end
end
|
648
|
+
|
649
|
+
# Looks up the nested-path metadata recorded for a dimension.
#
# @param dimension candidate path; anything that is not an Array yields nil
# @param nested_paths [Hash] path => metadata table
# @return the table entry for +dimension+, or nil when the dimension is not
#   an Array or has no entry
def extract_nested_paths_from_dimensions(dimension, nested_paths)
  nested_paths[dimension] if dimension.is_a?(Array)
end
|
654
|
+
|
655
|
+
# Check if dimensions represent valid hierarchical broadcasting (parent-to-child)
# Example: [:regions, :offices, :teams] can broadcast to [:regions, :offices, :teams, :employees]
#
# Each dimension is reduced to its structural path (the path without its final
# field name). The set is valid when all structural paths share one root and
# form a single ancestor chain, i.e. every shorter path is a prefix of every
# longer one. Sibling branches are rejected even when a common ancestor path
# is also present in the set.
#
# Set ENV["DEBUG_CASCADE"] to trace the decision on stdout.
#
# @param dimensions [Array<Array>] dimension paths to compare
# @return [Boolean] true when the dimensions can be broadcast together
def valid_hierarchical_broadcasting?(dimensions)
  debug = ENV["DEBUG_CASCADE"]
  puts " DEBUG valid_hierarchical_broadcasting?: dimensions=#{dimensions.inspect}" if debug

  return true if dimensions.length <= 1

  # Extract structural paths by removing the final field name from each dimension,
  # so [:regions, :offices, :teams, :performance_score] becomes [:regions, :offices, :teams].
  structural_paths = dimensions.map { |dim| dim.length > 1 ? dim[0..-2] : dim }.uniq
  puts " structural_paths: #{structural_paths.inspect}" if debug

  roots = structural_paths.map(&:first).uniq
  puts " root_groups: #{roots.inspect}" if debug

  # All dimensions must come from the same root
  if roots.length > 1
    puts " -> REJECT: Multiple roots" if debug
    return false
  end

  # If all structural paths are the same, this is valid (same level)
  if structural_paths.length == 1
    puts " -> ACCEPT: All dimensions at same structural level" if debug
    return true
  end

  sorted_paths = structural_paths.sort_by(&:length)
  puts " sorted structural paths: #{sorted_paths.inspect}" if debug

  # The prefix relation is transitive, so checking neighbours in the
  # length-sorted list covers every pair. Two distinct paths of equal length
  # always end up adjacent and can never be prefixes of one another.
  sorted_paths.each_cons(2) do |parent, child|
    unless child[0, parent.length] == parent
      puts " -> REJECT: #{parent.inspect} and #{child.inspect} are sibling branches" if debug
      return false
    end

    puts " Found parent-child relationship: #{parent.inspect} → #{child.inspect}" if debug
  end

  puts " -> ACCEPT: All dimensions compatible" if debug
  true
end
|
728
|
+
|
729
|
+
# Builds the processing-strategy hash for a cascade over +primary_dimension+.
#
# Without a primary dimension the result is just { mode: :scalar }. Otherwise
# the mode is picked from the length of the dimension path, and the entry of
# +nested_paths+ stored under that path is attached as :nested_path_info.
def compute_cascade_processing_strategy(primary_dimension, nested_paths)
  if primary_dimension
    depth = primary_dimension.length

    mode =
      case depth
      when 0..1 then :simple_array      # single-level array processing
      when 2..4 then :nested_array      # multi-level nested array processing
      else :deep_nested_array           # very deep nesting (5+ levels)
      end

    {
      mode: mode,
      structure_depth: depth,
      dimension_path: primary_dimension,
      element_processing: :cascade_conditional_logic,
      nested_path_info: nested_paths[primary_dimension]
    }
  else
    { mode: :scalar }
  end
end
|
756
|
+
|
757
|
+
# Records a semantic error for a cascade whose conditions and results mix
# arrays from incompatible sources. The error is passed to +report_error+ with
# the location of +expr+. When ENV["DEBUG_CASCADE"] is set, the inputs and the
# result of the hierarchy check are printed first.
def report_cascade_dimension_mismatch(errors, expr, sources, dimensions)
  if ENV["DEBUG_CASCADE"]
    puts "DEBUG: Dimensional analysis details:"
    puts " Sources: #{sources.inspect}"
    puts " Dimensions: #{dimensions.inspect}"
    puts " Valid hierarchical? #{valid_hierarchical_broadcasting?(dimensions)}"
  end

  source_list = sources.join(', ')
  dimension_list = dimensions.map(&:inspect).join(', ')
  message = "Cascade dimensional mismatch: Cannot mix arrays from different sources (#{source_list}) " \
            "with dimensions (#{dimension_list}) in cascade conditions and results."

  report_error(errors, message, location: expr.loc, type: :semantic)
end
|
767
|
+
|
210
768
|
def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
|
211
769
|
# Build detailed error message with type information
|
212
770
|
summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "
|