kumi 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CLAUDE.md +76 -174
  3. data/README.md +205 -52
  4. data/{documents → docs}/AST.md +29 -29
  5. data/{documents → docs}/SYNTAX.md +95 -8
  6. data/docs/features/README.md +45 -0
  7. data/docs/features/analysis-cascade-mutual-exclusion.md +89 -0
  8. data/docs/features/analysis-type-inference.md +42 -0
  9. data/docs/features/analysis-unsat-detection.md +71 -0
  10. data/docs/features/array-broadcasting.md +170 -0
  11. data/docs/features/input-declaration-system.md +42 -0
  12. data/docs/features/performance.md +16 -0
  13. data/docs/schema_metadata/broadcasts.md +53 -0
  14. data/docs/schema_metadata/cascades.md +45 -0
  15. data/docs/schema_metadata/declarations.md +54 -0
  16. data/docs/schema_metadata/dependencies.md +57 -0
  17. data/docs/schema_metadata/evaluation_order.md +29 -0
  18. data/docs/schema_metadata/examples.md +95 -0
  19. data/docs/schema_metadata/inferred_types.md +46 -0
  20. data/docs/schema_metadata/inputs.md +86 -0
  21. data/docs/schema_metadata.md +108 -0
  22. data/examples/federal_tax_calculator_2024.rb +11 -6
  23. data/lib/kumi/analyzer/constant_evaluator.rb +1 -1
  24. data/lib/kumi/analyzer/passes/broadcast_detector.rb +246 -0
  25. data/lib/kumi/analyzer/passes/{definition_validator.rb → declaration_validator.rb} +4 -4
  26. data/lib/kumi/analyzer/passes/dependency_resolver.rb +78 -38
  27. data/lib/kumi/analyzer/passes/input_collector.rb +91 -30
  28. data/lib/kumi/analyzer/passes/name_indexer.rb +2 -2
  29. data/lib/kumi/analyzer/passes/pass_base.rb +1 -1
  30. data/lib/kumi/analyzer/passes/semantic_constraint_validator.rb +24 -25
  31. data/lib/kumi/analyzer/passes/toposorter.rb +44 -8
  32. data/lib/kumi/analyzer/passes/type_checker.rb +34 -14
  33. data/lib/kumi/analyzer/passes/type_consistency_checker.rb +2 -2
  34. data/lib/kumi/analyzer/passes/type_inferencer.rb +130 -21
  35. data/lib/kumi/analyzer/passes/unsat_detector.rb +134 -56
  36. data/lib/kumi/analyzer/passes/visitor_pass.rb +2 -2
  37. data/lib/kumi/analyzer.rb +16 -17
  38. data/lib/kumi/compiler.rb +188 -16
  39. data/lib/kumi/constraint_relationship_solver.rb +6 -6
  40. data/lib/kumi/domain/validator.rb +0 -4
  41. data/lib/kumi/error_reporting.rb +1 -1
  42. data/lib/kumi/explain.rb +32 -20
  43. data/lib/kumi/export/node_registry.rb +26 -12
  44. data/lib/kumi/export/node_serializers.rb +1 -1
  45. data/lib/kumi/function_registry/collection_functions.rb +14 -9
  46. data/lib/kumi/function_registry/function_builder.rb +4 -3
  47. data/lib/kumi/function_registry.rb +8 -2
  48. data/lib/kumi/input/type_matcher.rb +3 -0
  49. data/lib/kumi/input/validator.rb +0 -3
  50. data/lib/kumi/json_schema/generator.rb +63 -0
  51. data/lib/kumi/json_schema/validator.rb +25 -0
  52. data/lib/kumi/json_schema.rb +14 -0
  53. data/lib/kumi/{parser → ruby_parser}/build_context.rb +1 -1
  54. data/lib/kumi/ruby_parser/declaration_reference_proxy.rb +36 -0
  55. data/lib/kumi/{parser → ruby_parser}/dsl.rb +1 -1
  56. data/lib/kumi/{parser → ruby_parser}/dsl_cascade_builder.rb +5 -5
  57. data/lib/kumi/{parser → ruby_parser}/expression_converter.rb +20 -20
  58. data/lib/kumi/{parser → ruby_parser}/guard_rails.rb +1 -1
  59. data/lib/kumi/{parser → ruby_parser}/input_builder.rb +41 -10
  60. data/lib/kumi/ruby_parser/input_field_proxy.rb +46 -0
  61. data/lib/kumi/{parser → ruby_parser}/input_proxy.rb +4 -4
  62. data/lib/kumi/ruby_parser/nested_input.rb +15 -0
  63. data/lib/kumi/{parser → ruby_parser}/parser.rb +11 -10
  64. data/lib/kumi/{parser → ruby_parser}/schema_builder.rb +11 -10
  65. data/lib/kumi/{parser → ruby_parser}/sugar.rb +62 -10
  66. data/lib/kumi/ruby_parser.rb +10 -0
  67. data/lib/kumi/schema.rb +10 -4
  68. data/lib/kumi/schema_instance.rb +6 -6
  69. data/lib/kumi/schema_metadata.rb +524 -0
  70. data/lib/kumi/syntax/array_expression.rb +15 -0
  71. data/lib/kumi/syntax/call_expression.rb +11 -0
  72. data/lib/kumi/syntax/cascade_expression.rb +11 -0
  73. data/lib/kumi/syntax/case_expression.rb +11 -0
  74. data/lib/kumi/syntax/declaration_reference.rb +11 -0
  75. data/lib/kumi/syntax/hash_expression.rb +11 -0
  76. data/lib/kumi/syntax/input_declaration.rb +12 -0
  77. data/lib/kumi/syntax/input_element_reference.rb +12 -0
  78. data/lib/kumi/syntax/input_reference.rb +12 -0
  79. data/lib/kumi/syntax/literal.rb +11 -0
  80. data/lib/kumi/syntax/trait_declaration.rb +11 -0
  81. data/lib/kumi/syntax/value_declaration.rb +11 -0
  82. data/lib/kumi/vectorization_metadata.rb +108 -0
  83. data/lib/kumi/version.rb +1 -1
  84. data/lib/kumi.rb +14 -0
  85. metadata +55 -25
  86. data/lib/generators/trait_engine/templates/schema_spec.rb.erb +0 -27
  87. data/lib/kumi/domain.rb +0 -8
  88. data/lib/kumi/input.rb +0 -8
  89. data/lib/kumi/syntax/declarations.rb +0 -26
  90. data/lib/kumi/syntax/expressions.rb +0 -34
  91. data/lib/kumi/syntax/terminal_expressions.rb +0 -30
  92. data/lib/kumi/syntax.rb +0 -9
  93. /data/{documents → docs}/DSL.md +0 -0
  94. /data/{documents → docs}/FUNCTIONS.md +0 -0
@@ -0,0 +1,95 @@
1
+ # Schema Metadata Examples
2
+
3
+ For comprehensive API documentation with detailed examples, see the YARD documentation in the SchemaMetadata class.
4
+
5
+ ## Basic Usage
6
+
7
+ ```ruby
8
+ class TaxSchema
9
+ extend Kumi::Schema
10
+
11
+ schema do
12
+ input do
13
+ integer :income, domain: 0..1_000_000
14
+ string :filing_status, domain: %w[single married]
15
+ integer :age, domain: 18..100
16
+ end
17
+
18
+ trait :adult, (input.age >= 18)
19
+ trait :high_income, (input.income > 100_000)
20
+
21
+ value :tax_rate do
22
+ on high_income, 0.25
23
+ base 0.15
24
+ end
25
+
26
+ value :tax_amount, input.income * tax_rate
27
+ end
28
+ end
29
+
30
+ # Access schema metadata - clean object interface!
31
+ metadata = TaxSchema.schema_metadata
32
+
33
+ # Processed semantic metadata (rich, transformed from AST)
34
+ puts metadata.inputs
35
+ # => { :income => { type: :integer, domain: {...}, required: true }, ... }
36
+
37
+ puts metadata.values
38
+ # => { :tax_rate => { type: :float, cascade: {...} }, ... }
39
+
40
+ puts metadata.traits
41
+ # => { :adult => { type: :boolean, condition: "input.age >= 18" }, ... }
42
+
43
+ # Raw analyzer state (direct from analysis passes)
44
+ puts metadata.evaluation_order
45
+ # => [:adult, :high_income, :tax_rate, :tax_amount]
46
+
47
+ puts metadata.dependencies
48
+ # => { :tax_amount => [#<Edge to: :tax_rate>, #<Edge to: :income>], ... }
49
+
50
+ puts metadata.inferred_types
51
+ # => { :adult => :boolean, :tax_rate => :float, :tax_amount => :float }
52
+
53
+ # Serializable processed hash
54
+ processed_hash = metadata.to_h
55
+ puts processed_hash.keys
56
+ # => [:inputs, :values, :traits, :functions]
57
+
58
+ # Raw analyzer state (contains AST nodes)
59
+ raw_state = metadata.analyzer_state
60
+ puts raw_state.keys
61
+ # => [:declarations, :inputs, :dependencies, :dependents, :leaves, :evaluation_order, :inferred_types, :cascades, :broadcasts]
62
+ ```
63
+
64
+ ## Tool Integration
65
+
66
+ ```ruby
67
+ # Form generator example
68
+ def generate_form_fields(schema_class)
69
+ metadata = schema_class.schema_metadata
70
+
71
+ metadata.inputs.map do |field_name, field_info|
72
+ case field_info[:type]
73
+ when :integer
74
+ create_number_input(field_name, field_info[:domain])
75
+ when :string
76
+ create_select_input(field_name, field_info[:domain])
77
+ when :boolean
78
+ create_checkbox_input(field_name)
79
+ end
80
+ end
81
+ end
82
+
83
+ # Dependency analysis example
84
+ def analyze_field_dependencies(schema_class, field_name)
85
+ metadata = schema_class.schema_metadata
86
+
87
+ # Find what depends on this field
88
+ dependents = metadata.dependents[field_name] || []
89
+
90
+ # Find what this field depends on
91
+ dependencies = metadata.dependencies[field_name]&.map(&:to) || []
92
+
93
+ { affects: dependents, requires: dependencies }
94
+ end
95
+ ```
@@ -0,0 +1,46 @@
1
+ # Inferred Types Metadata
2
+
3
+ Type inference results for all declarations based on expression analysis.
4
+
5
+ ## Access
6
+
7
+ ```ruby
8
+ metadata = MySchema.schema_metadata
9
+ types = metadata.inferred_types
10
+ ```
11
+
12
+ ## Structure
13
+
14
+ ```ruby
15
+ # Returns Hash<Symbol, Object>
16
+ {
17
+ declaration_name => type_specification
18
+ }
19
+ ```
20
+
21
+ ## Example
22
+
23
+ ```ruby
24
+ metadata.inferred_types
25
+ # => {
26
+ # :adult => :boolean,
27
+ # :age_group => :string,
28
+ # :tax_rate => :float,
29
+ # :count => :integer,
30
+ # :item_prices => { array: :float },
31
+ # :categories => { array: :string }
32
+ # }
33
+ ```
34
+
35
+ ## Type Values
36
+
37
+ - `:boolean`, `:string`, `:integer`, `:float`, `:any`
38
+ - `{ array: element_type }` for arrays
39
+ - `{ hash: { key: key_type, value: value_type } }` for hashes
40
+
41
+ ## Usage
42
+
43
+ - Type checking
44
+ - Code generation
45
+ - Editor support
46
+ - Runtime validation
@@ -0,0 +1,86 @@
1
+ # Input Metadata
2
+
3
+ Input field metadata extracted from `input` blocks during analysis, available in processed form (`metadata.inputs`) and raw form (`metadata.analyzer_state[:inputs]`).
4
+
5
+ ## Access
6
+
7
+ ```ruby
8
+ metadata = MySchema.schema_metadata
9
+
10
+ # Processed input metadata (recommended for tools)
11
+ inputs = metadata.inputs
12
+
13
+ # Raw input metadata (advanced usage)
14
+ raw_inputs = metadata.analyzer_state[:inputs]
15
+ ```
16
+
17
+ ## Raw Structure
18
+
19
+ ```ruby
20
+ # Raw analyzer state format
21
+ {
22
+ field_name => {
23
+ type: Symbol, # :integer, :string, :float, :boolean, :array, etc.
24
+ domain: Range|Array, # optional domain constraints
25
+ children: Hash # for array/hash types
26
+ }
27
+ }
28
+ ```
29
+
30
+ ## Processed Structure
31
+
32
+ ```ruby
33
+ # Processed metadata format (via metadata.inputs)
34
+ {
35
+ field_name => {
36
+ type: Symbol, # normalized type
37
+ domain: Hash, # normalized domain metadata
38
+ required: Boolean # always true currently
39
+ }
40
+ }
41
+ ```
42
+
43
+ ## Examples
44
+
45
+ **Processed Input Metadata:**
46
+ ```ruby
47
+ metadata.inputs
48
+ # => {
49
+ # :age => {
50
+ # type: :integer,
51
+ # domain: { type: :range, min: 0, max: 120, exclusive_end: false },
52
+ # required: true
53
+ # },
54
+ # :name => { type: :string, required: true },
55
+ # :active => { type: :boolean, required: true }
56
+ # }
57
+ ```
58
+
59
+ **Raw Input Metadata:**
60
+ ```ruby
61
+ metadata.analyzer_state[:inputs]
62
+ # => {
63
+ # :age => { type: :integer, domain: 0..120 },
64
+ # :name => { type: :string },
65
+ # :line_items => {
66
+ # type: :array,
67
+ # children: {
68
+ # :price => { type: :float, domain: 0..Float::INFINITY },
69
+ # :quantity => { type: :integer, domain: 1..100 }
70
+ # }
71
+ # }
72
+ # }
73
+ ```
74
+
75
+ **Domain Types:**
76
+ - Range: `18..65`, `0..Float::INFINITY`
77
+ - Array: `%w[active inactive suspended]`
78
+ - Proc: Custom validation functions
79
+
80
+ ## Usage
81
+
82
+ Form generators use this metadata to:
83
+ - Create appropriate input controls
84
+ - Set validation rules
85
+ - Build nested forms for arrays
86
+ - Generate type-safe schemas
@@ -0,0 +1,108 @@
1
+ # Schema Metadata
2
+
3
+ Kumi's SchemaMetadata interface provides structured access to analyzed schema information for building external tools like form generators, documentation systems, and analysis utilities.
4
+
5
+ ## Primary Interface
6
+
7
+ SchemaMetadata is the main interface for extracting metadata from Kumi schemas:
8
+
9
+ ```ruby
10
+ metadata = MySchema.schema_metadata
11
+ ```
12
+
13
+ See the comprehensive API documentation in the SchemaMetadata class for detailed method documentation, examples, and usage patterns.
14
+
15
+ ## Processed Metadata (Tool-Friendly)
16
+
17
+ These methods return clean, serializable data structures:
18
+
19
+ | Method | Returns | Description |
20
+ |--------|---------|-------------|
21
+ | `inputs` | Hash | Input field metadata with normalized types and domains |
22
+ | `values` | Hash | Value declarations with dependencies and expressions |
23
+ | `traits` | Hash | Trait conditions with dependency information |
24
+ | `functions` | Hash | Function registry info for functions used in schema |
25
+ | `to_h` | Hash | Complete processed metadata (inputs, values, traits, functions) |
26
+ | `to_json` | String | JSON serialization of processed metadata |
27
+ | `to_json_schema` | Hash | JSON Schema document for input validation |
28
+
29
+ ## Raw Analyzer State (Advanced)
30
+
31
+ Direct access to internal analyzer results:
32
+
33
+ | Method | Returns | Description |
34
+ |--------|---------|-------------|
35
+ | [`declarations`](schema_metadata/declarations.md) | Hash | Raw AST declaration nodes by name |
36
+ | [`dependencies`](schema_metadata/dependencies.md) | Hash | Dependency graph with Edge objects |
37
+ | `dependents` | Hash | Reverse dependency lookup |
38
+ | `leaves` | Hash | Leaf nodes (no dependencies) by type |
39
+ | [`evaluation_order`](schema_metadata/evaluation_order.md) | Array | Topologically sorted evaluation order |
40
+ | [`inferred_types`](schema_metadata/inferred_types.md) | Hash | Type inference results for declarations |
41
+ | [`cascades`](schema_metadata/cascades.md) | Hash | Cascade mutual exclusion analysis |
42
+ | [`broadcasts`](schema_metadata/broadcasts.md) | Hash | Array broadcasting operation metadata |
43
+ | `analyzer_state` | Hash | Complete raw analyzer state with AST nodes |
44
+
45
+ Note: Raw `inputs` metadata is available via `analyzer_state[:inputs]`, but the processed `inputs` method is recommended for tool development.
46
+
47
+ ## Usage Patterns
48
+
49
+ ```ruby
50
+ # Tool development - use processed metadata
51
+ metadata = MySchema.schema_metadata
52
+ form_fields = metadata.inputs.map { |name, info| create_field(name, info) }
53
+ documentation = metadata.values.map { |name, info| document_value(name, info) }
54
+
55
+ # Advanced analysis - use raw state when needed
56
+ dependency_graph = metadata.dependencies
57
+ ast_nodes = metadata.declarations
58
+ evaluation_sequence = metadata.evaluation_order
59
+ ```
60
+
61
+ ## Data Structure Examples
62
+
63
+ ### Processed Input Metadata
64
+ ```ruby
65
+ metadata.inputs
66
+ # => {
67
+ # :age => { type: :integer, domain: { type: :range, min: 18, max: 65 }, required: true },
68
+ # :name => { type: :string, required: true },
69
+ # :items => { type: :array, required: true }
70
+ # }
71
+ ```
72
+
73
+ ### Processed Value Metadata
74
+ ```ruby
75
+ metadata.values
76
+ # => {
77
+ # :tax_amount => {
78
+ # type: :float,
79
+ # dependencies: [:income, :tax_rate],
80
+ # computed: true,
81
+ # expression: "multiply(input.income, tax_rate)"
82
+ # }
83
+ # }
84
+ ```
85
+
86
+ ### Clean Public Interface Examples
87
+ ```ruby
88
+ # Processed dependency information (clean hashes)
89
+ metadata.dependencies
90
+ # => { :tax_amount => [{ to: :income, conditional: false }, { to: :tax_rate, conditional: false }] }
91
+
92
+ # Processed declaration metadata (clean hashes)
93
+ metadata.declarations
94
+ # => { :adult => { type: :trait, expression: ">=(input.age, 18)" }, :tax_amount => { type: :value, expression: "multiply(input.income, tax_rate)" } }
95
+
96
+ # Type inference results (clean data)
97
+ metadata.inferred_types
98
+ # => { :adult => :boolean, :tax_amount => :float, :item_totals => { array: :float } }
99
+ ```
100
+
101
+ ### Raw Analyzer State (Advanced Usage)
102
+ ```ruby
103
+ # Complete raw state hash with internal objects (AST nodes, Edge objects)
104
+ metadata.analyzer_state
105
+ # => { declarations: {AST nodes...}, dependencies: {Edge objects...}, ... }
106
+ ```
107
+
108
+ See `docs/schema_metadata/` for detailed examples.
@@ -28,7 +28,7 @@ module FederalTaxCalculator
28
28
  schema do
29
29
  input do
30
30
  float :income
31
- string :filing_status, domain: %(single married_joint married_separate head_of_household)
31
+ string :filing_status, domain: %w[single married_joint married_separate head_of_household]
32
32
  end
33
33
 
34
34
  # ── standard deduction table ───────────────────────────────────────
@@ -59,7 +59,7 @@ module FederalTaxCalculator
59
59
 
60
60
  value :fed_tax, fed_calc[0]
61
61
  value :fed_marginal, fed_calc[1]
62
- value :fed_eff, fed_tax / f[input.income, 1.0].max
62
+ value :fed_eff, fed_tax / [input.income, 1.0].max
63
63
 
64
64
  # ── FICA (employee share) ─────────────────────────────────────────────
65
65
  value :ss_wage_base, 168_600.0
@@ -96,10 +96,11 @@ module FederalTaxCalculator
96
96
  end
97
97
  end
98
98
 
99
- def calculate_tax(calculator, income: 1_000_000, status: "single")
99
+ def print_tax_summary(args)
100
+ r = FederalTaxCalculator.from(args)
100
101
  puts "\n=== 2024 U.S. Income‑Tax Example ==="
101
- printf "Income: $%0.2f\n", income
102
- puts "Filing status: #{status}\n\n"
102
+ printf "Income: $%0.2f\n", args[:income]
103
+ puts "Filing status: #{args[:filing_status]}\n\n"
103
104
 
104
105
  puts "Federal tax: $#{r[:fed_tax].round(2)} (#{(r[:fed_eff] * 100).round(2)}% effective)"
105
106
  puts "FICA tax: $#{r[:fica_tax].round(2)} (#{(r[:fica_eff] * 100).round(2)}% effective)"
@@ -107,4 +108,8 @@ def calculate_tax(calculator, income: 1_000_000, status: "single")
107
108
  puts "After-tax income: $#{r[:after_tax].round(2)}"
108
109
  end
109
110
 
110
- calculate_tax(FederalTaxCalculator, income: 1_000_000, status: "single")
111
+
112
+ input = { income: 1_000_000,
113
+ filing_status: "single"
114
+ }
115
+ print_tax_summary(input)
@@ -23,7 +23,7 @@ module Kumi
23
23
  return node.value if node.is_a?(Literal)
24
24
 
25
25
  result = case node
26
- when Binding then evaluate_binding(node, visited)
26
+ when DeclarationReference then evaluate_binding(node, visited)
27
27
  when CallExpression then evaluate_call_expression(node, visited)
28
28
  else :unknown
29
29
  end
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Analyzer
5
+ module Passes
6
+ # Detects which operations should be broadcast over arrays
7
+ # DEPENDENCIES: :inputs, :declarations
8
+ # PRODUCES: :broadcasts
9
+ class BroadcastDetector < PassBase
10
+ def run(errors)
11
+ input_meta = get_state(:inputs) || {}
12
+ definitions = get_state(:declarations) || {}
13
+
14
+ # Find array fields with their element types
15
+ array_fields = find_array_fields(input_meta)
16
+
17
+ # Build compiler metadata
18
+ compiler_metadata = {
19
+ array_fields: array_fields,
20
+ vectorized_operations: {},
21
+ reduction_operations: {}
22
+ }
23
+
24
+ # Track which values are vectorized for type inference
25
+ vectorized_values = {}
26
+
27
+ # Analyze traits first, then values (to handle dependencies)
28
+ traits = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::TraitDeclaration) }
29
+ values = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::ValueDeclaration) }
30
+
31
+ (traits.to_a + values.to_a).each do |name, decl|
32
+ result = analyze_value_vectorization(name, decl.expression, array_fields, vectorized_values, errors)
33
+
34
+ case result[:type]
35
+ when :vectorized
36
+ compiler_metadata[:vectorized_operations][name] = result[:info]
37
+ # Store array source information for dimension checking
38
+ array_source = extract_array_source(result[:info], array_fields)
39
+ vectorized_values[name] = { vectorized: true, array_source: array_source }
40
+ when :reduction
41
+ compiler_metadata[:reduction_operations][name] = result[:info]
42
+ # Reduction produces scalar, not vectorized
43
+ vectorized_values[name] = { vectorized: false }
44
+ end
45
+ end
46
+
47
+ state.with(:broadcasts, compiler_metadata.freeze)
48
+ end
49
+
50
+ private
51
+
52
+ def find_array_fields(input_meta)
53
+ result = {}
54
+ input_meta.each do |name, meta|
55
+ next unless meta[:type] == :array && meta[:children]
56
+
57
+ result[name] = {
58
+ element_fields: meta[:children].keys,
59
+ element_types: meta[:children].transform_values { |v| v[:type] || :any }
60
+ }
61
+ end
62
+ result
63
+ end
64
+
65
+ def analyze_value_vectorization(name, expr, array_fields, vectorized_values, errors)
66
+ case expr
67
+ when Kumi::Syntax::InputElementReference
68
+ if array_fields.key?(expr.path.first)
69
+ { type: :vectorized, info: { source: :array_field_access, path: expr.path } }
70
+ else
71
+ { type: :scalar }
72
+ end
73
+
74
+ when Kumi::Syntax::DeclarationReference
75
+ # Check if this references a vectorized value
76
+ vector_info = vectorized_values[expr.name]
77
+ if vector_info && vector_info[:vectorized]
78
+ { type: :vectorized, info: { source: :vectorized_declaration, name: expr.name } }
79
+ else
80
+ { type: :scalar }
81
+ end
82
+
83
+ when Kumi::Syntax::CallExpression
84
+ analyze_call_vectorization(name, expr, array_fields, vectorized_values, errors)
85
+
86
+ when Kumi::Syntax::CascadeExpression
87
+ analyze_cascade_vectorization(name, expr, array_fields, vectorized_values, errors)
88
+
89
+ else
90
+ { type: :scalar }
91
+ end
92
+ end
93
+
94
+ def analyze_call_vectorization(_name, expr, array_fields, vectorized_values, errors)
95
+ # Check if this is a reduction function using function registry metadata
96
+ if FunctionRegistry.reducer?(expr.fn_name)
97
+ # Only treat as reduction if the argument is actually vectorized
98
+ arg_info = analyze_argument_vectorization(expr.args.first, array_fields, vectorized_values)
99
+ if arg_info[:vectorized]
100
+ { type: :reduction, info: { function: expr.fn_name, source: arg_info[:source] } }
101
+ else
102
+ # Not a vectorized reduction - just a regular function call
103
+ { type: :scalar }
104
+ end
105
+
106
+ else
107
+ # Special case: all?, any?, none? functions with vectorized trait arguments should be treated as vectorized
108
+ # for cascade condition purposes (they get transformed during compilation)
109
+ if %i[all? any? none?].include?(expr.fn_name) && expr.args.length == 1
110
+ arg = expr.args.first
111
+ if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
112
+ trait_ref = arg.elements.first
113
+ if trait_ref.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[trait_ref.name]&.[](:vectorized)
114
+ return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: trait_ref.name } }
115
+ end
116
+ end
117
+ end
118
+
119
+ # ANY function with vectorized arguments becomes vectorized (with broadcasting)
120
+ arg_infos = expr.args.map { |arg| analyze_argument_vectorization(arg, array_fields, vectorized_values) }
121
+
122
+ if arg_infos.any? { |info| info[:vectorized] }
123
+ # Check for dimension mismatches when multiple arguments are vectorized
124
+ vectorized_sources = arg_infos.select { |info| info[:vectorized] }.filter_map { |info| info[:array_source] }.uniq
125
+
126
+ if vectorized_sources.length > 1
127
+ # Multiple different array sources - this is a dimension mismatch
128
+ # Generate enhanced error message with type information
129
+ enhanced_message = build_dimension_mismatch_error(expr, arg_infos, array_fields, vectorized_sources)
130
+
131
+ report_error(errors, enhanced_message, location: expr.loc, type: :semantic)
132
+ return { type: :scalar } # Treat as scalar to prevent further errors
133
+ end
134
+
135
+ # This is a vectorized operation - ANY function supports broadcasting
136
+ { type: :vectorized, info: {
137
+ operation: expr.fn_name,
138
+ vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
139
+ } }
140
+ else
141
+ { type: :scalar }
142
+ end
143
+ end
144
+ end
145
+
146
+ def analyze_argument_vectorization(arg, array_fields, vectorized_values)
147
+ case arg
148
+ when Kumi::Syntax::InputElementReference
149
+ if array_fields.key?(arg.path.first)
150
+ { vectorized: true, source: :array_field, array_source: arg.path.first }
151
+ else
152
+ { vectorized: false }
153
+ end
154
+
155
+ when Kumi::Syntax::DeclarationReference
156
+ # Check if this references a vectorized value
157
+ vector_info = vectorized_values[arg.name]
158
+ if vector_info && vector_info[:vectorized]
159
+ array_source = vector_info[:array_source]
160
+ { vectorized: true, source: :vectorized_value, array_source: array_source }
161
+ else
162
+ { vectorized: false }
163
+ end
164
+
165
+ when Kumi::Syntax::CallExpression
166
+ # Recursively check
167
+ result = analyze_value_vectorization(nil, arg, array_fields, vectorized_values, [])
168
+ { vectorized: result[:type] == :vectorized, source: :expression }
169
+
170
+ else
171
+ { vectorized: false }
172
+ end
173
+ end
174
+
175
+ def extract_array_source(info, _array_fields)
176
+ case info[:source]
177
+ when :array_field_access
178
+ info[:path]&.first
179
+ when :cascade_condition_with_vectorized_trait
180
+ # For cascades, we'd need to trace back to the original source
181
+ nil # TODO: Could be enhanced to trace through trait dependencies
182
+ end
183
+ end
184
+
185
+ def analyze_cascade_vectorization(_name, expr, array_fields, vectorized_values, errors)
186
+ # A cascade is vectorized if:
187
+ # 1. Any of its result expressions are vectorized, OR
188
+ # 2. Any of its conditions reference vectorized values (traits or arrays)
189
+ vectorized_results = []
190
+ vectorized_conditions = []
191
+
192
+ expr.cases.each do |case_expr|
193
+ # Check if result is vectorized
194
+ result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, vectorized_values, errors)
195
+ vectorized_results << (result_info[:type] == :vectorized)
196
+
197
+ # Check if condition is vectorized
198
+ condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, vectorized_values, errors)
199
+ vectorized_conditions << (condition_info[:type] == :vectorized)
200
+ end
201
+
202
+ if vectorized_results.any? || vectorized_conditions.any?
203
+ { type: :vectorized, info: { source: :cascade_with_vectorized_conditions_or_results } }
204
+ else
205
+ { type: :scalar }
206
+ end
207
+ end
208
+
209
+ def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
210
+ # Build detailed error message with type information
211
+ summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "
212
+
213
+ problem_desc = "Problem: Multiple operands are arrays from different sources:\n"
214
+
215
+ vectorized_args = arg_infos.select { |info| info[:vectorized] }
216
+ vectorized_args.each_with_index do |arg_info, index|
217
+ array_source = arg_info[:array_source]
218
+ next unless array_source && array_fields[array_source]
219
+
220
+ # Determine the type based on array field metadata
221
+ type_desc = determine_array_type(array_source, array_fields)
222
+ problem_desc += " - Operand #{index + 1} resolves to #{type_desc} from array '#{array_source}'\n"
223
+ end
224
+
225
+ explanation = "Direct operations on arrays from different sources is ambiguous and not supported. " \
226
+ "Vectorized operations can only work on fields from the same array input."
227
+
228
+ "#{summary}#{problem_desc}#{explanation}"
229
+ end
230
+
231
+ def determine_array_type(array_source, array_fields)
232
+ field_info = array_fields[array_source]
233
+ return "array(any)" unless field_info[:element_types]
234
+
235
+ # For nested arrays (like items.name where items is an array), this represents array(element_type)
236
+ element_types = field_info[:element_types].values.uniq
237
+ if element_types.length == 1
238
+ "array(#{element_types.first})"
239
+ else
240
+ "array(mixed)"
241
+ end
242
+ end
243
+ end
244
+ end
245
+ end
246
+ end
@@ -7,7 +7,7 @@ module Kumi
7
7
  # DEPENDENCIES: :definitions
8
8
  # PRODUCES: None (validation only)
9
9
  # INTERFACE: new(schema, state).run(errors)
10
- class DefinitionValidator < VisitorPass
10
+ class DeclarationValidator < VisitorPass
11
11
  def run(errors)
12
12
  each_decl do |decl|
13
13
  visit(decl) { |node| validate_node(node, errors) }
@@ -19,9 +19,9 @@ module Kumi
19
19
 
20
20
  def validate_node(node, errors)
21
21
  case node
22
- when Declarations::Attribute
22
+ when Kumi::Syntax::ValueDeclaration
23
23
  validate_attribute(node, errors)
24
- when Declarations::Trait
24
+ when Kumi::Syntax::TraitDeclaration
25
25
  validate_trait(node, errors)
26
26
  end
27
27
  end
@@ -33,7 +33,7 @@ module Kumi
33
33
  end
34
34
 
35
35
  def validate_trait(node, errors)
36
- return if node.expression.is_a?(Expressions::CallExpression)
36
+ return if node.expression.is_a?(Kumi::Syntax::CallExpression)
37
37
 
38
38
  report_error(errors, "trait `#{node.name}` must wrap a CallExpression", location: node.loc)
39
39
  end