kumi 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +76 -174
- data/README.md +205 -52
- data/{documents → docs}/AST.md +29 -29
- data/{documents → docs}/SYNTAX.md +95 -8
- data/docs/features/README.md +45 -0
- data/docs/features/analysis-cascade-mutual-exclusion.md +89 -0
- data/docs/features/analysis-type-inference.md +42 -0
- data/docs/features/analysis-unsat-detection.md +71 -0
- data/docs/features/array-broadcasting.md +170 -0
- data/docs/features/input-declaration-system.md +42 -0
- data/docs/features/performance.md +16 -0
- data/docs/schema_metadata/broadcasts.md +53 -0
- data/docs/schema_metadata/cascades.md +45 -0
- data/docs/schema_metadata/declarations.md +54 -0
- data/docs/schema_metadata/dependencies.md +57 -0
- data/docs/schema_metadata/evaluation_order.md +29 -0
- data/docs/schema_metadata/examples.md +95 -0
- data/docs/schema_metadata/inferred_types.md +46 -0
- data/docs/schema_metadata/inputs.md +86 -0
- data/docs/schema_metadata.md +108 -0
- data/examples/federal_tax_calculator_2024.rb +11 -6
- data/lib/kumi/analyzer/constant_evaluator.rb +1 -1
- data/lib/kumi/analyzer/passes/broadcast_detector.rb +246 -0
- data/lib/kumi/analyzer/passes/{definition_validator.rb → declaration_validator.rb} +4 -4
- data/lib/kumi/analyzer/passes/dependency_resolver.rb +78 -38
- data/lib/kumi/analyzer/passes/input_collector.rb +91 -30
- data/lib/kumi/analyzer/passes/name_indexer.rb +2 -2
- data/lib/kumi/analyzer/passes/pass_base.rb +1 -1
- data/lib/kumi/analyzer/passes/semantic_constraint_validator.rb +24 -25
- data/lib/kumi/analyzer/passes/toposorter.rb +44 -8
- data/lib/kumi/analyzer/passes/type_checker.rb +34 -14
- data/lib/kumi/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/analyzer/passes/type_inferencer.rb +130 -21
- data/lib/kumi/analyzer/passes/unsat_detector.rb +134 -56
- data/lib/kumi/analyzer/passes/visitor_pass.rb +2 -2
- data/lib/kumi/analyzer.rb +16 -17
- data/lib/kumi/compiler.rb +188 -16
- data/lib/kumi/constraint_relationship_solver.rb +6 -6
- data/lib/kumi/domain/validator.rb +0 -4
- data/lib/kumi/error_reporting.rb +1 -1
- data/lib/kumi/explain.rb +32 -20
- data/lib/kumi/export/node_registry.rb +26 -12
- data/lib/kumi/export/node_serializers.rb +1 -1
- data/lib/kumi/function_registry/collection_functions.rb +14 -9
- data/lib/kumi/function_registry/function_builder.rb +4 -3
- data/lib/kumi/function_registry.rb +8 -2
- data/lib/kumi/input/type_matcher.rb +3 -0
- data/lib/kumi/input/validator.rb +0 -3
- data/lib/kumi/json_schema/generator.rb +63 -0
- data/lib/kumi/json_schema/validator.rb +25 -0
- data/lib/kumi/json_schema.rb +14 -0
- data/lib/kumi/{parser → ruby_parser}/build_context.rb +1 -1
- data/lib/kumi/ruby_parser/declaration_reference_proxy.rb +36 -0
- data/lib/kumi/{parser → ruby_parser}/dsl.rb +1 -1
- data/lib/kumi/{parser → ruby_parser}/dsl_cascade_builder.rb +5 -5
- data/lib/kumi/{parser → ruby_parser}/expression_converter.rb +20 -20
- data/lib/kumi/{parser → ruby_parser}/guard_rails.rb +1 -1
- data/lib/kumi/{parser → ruby_parser}/input_builder.rb +41 -10
- data/lib/kumi/ruby_parser/input_field_proxy.rb +46 -0
- data/lib/kumi/{parser → ruby_parser}/input_proxy.rb +4 -4
- data/lib/kumi/ruby_parser/nested_input.rb +15 -0
- data/lib/kumi/{parser → ruby_parser}/parser.rb +11 -10
- data/lib/kumi/{parser → ruby_parser}/schema_builder.rb +11 -10
- data/lib/kumi/{parser → ruby_parser}/sugar.rb +62 -10
- data/lib/kumi/ruby_parser.rb +10 -0
- data/lib/kumi/schema.rb +10 -4
- data/lib/kumi/schema_instance.rb +6 -6
- data/lib/kumi/schema_metadata.rb +524 -0
- data/lib/kumi/syntax/array_expression.rb +15 -0
- data/lib/kumi/syntax/call_expression.rb +11 -0
- data/lib/kumi/syntax/cascade_expression.rb +11 -0
- data/lib/kumi/syntax/case_expression.rb +11 -0
- data/lib/kumi/syntax/declaration_reference.rb +11 -0
- data/lib/kumi/syntax/hash_expression.rb +11 -0
- data/lib/kumi/syntax/input_declaration.rb +12 -0
- data/lib/kumi/syntax/input_element_reference.rb +12 -0
- data/lib/kumi/syntax/input_reference.rb +12 -0
- data/lib/kumi/syntax/literal.rb +11 -0
- data/lib/kumi/syntax/trait_declaration.rb +11 -0
- data/lib/kumi/syntax/value_declaration.rb +11 -0
- data/lib/kumi/vectorization_metadata.rb +108 -0
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +14 -0
- metadata +55 -25
- data/lib/generators/trait_engine/templates/schema_spec.rb.erb +0 -27
- data/lib/kumi/domain.rb +0 -8
- data/lib/kumi/input.rb +0 -8
- data/lib/kumi/syntax/declarations.rb +0 -26
- data/lib/kumi/syntax/expressions.rb +0 -34
- data/lib/kumi/syntax/terminal_expressions.rb +0 -30
- data/lib/kumi/syntax.rb +0 -9
- /data/{documents → docs}/DSL.md +0 -0
- /data/{documents → docs}/FUNCTIONS.md +0 -0
@@ -0,0 +1,95 @@
|
|
1
|
+
# Schema Metadata Examples
|
2
|
+
|
3
|
+
For comprehensive API documentation with detailed examples, see the YARD documentation in the SchemaMetadata class.
|
4
|
+
|
5
|
+
## Basic Usage
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
class TaxSchema
|
9
|
+
extend Kumi::Schema
|
10
|
+
|
11
|
+
schema do
|
12
|
+
input do
|
13
|
+
integer :income, domain: 0..1_000_000
|
14
|
+
string :filing_status, domain: %w[single married]
|
15
|
+
integer :age, domain: 18..100
|
16
|
+
end
|
17
|
+
|
18
|
+
trait :adult, (input.age >= 18)
|
19
|
+
trait :high_income, (input.income > 100_000)
|
20
|
+
|
21
|
+
value :tax_rate do
|
22
|
+
on high_income, 0.25
|
23
|
+
base 0.15
|
24
|
+
end
|
25
|
+
|
26
|
+
value :tax_amount, input.income * tax_rate
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Access schema metadata - clean object interface!
|
31
|
+
metadata = TaxSchema.schema_metadata
|
32
|
+
|
33
|
+
# Processed semantic metadata (rich, transformed from AST)
|
34
|
+
puts metadata.inputs
|
35
|
+
# => { :income => { type: :integer, domain: {...}, required: true }, ... }
|
36
|
+
|
37
|
+
puts metadata.values
|
38
|
+
# => { :tax_rate => { type: :float, cascade: {...} }, ... }
|
39
|
+
|
40
|
+
puts metadata.traits
|
41
|
+
# => { :adult => { type: :boolean, condition: "input.age >= 18" }, ... }
|
42
|
+
|
43
|
+
# Raw analyzer state (direct from analysis passes)
|
44
|
+
puts metadata.evaluation_order
|
45
|
+
# => [:adult, :high_income, :tax_rate, :tax_amount]
|
46
|
+
|
47
|
+
puts metadata.dependencies
|
48
|
+
# => { :tax_amount => [#<Edge to: :tax_rate>, #<Edge to: :income>], ... }
|
49
|
+
|
50
|
+
puts metadata.inferred_types
|
51
|
+
# => { :adult => :boolean, :tax_rate => :float, :tax_amount => :float }
|
52
|
+
|
53
|
+
# Serializable processed hash
|
54
|
+
processed_hash = metadata.to_h
|
55
|
+
puts processed_hash.keys
|
56
|
+
# => [:inputs, :values, :traits, :functions]
|
57
|
+
|
58
|
+
# Raw analyzer state (contains AST nodes)
|
59
|
+
raw_state = metadata.analyzer_state
|
60
|
+
puts raw_state.keys
|
61
|
+
# => [:declarations, :inputs, :dependencies, :dependents, :leaves, :evaluation_order, :inferred_types, :cascades, :broadcasts]
|
62
|
+
```
|
63
|
+
|
64
|
+
## Tool Integration
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
# Form generator example
|
68
|
+
def generate_form_fields(schema_class)
|
69
|
+
metadata = schema_class.schema_metadata
|
70
|
+
|
71
|
+
metadata.inputs.map do |field_name, field_info|
|
72
|
+
case field_info[:type]
|
73
|
+
when :integer
|
74
|
+
create_number_input(field_name, field_info[:domain])
|
75
|
+
when :string
|
76
|
+
create_select_input(field_name, field_info[:domain])
|
77
|
+
when :boolean
|
78
|
+
create_checkbox_input(field_name)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# Dependency analysis example
|
84
|
+
def analyze_field_dependencies(schema_class, field_name)
|
85
|
+
metadata = schema_class.schema_metadata
|
86
|
+
|
87
|
+
# Find what depends on this field
|
88
|
+
dependents = metadata.dependents[field_name] || []
|
89
|
+
|
90
|
+
# Find what this field depends on
|
91
|
+
dependencies = metadata.dependencies[field_name]&.map(&:to) || []
|
92
|
+
|
93
|
+
{ affects: dependents, requires: dependencies }
|
94
|
+
end
|
95
|
+
```
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Inferred Types Metadata
|
2
|
+
|
3
|
+
Type inference results for all declarations based on expression analysis.
|
4
|
+
|
5
|
+
## Access
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
metadata = MySchema.schema_metadata
|
9
|
+
types = metadata.inferred_types
|
10
|
+
```
|
11
|
+
|
12
|
+
## Structure
|
13
|
+
|
14
|
+
```ruby
|
15
|
+
# Returns Hash<Symbol, Object>
|
16
|
+
{
|
17
|
+
declaration_name => type_specification
|
18
|
+
}
|
19
|
+
```
|
20
|
+
|
21
|
+
## Example
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
metadata.inferred_types
|
25
|
+
# => {
|
26
|
+
# :adult => :boolean,
|
27
|
+
# :age_group => :string,
|
28
|
+
# :tax_rate => :float,
|
29
|
+
# :count => :integer,
|
30
|
+
# :item_prices => { array: :float },
|
31
|
+
# :categories => { array: :string }
|
32
|
+
# }
|
33
|
+
```
|
34
|
+
|
35
|
+
## Type Values
|
36
|
+
|
37
|
+
- `:boolean`, `:string`, `:integer`, `:float`, `:any`
|
38
|
+
- `{ array: element_type }` for arrays
|
39
|
+
- `{ hash: { key: key_type, value: value_type } }` for hashes
|
40
|
+
|
41
|
+
## Usage
|
42
|
+
|
43
|
+
- Type checking
|
44
|
+
- Code generation
|
45
|
+
- Editor support
|
46
|
+
- Runtime validation
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Input Metadata
|
2
|
+
|
3
|
+
Input field metadata extracted from `input` blocks during analysis, available in both processed and raw forms.
|
4
|
+
|
5
|
+
## Access
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
metadata = MySchema.schema_metadata
|
9
|
+
|
10
|
+
# Processed input metadata (recommended for tools)
|
11
|
+
inputs = metadata.inputs
|
12
|
+
|
13
|
+
# Raw input metadata (advanced usage)
|
14
|
+
raw_inputs = metadata.analyzer_state[:inputs]
|
15
|
+
```
|
16
|
+
|
17
|
+
## Raw Structure
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
# Raw analyzer state format
|
21
|
+
{
|
22
|
+
field_name => {
|
23
|
+
type: Symbol, # :integer, :string, :float, :boolean, :array, etc.
|
24
|
+
domain: Range|Array, # optional domain constraints
|
25
|
+
children: Hash # for array/hash types
|
26
|
+
}
|
27
|
+
}
|
28
|
+
```
|
29
|
+
|
30
|
+
## Processed Structure
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
# Processed metadata format (via metadata.inputs)
|
34
|
+
{
|
35
|
+
field_name => {
|
36
|
+
type: Symbol, # normalized type
|
37
|
+
domain: Hash, # normalized domain metadata
|
38
|
+
required: Boolean # always true currently
|
39
|
+
}
|
40
|
+
}
|
41
|
+
```
|
42
|
+
|
43
|
+
## Examples
|
44
|
+
|
45
|
+
**Processed Input Metadata:**
|
46
|
+
```ruby
|
47
|
+
metadata.inputs
|
48
|
+
# => {
|
49
|
+
# :age => {
|
50
|
+
# type: :integer,
|
51
|
+
# domain: { type: :range, min: 0, max: 120, exclusive_end: false },
|
52
|
+
# required: true
|
53
|
+
# },
|
54
|
+
# :name => { type: :string, required: true },
|
55
|
+
# :active => { type: :boolean, required: true }
|
56
|
+
# }
|
57
|
+
```
|
58
|
+
|
59
|
+
**Raw Input Metadata:**
|
60
|
+
```ruby
|
61
|
+
metadata.analyzer_state[:inputs]
|
62
|
+
# => {
|
63
|
+
# :age => { type: :integer, domain: 0..120 },
|
64
|
+
# :name => { type: :string },
|
65
|
+
# :line_items => {
|
66
|
+
# type: :array,
|
67
|
+
# children: {
|
68
|
+
# :price => { type: :float, domain: 0..Float::INFINITY },
|
69
|
+
# :quantity => { type: :integer, domain: 1..100 }
|
70
|
+
# }
|
71
|
+
# }
|
72
|
+
# }
|
73
|
+
```
|
74
|
+
|
75
|
+
**Domain Types:**
|
76
|
+
- Range: `18..65`, `0..Float::INFINITY`
|
77
|
+
- Array: `%w[active inactive suspended]`
|
78
|
+
- Proc: Custom validation functions
|
79
|
+
|
80
|
+
## Usage
|
81
|
+
|
82
|
+
Form generators use this metadata to:
|
83
|
+
- Create appropriate input controls
|
84
|
+
- Set validation rules
|
85
|
+
- Build nested forms for arrays
|
86
|
+
- Generate type-safe schemas
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# Schema Metadata
|
2
|
+
|
3
|
+
Kumi's SchemaMetadata interface provides structured access to analyzed schema information for building external tools like form generators, documentation systems, and analysis utilities.
|
4
|
+
|
5
|
+
## Primary Interface
|
6
|
+
|
7
|
+
SchemaMetadata is the main interface for extracting metadata from Kumi schemas:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
metadata = MySchema.schema_metadata
|
11
|
+
```
|
12
|
+
|
13
|
+
See the API documentation on the SchemaMetadata class for detailed method descriptions, examples, and usage patterns.
|
14
|
+
|
15
|
+
## Processed Metadata (Tool-Friendly)
|
16
|
+
|
17
|
+
These methods return clean, serializable data structures:
|
18
|
+
|
19
|
+
| Method | Returns | Description |
|
20
|
+
|--------|---------|-------------|
|
21
|
+
| `inputs` | Hash | Input field metadata with normalized types and domains |
|
22
|
+
| `values` | Hash | Value declarations with dependencies and expressions |
|
23
|
+
| `traits` | Hash | Trait conditions with dependency information |
|
24
|
+
| `functions` | Hash | Function registry info for functions used in schema |
|
25
|
+
| `to_h` | Hash | Complete processed metadata (inputs, values, traits, functions) |
|
26
|
+
| `to_json` | String | JSON serialization of processed metadata |
|
27
|
+
| `to_json_schema` | Hash | JSON Schema document for input validation |
|
28
|
+
|
29
|
+
## Raw Analyzer State (Advanced)
|
30
|
+
|
31
|
+
Direct access to internal analyzer results:
|
32
|
+
|
33
|
+
| Method | Returns | Description |
|
34
|
+
|--------|---------|-------------|
|
35
|
+
| [`declarations`](schema_metadata/declarations.md) | Hash | Raw AST declaration nodes by name |
|
36
|
+
| [`dependencies`](schema_metadata/dependencies.md) | Hash | Dependency graph with Edge objects |
|
37
|
+
| `dependents` | Hash | Reverse dependency lookup |
|
38
|
+
| `leaves` | Hash | Leaf nodes (no dependencies) by type |
|
39
|
+
| [`evaluation_order`](schema_metadata/evaluation_order.md) | Array | Topologically sorted evaluation order |
|
40
|
+
| [`inferred_types`](schema_metadata/inferred_types.md) | Hash | Type inference results for declarations |
|
41
|
+
| [`cascades`](schema_metadata/cascades.md) | Hash | Cascade mutual exclusion analysis |
|
42
|
+
| [`broadcasts`](schema_metadata/broadcasts.md) | Hash | Array broadcasting operation metadata |
|
43
|
+
| `analyzer_state` | Hash | Complete raw analyzer state with AST nodes |
|
44
|
+
|
45
|
+
Note: Raw `inputs` metadata is available via `analyzer_state[:inputs]`, but the processed `inputs` method is recommended for tool development.
|
46
|
+
|
47
|
+
## Usage Patterns
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
# Tool development - use processed metadata
|
51
|
+
metadata = MySchema.schema_metadata
|
52
|
+
form_fields = metadata.inputs.map { |name, info| create_field(name, info) }
|
53
|
+
documentation = metadata.values.map { |name, info| document_value(name, info) }
|
54
|
+
|
55
|
+
# Advanced analysis - use raw state when needed
|
56
|
+
dependency_graph = metadata.dependencies
|
57
|
+
ast_nodes = metadata.declarations
|
58
|
+
evaluation_sequence = metadata.evaluation_order
|
59
|
+
```
|
60
|
+
|
61
|
+
## Data Structure Examples
|
62
|
+
|
63
|
+
### Processed Input Metadata
|
64
|
+
```ruby
|
65
|
+
metadata.inputs
|
66
|
+
# => {
|
67
|
+
# :age => { type: :integer, domain: { type: :range, min: 18, max: 65 }, required: true },
|
68
|
+
# :name => { type: :string, required: true },
|
69
|
+
# :items => { type: :array, required: true }
|
70
|
+
# }
|
71
|
+
```
|
72
|
+
|
73
|
+
### Processed Value Metadata
|
74
|
+
```ruby
|
75
|
+
metadata.values
|
76
|
+
# => {
|
77
|
+
# :tax_amount => {
|
78
|
+
# type: :float,
|
79
|
+
# dependencies: [:income, :tax_rate],
|
80
|
+
# computed: true,
|
81
|
+
# expression: "multiply(input.income, tax_rate)"
|
82
|
+
# }
|
83
|
+
# }
|
84
|
+
```
|
85
|
+
|
86
|
+
### Clean Public Interface Examples
|
87
|
+
```ruby
|
88
|
+
# Processed dependency information (clean hashes)
|
89
|
+
metadata.dependencies
|
90
|
+
# => { :tax_amount => [{ to: :income, conditional: false }, { to: :tax_rate, conditional: false }] }
|
91
|
+
|
92
|
+
# Processed declaration metadata (clean hashes)
|
93
|
+
metadata.declarations
|
94
|
+
# => { :adult => { type: :trait, expression: ">=(input.age, 18)" }, :tax_amount => { type: :value, expression: "multiply(input.income, tax_rate)" } }
|
95
|
+
|
96
|
+
# Type inference results (clean data)
|
97
|
+
metadata.inferred_types
|
98
|
+
# => { :adult => :boolean, :tax_amount => :float, :item_totals => { array: :float } }
|
99
|
+
```
|
100
|
+
|
101
|
+
### Raw Analyzer State (Advanced Usage)
|
102
|
+
```ruby
|
103
|
+
# Complete raw state hash with internal objects (AST nodes, Edge objects)
|
104
|
+
metadata.analyzer_state
|
105
|
+
# => { declarations: {AST nodes...}, dependencies: {Edge objects...}, ... }
|
106
|
+
```
|
107
|
+
|
108
|
+
See `docs/schema_metadata/` for detailed examples.
|
@@ -28,7 +28,7 @@ module FederalTaxCalculator
|
|
28
28
|
schema do
|
29
29
|
input do
|
30
30
|
float :income
|
31
|
-
string :filing_status, domain: %
|
31
|
+
string :filing_status, domain: %w[single married_joint married_separate head_of_household]
|
32
32
|
end
|
33
33
|
|
34
34
|
# ── standard deduction table ───────────────────────────────────────
|
@@ -59,7 +59,7 @@ module FederalTaxCalculator
|
|
59
59
|
|
60
60
|
value :fed_tax, fed_calc[0]
|
61
61
|
value :fed_marginal, fed_calc[1]
|
62
|
-
value :fed_eff, fed_tax /
|
62
|
+
value :fed_eff, fed_tax / [input.income, 1.0].max
|
63
63
|
|
64
64
|
# ── FICA (employee share) ─────────────────────────────────────────────
|
65
65
|
value :ss_wage_base, 168_600.0
|
@@ -96,10 +96,11 @@ module FederalTaxCalculator
|
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
|
-
def
|
99
|
+
def print_tax_summary(args)
|
100
|
+
r = FederalTaxCalculator.from(args)
|
100
101
|
puts "\n=== 2024 U.S. Income‑Tax Example ==="
|
101
|
-
printf "Income: $%0.2f\n", income
|
102
|
-
puts "Filing status: #{
|
102
|
+
printf "Income: $%0.2f\n", args[:income]
|
103
|
+
puts "Filing status: #{args[:filing_status]}\n\n"
|
103
104
|
|
104
105
|
puts "Federal tax: $#{r[:fed_tax].round(2)} (#{(r[:fed_eff] * 100).round(2)}% effective)"
|
105
106
|
puts "FICA tax: $#{r[:fica_tax].round(2)} (#{(r[:fica_eff] * 100).round(2)}% effective)"
|
@@ -107,4 +108,8 @@ def calculate_tax(calculator, income: 1_000_000, status: "single")
|
|
107
108
|
puts "After-tax income: $#{r[:after_tax].round(2)}"
|
108
109
|
end
|
109
110
|
|
110
|
-
|
111
|
+
|
112
|
+
input = { income: 1_000_000,
|
113
|
+
filing_status: "single"
|
114
|
+
}
|
115
|
+
print_tax_summary(input)
|
@@ -23,7 +23,7 @@ module Kumi
|
|
23
23
|
return node.value if node.is_a?(Literal)
|
24
24
|
|
25
25
|
result = case node
|
26
|
-
when
|
26
|
+
when DeclarationReference then evaluate_binding(node, visited)
|
27
27
|
when CallExpression then evaluate_call_expression(node, visited)
|
28
28
|
else :unknown
|
29
29
|
end
|
@@ -0,0 +1,246 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Analyzer
|
5
|
+
module Passes
|
6
|
+
# Detects which operations should be broadcast over arrays
|
7
|
+
# DEPENDENCIES: :inputs, :declarations
|
8
|
+
# PRODUCES: :broadcasts
|
9
|
+
class BroadcastDetector < PassBase
|
10
|
+
def run(errors)
|
11
|
+
input_meta = get_state(:inputs) || {}
|
12
|
+
definitions = get_state(:declarations) || {}
|
13
|
+
|
14
|
+
# Find array fields with their element types
|
15
|
+
array_fields = find_array_fields(input_meta)
|
16
|
+
|
17
|
+
# Build compiler metadata
|
18
|
+
compiler_metadata = {
|
19
|
+
array_fields: array_fields,
|
20
|
+
vectorized_operations: {},
|
21
|
+
reduction_operations: {}
|
22
|
+
}
|
23
|
+
|
24
|
+
# Track which values are vectorized for type inference
|
25
|
+
vectorized_values = {}
|
26
|
+
|
27
|
+
# Analyze traits first, then values (to handle dependencies)
|
28
|
+
traits = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::TraitDeclaration) }
|
29
|
+
values = definitions.select { |_name, decl| decl.is_a?(Kumi::Syntax::ValueDeclaration) }
|
30
|
+
|
31
|
+
(traits.to_a + values.to_a).each do |name, decl|
|
32
|
+
result = analyze_value_vectorization(name, decl.expression, array_fields, vectorized_values, errors)
|
33
|
+
|
34
|
+
case result[:type]
|
35
|
+
when :vectorized
|
36
|
+
compiler_metadata[:vectorized_operations][name] = result[:info]
|
37
|
+
# Store array source information for dimension checking
|
38
|
+
array_source = extract_array_source(result[:info], array_fields)
|
39
|
+
vectorized_values[name] = { vectorized: true, array_source: array_source }
|
40
|
+
when :reduction
|
41
|
+
compiler_metadata[:reduction_operations][name] = result[:info]
|
42
|
+
# Reduction produces scalar, not vectorized
|
43
|
+
vectorized_values[name] = { vectorized: false }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
state.with(:broadcasts, compiler_metadata.freeze)
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def find_array_fields(input_meta)
|
53
|
+
result = {}
|
54
|
+
input_meta.each do |name, meta|
|
55
|
+
next unless meta[:type] == :array && meta[:children]
|
56
|
+
|
57
|
+
result[name] = {
|
58
|
+
element_fields: meta[:children].keys,
|
59
|
+
element_types: meta[:children].transform_values { |v| v[:type] || :any }
|
60
|
+
}
|
61
|
+
end
|
62
|
+
result
|
63
|
+
end
|
64
|
+
|
65
|
+
def analyze_value_vectorization(name, expr, array_fields, vectorized_values, errors)
|
66
|
+
case expr
|
67
|
+
when Kumi::Syntax::InputElementReference
|
68
|
+
if array_fields.key?(expr.path.first)
|
69
|
+
{ type: :vectorized, info: { source: :array_field_access, path: expr.path } }
|
70
|
+
else
|
71
|
+
{ type: :scalar }
|
72
|
+
end
|
73
|
+
|
74
|
+
when Kumi::Syntax::DeclarationReference
|
75
|
+
# Check if this references a vectorized value
|
76
|
+
vector_info = vectorized_values[expr.name]
|
77
|
+
if vector_info && vector_info[:vectorized]
|
78
|
+
{ type: :vectorized, info: { source: :vectorized_declaration, name: expr.name } }
|
79
|
+
else
|
80
|
+
{ type: :scalar }
|
81
|
+
end
|
82
|
+
|
83
|
+
when Kumi::Syntax::CallExpression
|
84
|
+
analyze_call_vectorization(name, expr, array_fields, vectorized_values, errors)
|
85
|
+
|
86
|
+
when Kumi::Syntax::CascadeExpression
|
87
|
+
analyze_cascade_vectorization(name, expr, array_fields, vectorized_values, errors)
|
88
|
+
|
89
|
+
else
|
90
|
+
{ type: :scalar }
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def analyze_call_vectorization(_name, expr, array_fields, vectorized_values, errors)
|
95
|
+
# Check if this is a reduction function using function registry metadata
|
96
|
+
if FunctionRegistry.reducer?(expr.fn_name)
|
97
|
+
# Only treat as reduction if the argument is actually vectorized
|
98
|
+
arg_info = analyze_argument_vectorization(expr.args.first, array_fields, vectorized_values)
|
99
|
+
if arg_info[:vectorized]
|
100
|
+
{ type: :reduction, info: { function: expr.fn_name, source: arg_info[:source] } }
|
101
|
+
else
|
102
|
+
# Not a vectorized reduction - just a regular function call
|
103
|
+
{ type: :scalar }
|
104
|
+
end
|
105
|
+
|
106
|
+
else
|
107
|
+
# Special case: all?, any?, none? functions with vectorized trait arguments should be treated as vectorized
|
108
|
+
# for cascade condition purposes (they get transformed during compilation)
|
109
|
+
if %i[all? any? none?].include?(expr.fn_name) && expr.args.length == 1
|
110
|
+
arg = expr.args.first
|
111
|
+
if arg.is_a?(Kumi::Syntax::ArrayExpression) && arg.elements.length == 1
|
112
|
+
trait_ref = arg.elements.first
|
113
|
+
if trait_ref.is_a?(Kumi::Syntax::DeclarationReference) && vectorized_values[trait_ref.name]&.[](:vectorized)
|
114
|
+
return { type: :vectorized, info: { source: :cascade_condition_with_vectorized_trait, trait: trait_ref.name } }
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
# ANY function with vectorized arguments becomes vectorized (with broadcasting)
|
120
|
+
arg_infos = expr.args.map { |arg| analyze_argument_vectorization(arg, array_fields, vectorized_values) }
|
121
|
+
|
122
|
+
if arg_infos.any? { |info| info[:vectorized] }
|
123
|
+
# Check for dimension mismatches when multiple arguments are vectorized
|
124
|
+
vectorized_sources = arg_infos.select { |info| info[:vectorized] }.filter_map { |info| info[:array_source] }.uniq
|
125
|
+
|
126
|
+
if vectorized_sources.length > 1
|
127
|
+
# Multiple different array sources - this is a dimension mismatch
|
128
|
+
# Generate enhanced error message with type information
|
129
|
+
enhanced_message = build_dimension_mismatch_error(expr, arg_infos, array_fields, vectorized_sources)
|
130
|
+
|
131
|
+
report_error(errors, enhanced_message, location: expr.loc, type: :semantic)
|
132
|
+
return { type: :scalar } # Treat as scalar to prevent further errors
|
133
|
+
end
|
134
|
+
|
135
|
+
# This is a vectorized operation - ANY function supports broadcasting
|
136
|
+
{ type: :vectorized, info: {
|
137
|
+
operation: expr.fn_name,
|
138
|
+
vectorized_args: arg_infos.map.with_index { |info, i| [i, info[:vectorized]] }.to_h
|
139
|
+
} }
|
140
|
+
else
|
141
|
+
{ type: :scalar }
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def analyze_argument_vectorization(arg, array_fields, vectorized_values)
|
147
|
+
case arg
|
148
|
+
when Kumi::Syntax::InputElementReference
|
149
|
+
if array_fields.key?(arg.path.first)
|
150
|
+
{ vectorized: true, source: :array_field, array_source: arg.path.first }
|
151
|
+
else
|
152
|
+
{ vectorized: false }
|
153
|
+
end
|
154
|
+
|
155
|
+
when Kumi::Syntax::DeclarationReference
|
156
|
+
# Check if this references a vectorized value
|
157
|
+
vector_info = vectorized_values[arg.name]
|
158
|
+
if vector_info && vector_info[:vectorized]
|
159
|
+
array_source = vector_info[:array_source]
|
160
|
+
{ vectorized: true, source: :vectorized_value, array_source: array_source }
|
161
|
+
else
|
162
|
+
{ vectorized: false }
|
163
|
+
end
|
164
|
+
|
165
|
+
when Kumi::Syntax::CallExpression
|
166
|
+
# Recursively check
|
167
|
+
result = analyze_value_vectorization(nil, arg, array_fields, vectorized_values, [])
|
168
|
+
{ vectorized: result[:type] == :vectorized, source: :expression }
|
169
|
+
|
170
|
+
else
|
171
|
+
{ vectorized: false }
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def extract_array_source(info, _array_fields)
|
176
|
+
case info[:source]
|
177
|
+
when :array_field_access
|
178
|
+
info[:path]&.first
|
179
|
+
when :cascade_condition_with_vectorized_trait
|
180
|
+
# For cascades, we'd need to trace back to the original source
|
181
|
+
nil # TODO: Could be enhanced to trace through trait dependencies
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
def analyze_cascade_vectorization(_name, expr, array_fields, vectorized_values, errors)
|
186
|
+
# A cascade is vectorized if:
|
187
|
+
# 1. Any of its result expressions are vectorized, OR
|
188
|
+
# 2. Any of its conditions reference vectorized values (traits or arrays)
|
189
|
+
vectorized_results = []
|
190
|
+
vectorized_conditions = []
|
191
|
+
|
192
|
+
expr.cases.each do |case_expr|
|
193
|
+
# Check if result is vectorized
|
194
|
+
result_info = analyze_value_vectorization(nil, case_expr.result, array_fields, vectorized_values, errors)
|
195
|
+
vectorized_results << (result_info[:type] == :vectorized)
|
196
|
+
|
197
|
+
# Check if condition is vectorized
|
198
|
+
condition_info = analyze_value_vectorization(nil, case_expr.condition, array_fields, vectorized_values, errors)
|
199
|
+
vectorized_conditions << (condition_info[:type] == :vectorized)
|
200
|
+
end
|
201
|
+
|
202
|
+
if vectorized_results.any? || vectorized_conditions.any?
|
203
|
+
{ type: :vectorized, info: { source: :cascade_with_vectorized_conditions_or_results } }
|
204
|
+
else
|
205
|
+
{ type: :scalar }
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
def build_dimension_mismatch_error(_expr, arg_infos, array_fields, vectorized_sources)
|
210
|
+
# Build detailed error message with type information
|
211
|
+
summary = "Cannot broadcast operation across arrays from different sources: #{vectorized_sources.join(', ')}. "
|
212
|
+
|
213
|
+
problem_desc = "Problem: Multiple operands are arrays from different sources:\n"
|
214
|
+
|
215
|
+
vectorized_args = arg_infos.select { |info| info[:vectorized] }
|
216
|
+
vectorized_args.each_with_index do |arg_info, index|
|
217
|
+
array_source = arg_info[:array_source]
|
218
|
+
next unless array_source && array_fields[array_source]
|
219
|
+
|
220
|
+
# Determine the type based on array field metadata
|
221
|
+
type_desc = determine_array_type(array_source, array_fields)
|
222
|
+
problem_desc += " - Operand #{index + 1} resolves to #{type_desc} from array '#{array_source}'\n"
|
223
|
+
end
|
224
|
+
|
225
|
+
explanation = "Direct operations on arrays from different sources is ambiguous and not supported. " \
|
226
|
+
"Vectorized operations can only work on fields from the same array input."
|
227
|
+
|
228
|
+
"#{summary}#{problem_desc}#{explanation}"
|
229
|
+
end
|
230
|
+
|
231
|
+
def determine_array_type(array_source, array_fields)
|
232
|
+
field_info = array_fields[array_source]
|
233
|
+
return "array(any)" unless field_info[:element_types]
|
234
|
+
|
235
|
+
# For nested arrays (like items.name where items is an array), this represents array(element_type)
|
236
|
+
element_types = field_info[:element_types].values.uniq
|
237
|
+
if element_types.length == 1
|
238
|
+
"array(#{element_types.first})"
|
239
|
+
else
|
240
|
+
"array(mixed)"
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
@@ -7,7 +7,7 @@ module Kumi
|
|
7
7
|
# DEPENDENCIES: :declarations
|
8
8
|
# PRODUCES: None (validation only)
|
9
9
|
# INTERFACE: new(schema, state).run(errors)
|
10
|
-
class
|
10
|
+
class DeclarationValidator < VisitorPass
|
11
11
|
def run(errors)
|
12
12
|
each_decl do |decl|
|
13
13
|
visit(decl) { |node| validate_node(node, errors) }
|
@@ -19,9 +19,9 @@ module Kumi
|
|
19
19
|
|
20
20
|
def validate_node(node, errors)
|
21
21
|
case node
|
22
|
-
when
|
22
|
+
when Kumi::Syntax::ValueDeclaration
|
23
23
|
validate_attribute(node, errors)
|
24
|
-
when
|
24
|
+
when Kumi::Syntax::TraitDeclaration
|
25
25
|
validate_trait(node, errors)
|
26
26
|
end
|
27
27
|
end
|
@@ -33,7 +33,7 @@ module Kumi
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def validate_trait(node, errors)
|
36
|
-
return if node.expression.is_a?(
|
36
|
+
return if node.expression.is_a?(Kumi::Syntax::CallExpression)
|
37
37
|
|
38
38
|
report_error(errors, "trait `#{node.name}` must wrap a CallExpression", location: node.loc)
|
39
39
|
end
|