kumi 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +23 -0
- data/CLAUDE.md +7 -231
- data/README.md +5 -5
- data/docs/SYNTAX.md +66 -0
- data/docs/VECTOR_SEMANTICS.md +286 -0
- data/docs/features/hierarchical-broadcasting.md +67 -1
- data/docs/features/input-declaration-system.md +16 -0
- data/docs/features/s-expression-printer.md +2 -2
- data/lib/kumi/analyzer.rb +34 -12
- data/lib/kumi/compiler.rb +2 -12
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
- data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
- data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
- data/lib/kumi/core/analyzer/passes/input_collector.rb +123 -101
- data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
- data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
- data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
- data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
- data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
- data/lib/kumi/core/analyzer/plans.rb +52 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
- data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
- data/lib/kumi/core/compiler/access_builder.rb +36 -0
- data/lib/kumi/core/compiler/access_planner.rb +219 -0
- data/lib/kumi/core/compiler/accessors/base.rb +69 -0
- data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
- data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
- data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
- data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
- data/lib/kumi/core/compiler_base.rb +2 -2
- data/lib/kumi/core/error_reporter.rb +6 -5
- data/lib/kumi/core/errors.rb +4 -0
- data/lib/kumi/core/explain.rb +157 -205
- data/lib/kumi/core/export/node_builders.rb +2 -2
- data/lib/kumi/core/export/node_serializers.rb +1 -1
- data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
- data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
- data/lib/kumi/core/function_registry/function_builder.rb +142 -55
- data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
- data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
- data/lib/kumi/core/function_registry.rb +126 -108
- data/lib/kumi/core/input/validator.rb +1 -1
- data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
- data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
- data/lib/kumi/core/ir/execution_engine.rb +50 -0
- data/lib/kumi/core/ir.rb +58 -0
- data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
- data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
- data/lib/kumi/core/ruby_parser/input_builder.rb +30 -9
- data/lib/kumi/core/ruby_parser/parser.rb +1 -1
- data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
- data/lib/kumi/core/types/validator.rb +1 -1
- data/lib/kumi/registry.rb +14 -79
- data/lib/kumi/runtime/executable.rb +213 -0
- data/lib/kumi/schema.rb +14 -3
- data/lib/kumi/schema_metadata.rb +2 -2
- data/lib/kumi/support/ir_dump.rb +491 -0
- data/lib/kumi/support/s_expression_printer.rb +1 -1
- data/lib/kumi/syntax/location.rb +5 -0
- data/lib/kumi/syntax/node.rb +0 -1
- data/lib/kumi/syntax/root.rb +2 -2
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +6 -15
- metadata +37 -19
- data/lib/kumi/core/cascade_executor_builder.rb +0 -132
- data/lib/kumi/core/compiled_schema.rb +0 -43
- data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
- data/lib/kumi/core/compiler/function_invoker.rb +0 -55
- data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
- data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
- data/lib/kumi/core/evaluation_wrapper.rb +0 -40
- data/lib/kumi/core/nested_structure_utils.rb +0 -78
- data/lib/kumi/core/schema_instance.rb +0 -115
- data/lib/kumi/core/vectorized_function_builder.rb +0 -88
- data/lib/kumi/js/compiler.rb +0 -878
- data/lib/kumi/js/function_registry.rb +0 -333
- data/migrate_to_core_iterative.rb +0 -938
@@ -0,0 +1,286 @@
|
|
1
|
+
# Kumi Vector Semantics — Short Guide
|
2
|
+
|
3
|
+
This note documents how Kumi handles **vectorized traversal** over **arbitrary nested objects**, how **alignment/broadcasting** works, and how **reducers** and **structure functions** behave. It’s intentionally concise but hits all the sharp edges.
|
4
|
+
|
5
|
+
---
|
6
|
+
|
7
|
+
## Terminology
|
8
|
+
|
9
|
+
* **Path** – a dot-separated traversal, e.g. `input.regions.offices.employees.salary`.
|
10
|
+
* **Scope (axes)** – the list of array segments encountered along a path.
|
11
|
+
Example: for `regions.offices.employees.salary` the scope is `[:regions, :offices, :employees]`.
|
12
|
+
* **Rank** – number of axes = `scope.length`.
|
13
|
+
* **Index tuple** – lexicographic coordinates per axis, e.g. `[region_i, office_j, employee_k]`.
|
14
|
+
|
15
|
+
**Three Laws (think of them as invariants):**
|
16
|
+
|
17
|
+
1. **Enumeration**
|
18
|
+
`each_indexed(path).map(&:first) == ravel(path)`
|
19
|
+
|
20
|
+
2. **Reconstruction**
|
21
|
+
`lift(to_scope, each_indexed(path))` regroups by `to_scope` (must be a prefix of `scope(path)`).
|
22
|
+
|
23
|
+
3. **Counting**
|
24
|
+
`size(path) == ravel(path).length == each_indexed(path).count`
|
25
|
+
|
26
|
+
These laws are the mental model. Everything else is just mechanics.
|
27
|
+
|
28
|
+
---
|
29
|
+
|
30
|
+
## Access Modes
|
31
|
+
|
32
|
+
Kumi’s Access Planner emits low-level ops (`enter_hash`, `enter_array`) and supports three vector modes per path:
|
33
|
+
|
34
|
+
### 1) `:materialize`
|
35
|
+
|
36
|
+
Return the **original nested structure** down to that path (no enumeration).
|
37
|
+
Good for “give me the data shaped like the input.”
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
# Input (object mode)
|
41
|
+
{
|
42
|
+
regions: [
|
43
|
+
{ name: "E", offices: [{ employees: [{salary: 100}, {salary: 120}] }] },
|
44
|
+
{ name: "D", offices: [{ employees: [{salary: 90}] }] }
|
45
|
+
]
|
46
|
+
}
|
47
|
+
|
48
|
+
materialize("regions.offices.employees.salary")
|
49
|
+
# => [[ [100,120] ], [ [90] ]]
|
50
|
+
```
|
51
|
+
|
52
|
+
### 2) `:ravel`
|
53
|
+
|
54
|
+
**Enumerate elements at the next array boundary** for that path, i.e., “collect the items at this depth.”
|
55
|
+
It is **not** NumPy’s “flatten everything.” It collects the next level.
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
ravel("regions") # => [ {…E…}, {…D…} ] (enumerate regions)
|
59
|
+
ravel("regions.offices") # => [ {employees:[…]}, {employees:[…]} ] (each office)
|
60
|
+
ravel("regions.offices.employees.salary") # => [ 100, 120, 90 ] (each salary leaf; equals each_indexed(path).map(&:first))
|
61
|
+
```
|
62
|
+
|
63
|
+
### 3) `:each_indexed`
|
64
|
+
|
65
|
+
Enumerate leaf values **with** their index tuple (authoritative for `lift` and alignment):
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
each_indexed("regions.offices.employees.salary")
|
69
|
+
# => [
|
70
|
+
# [100, [0,0,0]], [120, [0,0,1]],
|
71
|
+
# [ 90, [1,0,0]]
|
72
|
+
# ]
|
73
|
+
```
|
74
|
+
|
75
|
+
---
|
76
|
+
|
77
|
+
## Lift (Regroup by prefix)
|
78
|
+
|
79
|
+
`lift(to_scope)` turns a vector-of-rows (from `each_indexed`) into a nested array grouped by `to_scope`.
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
# Given values from each_indexed above:
|
83
|
+
lift([:regions], …) # => [ [100,120], [90] ]
|
84
|
+
lift([:regions,:offices], …) # => [ [[100,120]], [[90]] ]
|
85
|
+
lift([:regions,:offices,:employees], …) # => [ [[[100,120]]], [[[90]]] ]
|
86
|
+
```
|
87
|
+
|
88
|
+
* `to_scope` must be a **prefix** of the vector’s `scope`.
|
89
|
+
* Depth is derived mechanically from index arity; the VM doesn’t guess.
|
90
|
+
|
91
|
+
---
|
92
|
+
|
93
|
+
## Alignment & Broadcasting
|
94
|
+
|
95
|
+
When mapping a function over multiple arguments, Kumi:
|
96
|
+
|
97
|
+
1. Picks a **carrier** vector (the one with the longest scope).
|
98
|
+
2. **Aligns** other vectors to the carrier if they are **prefix-compatible** (same axes prefix).
|
99
|
+
3. **Broadcasts** scalars across the carrier.
|
100
|
+
|
101
|
+
If scopes aren’t prefix-compatible, lowering raises:
|
102
|
+
`cross-scope map without join: [:a] vs [:b,:c]`
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
# price, quantity both scope [:items]
|
106
|
+
final = price * quantity # zip by position (same scope)
|
107
|
+
|
108
|
+
# Broadcast scalar across [:items]
|
109
|
+
discounted = price * 0.9
|
110
|
+
|
111
|
+
# Align prefix [:regions] to carrier [:regions,:offices]
|
112
|
+
aligned_tax = align_to(offices_subtotals, regions_tax)
|
113
|
+
total = offices_subtotals * (1 - aligned_tax)
|
114
|
+
```
|
115
|
+
|
116
|
+
---
|
117
|
+
|
118
|
+
## Structure Functions vs Reducers
|
119
|
+
|
120
|
+
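* **Reducers** collapse the innermost axis of a vector (e.g., `sum`, `min`, `avg`); a rank-1 vector reduces to a **scalar**, while a deeper vector yields one result per remaining prefix (e.g., per office).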
|
121
|
+
Lowering selects a vector argument and emits a `Reduce`.
|
122
|
+
|
123
|
+
* **Structure functions** observe or reshape **structure** (e.g., `size`, `flatten`, `count_across`).
|
124
|
+
Lowering usually uses a `:ravel` plan and a plain `Map` (no indices required).
|
125
|
+
|
126
|
+
### Laws for `size` and `flatten`
|
127
|
+
|
128
|
+
* `size(path) == ravel(path).length` (Counting Law)
|
129
|
+
* `flatten(path)` flattens nested arrays (by default all levels; use `flatten_one` for one level).
|
130
|
+
|
131
|
+
---
|
132
|
+
|
133
|
+
## End-to-End Mini Examples
|
134
|
+
|
135
|
+
### A. Simple vector math + reducers (object access)
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
module Cart
|
139
|
+
extend Kumi::Schema
|
140
|
+
schema do
|
141
|
+
input do
|
142
|
+
array :items do
|
143
|
+
float :price
|
144
|
+
integer :qty
|
145
|
+
end
|
146
|
+
float :shipping_threshold
|
147
|
+
end
|
148
|
+
|
149
|
+
value :subtotals, input.items.price * input.items.qty
|
150
|
+
value :subtotal, fn(:sum, subtotals)
|
151
|
+
value :shipping, subtotal > input.shipping_threshold ? 0.0 : 9.99
|
152
|
+
value :total, subtotal + shipping
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
data = {
|
157
|
+
items: [{price: 100.0, qty: 2}, {price: 200.0, qty: 1}],
|
158
|
+
shipping_threshold: 50.0
|
159
|
+
}
|
160
|
+
|
161
|
+
r = Cart.from(data)
|
162
|
+
r[:subtotals] # => [200.0, 200.0] (vector map)
|
163
|
+
r[:subtotal] # => 400.0 (reducer)
|
164
|
+
r[:shipping] # => 0.0
|
165
|
+
r[:total] # => 400.0
|
166
|
+
```
|
167
|
+
|
168
|
+
**Internal truths**:
|
169
|
+
|
170
|
+
* `each_indexed(input.items.price)` → `[[100.0,[0]],[200.0,[1]]]`
|
171
|
+
* `size(input.items)` → `2` because `ravel(input.items)` has length 2.
|
172
|
+
|
173
|
+
### B. Mixed scopes + alignment
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
module Regions
|
177
|
+
extend Kumi::Schema
|
178
|
+
schema do
|
179
|
+
input do
|
180
|
+
array :regions do
|
181
|
+
float :tax
|
182
|
+
array :offices do
|
183
|
+
array :employees do
|
184
|
+
float :salary
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
value :office_payrolls, fn(:sum, input.regions.offices.employees.salary) # vector reduce per office
|
191
|
+
value :taxed, office_payrolls * (1 - input.regions.tax) # tax (align regions.tax to [:regions,:offices])
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
# Alignment rule: regions.tax (scope [:regions]) aligns to office_payrolls (scope [:regions,:offices])
|
196
|
+
```
|
197
|
+
|
198
|
+
### C. Element access (pure arrays) + structure functions
|
199
|
+
|
200
|
+
```ruby
|
201
|
+
module Cube
|
202
|
+
extend Kumi::Schema
|
203
|
+
schema do
|
204
|
+
input do
|
205
|
+
array :cube do
|
206
|
+
element :array, :layer do
|
207
|
+
element :array, :row do
|
208
|
+
element :float, :cell
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
value :layers, fn(:size, input.cube) # == ravel(input.cube).length
|
215
|
+
value :matrices, fn(:size, input.cube.layer) # enumerate at next depth
|
216
|
+
value :rows, fn(:size, input.cube.layer.row)
|
217
|
+
value :all_values, fn(:flatten, input.cube.layer.row.cell)
|
218
|
+
value :total, fn(:sum, all_values)
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
data = { cube: [ [[1,2],[3]], [[4]] ] }
|
223
|
+
|
224
|
+
# ravel views (intuition)
|
225
|
+
# ravel(cube) => [ [[1,2],[3]], [[4]] ]
|
226
|
+
# ravel(cube.layer) => [ [1,2], [3], [4] ]
|
227
|
+
# ravel(cube.layer.row) => [ 1, 2, 3, 4 ]
|
228
|
+
# ravel(cube.layer.row.cell) => [ 1, 2, 3, 4 ] (same leaf)
|
229
|
+
|
230
|
+
c = Cube.from(data)
|
231
|
+
c[:layers] # => 2
|
232
|
+
c[:matrices] # => 3
|
233
|
+
c[:rows] # => 4
|
234
|
+
c[:all_values] # => [1,2,3,4]
|
235
|
+
c[:total] # => 10
|
236
|
+
```
|
237
|
+
|
238
|
+
---
|
239
|
+
|
240
|
+
## Planner & VM: Who does what?
|
241
|
+
|
242
|
+
* **Planner**: Emits deterministic `enter_hash`/`enter_array` sequences per path and mode.
|
243
|
+
|
244
|
+
* For element edges (inline array aliases), it **does not** emit `enter_hash`.
|
245
|
+
* For `:each_indexed` / `:ravel`, it appends a terminal `enter_array` **only if** the final node is an array.
|
246
|
+
* **Lowerer**: Decides plans (`:ravel`, `:each_indexed`, `:materialize`), inserts `align_to`, and emits `lift` at the declaration boundary when a vector result should be exposed as a scalar nested array.
|
247
|
+
* **VM**: Purely mechanical:
|
248
|
+
|
249
|
+
* `broadcast_scalar` for scalar→vec expansion,
|
250
|
+
* `zip_same_scope` when scopes match,
|
251
|
+
* `align_to` for prefix alignment,
|
252
|
+
* `group_rows` inside `lift` to reconstruct prefixes.
|
253
|
+
|
254
|
+
No type sniffing or guesses: the IR is the source of truth.
|
255
|
+
|
256
|
+
---
|
257
|
+
|
258
|
+
## Jagged & Sparse Arrays
|
259
|
+
|
260
|
+
* Ordering is **lexicographic by index tuple** (stable).
|
261
|
+
* No padding is introduced; missing branches are just… missing.
|
262
|
+
* `align_to(..., on_missing: :error|:nil)` enforces policy.
|
263
|
+
|
264
|
+
---
|
265
|
+
|
266
|
+
## Error Policies
|
267
|
+
|
268
|
+
For missing keys/arrays, accessors obey policy:
|
269
|
+
|
270
|
+
* `:error` (default) – raise a descriptive error with the path/mode.
|
271
|
+
* `:skip` – drop the missing branch (useful in ravels).
|
272
|
+
* `:yield_nil` – emit `nil` in place (preserves cardinality).
|
273
|
+
|
274
|
+
Document these on any user-facing accessor.
|
275
|
+
|
276
|
+
---
|
277
|
+
|
278
|
+
## Quick Cheatsheet
|
279
|
+
|
280
|
+
* Use **`ravel(path)`** to “list the things at this level.”
|
281
|
+
* Use **`each_indexed(path)`** when you need `(value, idx)` pairs for joins/regroup.
|
282
|
+
* Use **`lift(to_scope, each_indexed(path))`** to reconstruct nested structure.
|
283
|
+
* **Reducers** (e.g., `sum`, `avg`, `min`) consume the raveled view of their argument.
|
284
|
+
* **Structure functions** (e.g., `size`, `flatten`, `flatten_one`, `count_across`) operate on structure at that depth and usually compile via `:ravel`.
|
285
|
+
|
286
|
+
Keep the three laws in mind and Kumi’s behavior is predictable—even over deeply nested, heterogeneous data.
|
@@ -67,6 +67,72 @@ value :cell_data, input.cube.layer.row.cell # 1D values
|
|
67
67
|
- **Ranked polymorphism**: Same operations work across different dimensional arrays
|
68
68
|
- **Clean code**: `fn(:size, input.cube.layer.row)` instead of `fn(:size, fn(:flatten_one, input.cube.layer))`
|
69
69
|
|
70
|
+
### Dynamic Hash Elements with `element :any`
|
71
|
+
|
72
|
+
For arrays containing hash data with flexible or unknown structure, use `element :any` to access dynamic hash content:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
input do
|
76
|
+
array :api_responses do
|
77
|
+
element :any, :response_data # Flexible hash structure
|
78
|
+
end
|
79
|
+
|
80
|
+
array :user_events do
|
81
|
+
element :any, :event_data # Dynamic event properties (includes event_type)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
# Access hash fields using fn(:fetch)
|
86
|
+
value :response_codes, fn(:fetch, input.api_responses.response_data, "status")
|
87
|
+
value :error_messages, fn(:fetch, input.api_responses.response_data, "error")
|
88
|
+
value :event_types, fn(:fetch, input.user_events.event_data, "event_type")
|
89
|
+
value :user_ids, fn(:fetch, input.user_events.event_data, "user_id")
|
90
|
+
value :timestamps, fn(:fetch, input.user_events.event_data, "timestamp")
|
91
|
+
|
92
|
+
# Mathematical operations on extracted values
|
93
|
+
value :avg_response_time, fn(:mean, fn(:fetch, input.api_responses.response_data, "response_time"))
|
94
|
+
value :total_events, fn(:size, input.user_events.event_data)
|
95
|
+
|
96
|
+
# Traits and cascades with dynamic data
|
97
|
+
trait :has_errors, fn(:any?, fn(:fetch, input.api_responses.response_data, "status") >= 400)
|
98
|
+
trait :recent_events, fn(:any?, fn(:fetch, input.user_events.event_data, "timestamp") > 1640995200)
|
99
|
+
|
100
|
+
value :system_status do
|
101
|
+
on has_errors, "Error State"
|
102
|
+
on recent_events, "Active"
|
103
|
+
base "Idle"
|
104
|
+
end
|
105
|
+
```
|
106
|
+
|
107
|
+
**When to use `element :any`:**
|
108
|
+
- API responses with varying JSON schemas
|
109
|
+
- Configuration files with flexible key-value structures
|
110
|
+
- Event data where properties vary by event type
|
111
|
+
- Legacy systems where data structure may change
|
112
|
+
- Prototyping when exact hash structure is unknown
|
113
|
+
|
114
|
+
**Comparison with Hash Objects:**
|
115
|
+
|
116
|
+
| Approach | Use Case | Flexibility | Type Safety |
|
117
|
+
|----------|----------|-------------|-------------|
|
118
|
+
| `hash :field do ... end` | Known structure, strong typing | Limited | High |
|
119
|
+
| `element :any, :field` | Unknown/flexible structure | High | Low |
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
# Known structure - use hash objects
|
123
|
+
array :orders do
|
124
|
+
hash :customer do
|
125
|
+
string :name
|
126
|
+
string :email
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
# Unknown/flexible structure - use element :any
|
131
|
+
array :api_calls do
|
132
|
+
element :any, :response # Could be any JSON structure
|
133
|
+
end
|
134
|
+
```
|
135
|
+
|
70
136
|
## Business Use Cases
|
71
137
|
|
72
138
|
Element access mode is essential for common business scenarios involving simple nested arrays:
|
@@ -193,7 +259,7 @@ The type system automatically infers appropriate types for broadcasted operation
|
|
193
259
|
|
194
260
|
### Analysis Layer
|
195
261
|
- **BroadcastDetector** - Identifies vectorized vs scalar operations
|
196
|
-
- **
|
262
|
+
- **TypeInferencerPass** - Infers types for array element access patterns
|
197
263
|
|
198
264
|
### Compilation Layer
|
199
265
|
- **Automatic Dispatch** - Maps element-wise operations to array map functions
|
@@ -14,10 +14,26 @@ schema do
|
|
14
14
|
array :tags, elem: { type: :string }
|
15
15
|
hash :metadata, key: { type: :string }, val: { type: :any }
|
16
16
|
any :flexible
|
17
|
+
|
18
|
+
# Structured arrays with defined fields
|
19
|
+
array :orders do
|
20
|
+
hash :customer do
|
21
|
+
string :name
|
22
|
+
string :email
|
23
|
+
end
|
24
|
+
float :total
|
25
|
+
end
|
26
|
+
|
27
|
+
# Dynamic arrays with flexible elements
|
28
|
+
array :api_responses do
|
29
|
+
element :any, :response_data # For unknown/flexible hash structures
|
30
|
+
end
|
17
31
|
end
|
18
32
|
|
19
33
|
trait :adult, (input.age >= 18)
|
20
34
|
value :status, input.verified ? "verified" : "pending"
|
35
|
+
value :customer_emails, input.orders.customer.email # Structured access
|
36
|
+
value :response_codes, fn(:fetch, input.api_responses.response_data, "status") # Dynamic access
|
21
37
|
end
|
22
38
|
```
|
23
39
|
|
@@ -42,7 +42,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
|
|
42
42
|
(InputDeclaration :age :integer)
|
43
43
|
(InputDeclaration :name :string)
|
44
44
|
]
|
45
|
-
|
45
|
+
values: [
|
46
46
|
(ValueDeclaration :greeting
|
47
47
|
(CallExpression :concat
|
48
48
|
(Literal "Hello ")
|
@@ -65,7 +65,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
|
|
65
65
|
|
66
66
|
The printer handles all Kumi AST node types:
|
67
67
|
|
68
|
-
- **Root** - Schema container with inputs,
|
68
|
+
- **Root** - Schema container with inputs, values, and traits
|
69
69
|
- **Declarations** - InputDeclaration, ValueDeclaration, TraitDeclaration
|
70
70
|
- **Expressions** - CallExpression, ArrayExpression, CascadeExpression, CaseExpression
|
71
71
|
- **References** - InputReference, InputElementReference, DeclarationReference
|
data/lib/kumi/analyzer.rb
CHANGED
@@ -7,15 +7,19 @@ module Kumi
|
|
7
7
|
DEFAULT_PASSES = [
|
8
8
|
Core::Analyzer::Passes::NameIndexer, # 1. Finds all names and checks for duplicates.
|
9
9
|
Core::Analyzer::Passes::InputCollector, # 2. Collects field metadata from input declarations.
|
10
|
-
Core::Analyzer::Passes::DeclarationValidator, #
|
11
|
-
Core::Analyzer::Passes::SemanticConstraintValidator, #
|
12
|
-
Core::Analyzer::Passes::DependencyResolver, #
|
13
|
-
Core::Analyzer::Passes::UnsatDetector, #
|
14
|
-
Core::Analyzer::Passes::Toposorter, #
|
15
|
-
Core::Analyzer::Passes::BroadcastDetector,
|
16
|
-
Core::Analyzer::Passes::
|
17
|
-
Core::Analyzer::Passes::TypeConsistencyChecker, #
|
18
|
-
Core::Analyzer::Passes::TypeChecker
|
10
|
+
Core::Analyzer::Passes::DeclarationValidator, # 4. Checks the basic structure of each rule.
|
11
|
+
Core::Analyzer::Passes::SemanticConstraintValidator, # 5. Validates DSL semantic constraints at AST level.
|
12
|
+
Core::Analyzer::Passes::DependencyResolver, # 6. Builds the dependency graph with conditional dependencies.
|
13
|
+
Core::Analyzer::Passes::UnsatDetector, # 7. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
|
14
|
+
Core::Analyzer::Passes::Toposorter, # 8. Creates the final evaluation order, allowing safe cycles.
|
15
|
+
Core::Analyzer::Passes::BroadcastDetector, # 9. Detects which operations should be broadcast over arrays.
|
16
|
+
Core::Analyzer::Passes::TypeInferencerPass, # 10. Infers types for all declarations (uses vectorization metadata).
|
17
|
+
Core::Analyzer::Passes::TypeConsistencyChecker, # 11. Validates declared vs inferred type consistency.
|
18
|
+
Core::Analyzer::Passes::TypeChecker, # 12. Validates types using inferred information.
|
19
|
+
Core::Analyzer::Passes::InputAccessPlannerPass, # 13. Plans access strategies for input fields.
|
20
|
+
Core::Analyzer::Passes::ScopeResolutionPass, # 14. Plans execution scope and lifting needs for declarations.
|
21
|
+
Core::Analyzer::Passes::JoinReducePlanningPass, # 15. Plans join/reduce operations (Generates IR Structs)
|
22
|
+
Core::Analyzer::Passes::LowerToIRPass # 16. Lowers the schema to IR (Generates IR Structs)
|
19
23
|
].freeze
|
20
24
|
|
21
25
|
def self.analyze!(schema, passes: DEFAULT_PASSES, **opts)
|
@@ -33,7 +37,13 @@ module Kumi
|
|
33
37
|
begin
|
34
38
|
state = pass_instance.run(errors)
|
35
39
|
rescue StandardError => e
|
36
|
-
|
40
|
+
# TODO: - GREATLY improve this, need to capture the context of the error
|
41
|
+
# and the pass that failed and line number if relevant
|
42
|
+
pass_name = pass_class.name.split("::").last
|
43
|
+
message = "Error in Analysis Pass(#{pass_name}): #{e.message}"
|
44
|
+
errors << Core::ErrorReporter.create_error(message, location: nil, type: :semantic, backtrace: e.backtrace)
|
45
|
+
|
46
|
+
raise
|
37
47
|
end
|
38
48
|
end
|
39
49
|
state
|
@@ -41,11 +51,14 @@ module Kumi
|
|
41
51
|
|
42
52
|
def self.handle_analysis_errors(errors)
|
43
53
|
type_errors = errors.select { |e| e.type == :type }
|
54
|
+
semantic_errors = errors.select { |e| e.type == :semantic }
|
44
55
|
first_error_location = errors.first.location
|
45
56
|
|
46
57
|
raise Errors::TypeError.new(format_errors(errors), first_error_location) if type_errors.any?
|
47
58
|
|
48
|
-
raise Errors::SemanticError.new(format_errors(errors), first_error_location)
|
59
|
+
raise Errors::SemanticError.new(format_errors(errors), first_error_location) if first_error_location || semantic_errors
|
60
|
+
|
61
|
+
raise Errors::AnalysisError.new(format_errors(errors))
|
49
62
|
end
|
50
63
|
|
51
64
|
def self.create_analysis_result(state)
|
@@ -63,7 +76,16 @@ module Kumi
|
|
63
76
|
def self.format_errors(errors)
|
64
77
|
return "" if errors.empty?
|
65
78
|
|
66
|
-
errors.
|
79
|
+
backtrace = errors.first.backtrace
|
80
|
+
|
81
|
+
message = errors.map(&:to_s).join("\n")
|
82
|
+
|
83
|
+
message.tap do |msg|
|
84
|
+
if backtrace && !backtrace.empty?
|
85
|
+
msg << "\n\nBacktrace:\n"
|
86
|
+
msg << backtrace[0..10].join("\n") # Limit to first 10 lines for readability
|
87
|
+
end
|
88
|
+
end
|
67
89
|
end
|
68
90
|
end
|
69
91
|
end
|
data/lib/kumi/compiler.rb
CHANGED
@@ -3,11 +3,6 @@
|
|
3
3
|
module Kumi
|
4
4
|
# Compiles an analyzed schema into executable lambdas
|
5
5
|
class Compiler < Core::CompilerBase
|
6
|
-
include Kumi::Core::Compiler::ReferenceCompiler
|
7
|
-
include Kumi::Core::Compiler::PathTraversalCompiler
|
8
|
-
include Kumi::Core::Compiler::ExpressionCompiler
|
9
|
-
include Kumi::Core::Compiler::FunctionInvoker
|
10
|
-
|
11
6
|
def self.compile(schema, analyzer:)
|
12
7
|
new(schema, analyzer).compile
|
13
8
|
end
|
@@ -18,13 +13,8 @@ module Kumi
|
|
18
13
|
end
|
19
14
|
|
20
15
|
def compile
|
21
|
-
|
22
|
-
@analysis.
|
23
|
-
decl = @index[name] or raise("Unknown binding #{name}")
|
24
|
-
compile_declaration(decl)
|
25
|
-
end
|
26
|
-
|
27
|
-
Core::CompiledSchema.new(@bindings.freeze)
|
16
|
+
# Switch to LIR: Use the analysis state instead of old compilation
|
17
|
+
Runtime::Executable.from_analysis(@analysis.state)
|
28
18
|
end
|
29
19
|
end
|
30
20
|
end
|