kumi 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +1 -1
  3. data/CHANGELOG.md +23 -0
  4. data/CLAUDE.md +7 -231
  5. data/README.md +5 -5
  6. data/docs/SYNTAX.md +66 -0
  7. data/docs/VECTOR_SEMANTICS.md +286 -0
  8. data/docs/features/hierarchical-broadcasting.md +67 -1
  9. data/docs/features/input-declaration-system.md +16 -0
  10. data/docs/features/s-expression-printer.md +2 -2
  11. data/lib/kumi/analyzer.rb +34 -12
  12. data/lib/kumi/compiler.rb +2 -12
  13. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
  14. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  15. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  16. data/lib/kumi/core/analyzer/passes/input_collector.rb +123 -101
  17. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  18. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  19. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  20. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  21. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
  22. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  23. data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
  24. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  25. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  26. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
  27. data/lib/kumi/core/analyzer/plans.rb +52 -0
  28. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  29. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  30. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  31. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  32. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  33. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  34. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  35. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  36. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  37. data/lib/kumi/core/compiler_base.rb +2 -2
  38. data/lib/kumi/core/error_reporter.rb +6 -5
  39. data/lib/kumi/core/errors.rb +4 -0
  40. data/lib/kumi/core/explain.rb +157 -205
  41. data/lib/kumi/core/export/node_builders.rb +2 -2
  42. data/lib/kumi/core/export/node_serializers.rb +1 -1
  43. data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
  44. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  45. data/lib/kumi/core/function_registry/function_builder.rb +142 -55
  46. data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
  47. data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
  48. data/lib/kumi/core/function_registry.rb +126 -108
  49. data/lib/kumi/core/input/validator.rb +1 -1
  50. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  51. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  52. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  53. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  54. data/lib/kumi/core/ir.rb +58 -0
  55. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  56. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  57. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
  58. data/lib/kumi/core/ruby_parser/input_builder.rb +30 -9
  59. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  60. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  61. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  62. data/lib/kumi/core/types/validator.rb +1 -1
  63. data/lib/kumi/registry.rb +14 -79
  64. data/lib/kumi/runtime/executable.rb +213 -0
  65. data/lib/kumi/schema.rb +14 -3
  66. data/lib/kumi/schema_metadata.rb +2 -2
  67. data/lib/kumi/support/ir_dump.rb +491 -0
  68. data/lib/kumi/support/s_expression_printer.rb +1 -1
  69. data/lib/kumi/syntax/location.rb +5 -0
  70. data/lib/kumi/syntax/node.rb +0 -1
  71. data/lib/kumi/syntax/root.rb +2 -2
  72. data/lib/kumi/version.rb +1 -1
  73. data/lib/kumi.rb +6 -15
  74. metadata +37 -19
  75. data/lib/kumi/core/cascade_executor_builder.rb +0 -132
  76. data/lib/kumi/core/compiled_schema.rb +0 -43
  77. data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
  78. data/lib/kumi/core/compiler/function_invoker.rb +0 -55
  79. data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
  80. data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
  81. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  82. data/lib/kumi/core/nested_structure_utils.rb +0 -78
  83. data/lib/kumi/core/schema_instance.rb +0 -115
  84. data/lib/kumi/core/vectorized_function_builder.rb +0 -88
  85. data/lib/kumi/js/compiler.rb +0 -878
  86. data/lib/kumi/js/function_registry.rb +0 -333
  87. data/migrate_to_core_iterative.rb +0 -938
@@ -0,0 +1,286 @@
1
+ # Kumi Vector Semantics — Short Guide
2
+
3
+ This note documents how Kumi handles **vectorized traversal** over **arbitrary nested objects**, how **alignment/broadcasting** works, and how **reducers** and **structure functions** behave. It’s intentionally concise but hits all the sharp edges.
4
+
5
+ ---
6
+
7
+ ## Terminology
8
+
9
+ * **Path** – a dot-separated traversal, e.g. `input.regions.offices.employees.salary`.
10
+ * **Scope (axes)** – the list of array segments encountered along a path.
11
+ Example: for `regions.offices.employees.salary` the scope is `[:regions, :offices, :employees]`.
12
+ * **Rank** – number of axes = `scope.length`.
13
+ * **Index tuple** – lexicographic coordinates per axis, e.g. `[region_i, office_j, employee_k]`.
14
+
15
+ **Three Laws (think of them as invariants):**
16
+
17
+ 1. **Enumeration**
18
+ `each_indexed(path).map(&:first) == ravel(path)`
19
+
20
+ 2. **Reconstruction**
21
+ `lift(to_scope, each_indexed(path))` regroups by `to_scope` (must be a prefix of `scope(path)`).
22
+
23
+ 3. **Counting**
24
+ `size(path) == ravel(path).length == each_indexed(path).count`
25
+
26
+ These laws are the mental model. Everything else is just mechanics.
27
+
28
+ ---
29
+
30
+ ## Access Modes
31
+
32
+ Kumi’s Access Planner emits low-level ops (`enter_hash`, `enter_array`) and supports three vector modes per path:
33
+
34
+ ### 1) `:materialize`
35
+
36
+ Return the **original nested structure** down to that path (no enumeration).
37
+ Good for “give me the data shaped like the input.”
38
+
39
+ ```ruby
40
+ # Input (object mode)
41
+ {
42
+ regions: [
43
+ { name: "E", offices: [{ employees: [{salary: 100}, {salary: 120}] }] },
44
+ { name: "D", offices: [{ employees: [{salary: 90}] }] }
45
+ ]
46
+ }
47
+
48
+ materialize("regions.offices.employees.salary")
49
+ # => [[ [100,120] ], [ [90] ]]
50
+ ```
51
+
52
+ ### 2) `:ravel`
53
+
54
+ **Enumerate elements at the next array boundary** for that path, i.e., “collect the items at this depth.”
55
+ It is **not** NumPy’s “flatten everything.” It collects the next level.
56
+
57
+ ```ruby
58
+ ravel("regions") # => [ {…E…}, {…D…} ] (enumerate regions)
59
+ ravel("regions.offices") # => [ {employees:[…]}, {employees:[…]} ] (each office)
60
+ ravel("regions.offices.employees.salary") # => [ [100,120], [90] ] (each employee group at that depth)
61
+ ```
62
+
63
+ ### 3) `:each_indexed`
64
+
65
+ Enumerate leaf values **with** their index tuple (authoritative for `lift` and alignment):
66
+
67
+ ```ruby
68
+ each_indexed("regions.offices.employees.salary")
69
+ # => [
70
+ # [100, [0,0,0]], [120, [0,0,1]],
71
+ # [ 90, [1,0,0]]
72
+ # ]
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Lift (Regroup by prefix)
78
+
79
+ `lift(to_scope)` turns a vector-of-rows (from `each_indexed`) into a nested array grouped by `to_scope`.
80
+
81
+ ```ruby
82
+ # Given values from each_indexed above:
83
+ lift([:regions], …) # => [ [100,120], [90] ]
84
+ lift([:regions,:offices], …) # => [ [[100,120]], [[90]] ]
85
+ lift([:regions,:offices,:employees], …) # => [ [[[100,120]]], [[[90]]] ]
86
+ ```
87
+
88
+ * `to_scope` must be a **prefix** of the vector’s `scope`.
89
+ * Depth is derived mechanically from index arity; the VM doesn’t guess.
90
+
91
+ ---
92
+
93
+ ## Alignment & Broadcasting
94
+
95
+ When mapping a function over multiple arguments, Kumi:
96
+
97
+ 1. Picks a **carrier** vector (the one with the longest scope).
98
+ 2. **Aligns** other vectors to the carrier if they are **prefix-compatible** (same axes prefix).
99
+ 3. **Broadcasts** scalars across the carrier.
100
+
101
+ If scopes aren’t prefix-compatible, lowering raises:
102
+ `cross-scope map without join: [:a] vs [:b,:c]`
103
+
104
+ ```ruby
105
+ # price, quantity both scope [:items]
106
+ final = price * quantity # zip by position (same scope)
107
+
108
+ # Broadcast scalar across [:items]
109
+ discounted = price * 0.9
110
+
111
+ # Align prefix [:regions] to carrier [:regions,:offices]
112
+ aligned_tax = align_to(offices_subtotals, regions_tax)
113
+ total = offices_subtotals * (1 - aligned_tax)
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Structure Functions vs Reducers
119
+
120
+ * **Reducers** collapse a vector to a **scalar** (e.g., `sum`, `min`, `avg`).
121
+ Lowering selects a vector argument and emits a `Reduce`.
122
+
123
+ * **Structure functions** observe or reshape **structure** (e.g., `size`, `flatten`, `count_across`).
124
+ Lowering usually uses a `:ravel` plan and a plain `Map` (no indices required).
125
+
126
+ ### Laws for `size` and `flatten`
127
+
128
+ * `size(path) == ravel(path).length` (Counting Law)
129
+ * `flatten(path)` flattens nested arrays (by default all levels; use `flatten_one` for one level).
130
+
131
+ ---
132
+
133
+ ## End-to-End Mini Examples
134
+
135
+ ### A. Simple vector math + reducers (object access)
136
+
137
+ ```ruby
138
+ module Cart
139
+ extend Kumi::Schema
140
+ schema do
141
+ input do
142
+ array :items do
143
+ float :price
144
+ integer :qty
145
+ end
146
+ float :shipping_threshold
147
+ end
148
+
149
+ value :subtotals, input.items.price * input.items.qty
150
+ value :subtotal, fn(:sum, subtotals)
151
+ value :shipping, subtotal > input.shipping_threshold ? 0.0 : 9.99
152
+ value :total, subtotal + shipping
153
+ end
154
+ end
155
+
156
+ data = {
157
+ items: [{price: 100.0, qty: 2}, {price: 200.0, qty: 1}],
158
+ shipping_threshold: 50.0
159
+ }
160
+
161
+ r = Cart.from(data)
162
+ r[:subtotals] # => [200.0, 200.0] (vector map)
163
+ r[:subtotal] # => 400.0 (reducer)
164
+ r[:shipping] # => 0.0
165
+ r[:total] # => 400.0
166
+ ```
167
+
168
+ **Internal truths**:
169
+
170
+ * `each_indexed(input.items.price)` → `[[100.0,[0]],[200.0,[1]]]`
171
+ * `size(input.items)` → `2` because `ravel(input.items)` has length 2.
172
+
173
+ ### B. Mixed scopes + alignment
174
+
175
+ ```ruby
176
+ module Regions
177
+ extend Kumi::Schema
178
+ schema do
179
+ input do
180
+ array :regions do
181
+ float :tax
182
+ array :offices do
183
+ array :employees do
184
+ float :salary
185
+ end
186
+ end
187
+ end
188
+ end
189
+
190
+ value :office_payrolls, fn(:sum, input.regions.offices.employees.salary) # vector reduce per office
191
+ value :taxed, office_payrolls * (1 - input.regions.tax) # tax (align regions.tax to [:regions,:offices])
192
+ end
193
+ end
194
+
195
+ # Alignment rule: regions.tax (scope [:regions]) aligns to office_payrolls (scope [:regions,:offices])
196
+ ```
197
+
198
+ ### C. Element access (pure arrays) + structure functions
199
+
200
+ ```ruby
201
+ module Cube
202
+ extend Kumi::Schema
203
+ schema do
204
+ input do
205
+ array :cube do
206
+ element :array, :layer do
207
+ element :array, :row do
208
+ element :float, :cell
209
+ end
210
+ end
211
+ end
212
+ end
213
+
214
+ value :layers, fn(:size, input.cube) # == ravel(input.cube).length
215
+ value :matrices, fn(:size, input.cube.layer) # enumerate at next depth
216
+ value :rows, fn(:size, input.cube.layer.row)
217
+ value :all_values, fn(:flatten, input.cube.layer.row.cell)
218
+ value :total, fn(:sum, all_values)
219
+ end
220
+ end
221
+
222
+ data = { cube: [ [[1,2],[3]], [[4]] ] }
223
+
224
+ # ravel views (intuition)
225
+ # ravel(cube) => [ [[1,2],[3]], [[4]] ]
226
+ # ravel(cube.layer) => [ [1,2], [3], [4] ]
227
+ # ravel(cube.layer.row) => [ 1, 2, 3, 4 ]
228
+ # ravel(cube.layer.row.cell) => [ 1, 2, 3, 4 ] (same leaf)
229
+
230
+ c = Cube.from(data)
231
+ c[:layers] # => 2
232
+ c[:matrices] # => 3
233
+ c[:rows] # => 4
234
+ c[:all_values] # => [1,2,3,4]
235
+ c[:total] # => 10
236
+ ```
237
+
238
+ ---
239
+
240
+ ## Planner & VM: Who does what?
241
+
242
+ * **Planner**: Emits deterministic `enter_hash`/`enter_array` sequences per path and mode.
243
+
244
+ * For element edges (inline array aliases), it **does not** emit `enter_hash`.
245
+ * For `:each_indexed` / `:ravel`, it appends a terminal `enter_array` **only if** the final node is an array.
246
+ * **Lowerer**: Decides plans (`:ravel`, `:each_indexed`, `:materialize`), inserts `align_to`, emits `lift` at the declaration boundary when a vector result should be exposed as a scalar nested array.
247
+ * **VM**: Purely mechanical:
248
+
249
+ * `broadcast_scalar` for scalar→vec expansion,
250
+ * `zip_same_scope` when scopes match,
251
+ * `align_to` for prefix alignment,
252
+ * `group_rows` inside `lift` to reconstruct prefixes.
253
+
254
+ No type sniffing or guesses: the IR is the source of truth.
255
+
256
+ ---
257
+
258
+ ## Jagged & Sparse Arrays
259
+
260
+ * Ordering is **lexicographic by index tuple** (stable).
261
+ * No padding is introduced; missing branches are just… missing.
262
+ * `align_to(..., on_missing: :error|:nil)` enforces policy.
263
+
264
+ ---
265
+
266
+ ## Error Policies
267
+
268
+ For missing keys/arrays, accessors obey policy:
269
+
270
+ * `:error` (default) – raise descriptive error with the path/mode.
271
+ * `:skip` – drop the missing branch (useful in ravels).
272
+ * `:yield_nil` – emit `nil` in place (preserves cardinality).
273
+
274
+ Document these on any user-facing accessor.
275
+
276
+ ---
277
+
278
+ ## Quick Cheatsheet
279
+
280
+ * Use **`ravel(path)`** to “list the things at this level.”
281
+ * Use **`each_indexed(path)`** when you need `(value, idx)` pairs for joins/regroup.
282
+ * Use **`lift(to_scope, each_indexed(path))`** to reconstruct nested structure.
283
+ * **Reducers** (e.g., `sum`, `avg`, `min`) consume the raveled view of their argument.
284
+ * **Structure functions** (e.g., `size`, `flatten`, `flatten_one`, `count_across`) operate on structure at that depth and usually compile via `:ravel`.
285
+
286
+ Keep the three laws in mind and Kumi’s behavior is predictable—even over deeply nested, heterogeneous data.
@@ -67,6 +67,72 @@ value :cell_data, input.cube.layer.row.cell # 1D values
67
67
  - **Ranked polymorphism**: Same operations work across different dimensional arrays
68
68
  - **Clean code**: `fn(:size, input.cube.layer.row)` instead of `fn(:size, fn(:flatten_one, input.cube.layer))`
69
69
 
70
+ ### Dynamic Hash Elements with `element :any`
71
+
72
+ For arrays containing hash data with flexible or unknown structure, use `element :any` to access dynamic hash content:
73
+
74
+ ```ruby
75
+ input do
76
+ array :api_responses do
77
+ element :any, :response_data # Flexible hash structure
78
+ end
79
+
80
+ array :user_events do
81
+ element :any, :event_data # Dynamic event properties (includes event_type)
82
+ end
83
+ end
84
+
85
+ # Access hash fields using fn(:fetch)
86
+ value :response_codes, fn(:fetch, input.api_responses.response_data, "status")
87
+ value :error_messages, fn(:fetch, input.api_responses.response_data, "error")
88
+ value :event_types, fn(:fetch, input.user_events.event_data, "event_type")
89
+ value :user_ids, fn(:fetch, input.user_events.event_data, "user_id")
90
+ value :timestamps, fn(:fetch, input.user_events.event_data, "timestamp")
91
+
92
+ # Mathematical operations on extracted values
93
+ value :avg_response_time, fn(:mean, fn(:fetch, input.api_responses.response_data, "response_time"))
94
+ value :total_events, fn(:size, input.user_events.event_data)
95
+
96
+ # Traits and cascades with dynamic data
97
+ trait :has_errors, fn(:any?, fn(:fetch, input.api_responses.response_data, "status") >= 400)
98
+ trait :recent_events, fn(:any?, fn(:fetch, input.user_events.event_data, "timestamp") > 1640995200)
99
+
100
+ value :system_status do
101
+ on has_errors, "Error State"
102
+ on recent_events, "Active"
103
+ base "Idle"
104
+ end
105
+ ```
106
+
107
+ **When to use `element :any`:**
108
+ - API responses with varying JSON schemas
109
+ - Configuration files with flexible key-value structures
110
+ - Event data where properties vary by event type
111
+ - Legacy systems where data structure may change
112
+ - Prototyping when exact hash structure is unknown
113
+
114
+ **Comparison with Hash Objects:**
115
+
116
+ | Approach | Use Case | Flexibility | Type Safety |
117
+ |----------|----------|-------------|-------------|
118
+ | `hash :field do ... end` | Known structure, strong typing | Limited | High |
119
+ | `element :any, :field` | Unknown/flexible structure | High | Low |
120
+
121
+ ```ruby
122
+ # Known structure - use hash objects
123
+ array :orders do
124
+ hash :customer do
125
+ string :name
126
+ string :email
127
+ end
128
+ end
129
+
130
+ # Unknown/flexible structure - use element :any
131
+ array :api_calls do
132
+ element :any, :response # Could be any JSON structure
133
+ end
134
+ ```
135
+
70
136
  ## Business Use Cases
71
137
 
72
138
  Element access mode is essential for common business scenarios involving simple nested arrays:
@@ -193,7 +259,7 @@ The type system automatically infers appropriate types for broadcasted operation
193
259
 
194
260
  ### Analysis Layer
195
261
  - **BroadcastDetector** - Identifies vectorized vs scalar operations
196
- - **TypeInferencer** - Infers types for array element access patterns
262
+ - **TypeInferencerPass** - Infers types for array element access patterns
197
263
 
198
264
  ### Compilation Layer
199
265
  - **Automatic Dispatch** - Maps element-wise operations to array map functions
@@ -14,10 +14,26 @@ schema do
14
14
  array :tags, elem: { type: :string }
15
15
  hash :metadata, key: { type: :string }, val: { type: :any }
16
16
  any :flexible
17
+
18
+ # Structured arrays with defined fields
19
+ array :orders do
20
+ hash :customer do
21
+ string :name
22
+ string :email
23
+ end
24
+ float :total
25
+ end
26
+
27
+ # Dynamic arrays with flexible elements
28
+ array :api_responses do
29
+ element :any, :response_data # For unknown/flexible hash structures
30
+ end
17
31
  end
18
32
 
19
33
  trait :adult, (input.age >= 18)
20
34
  value :status, input.verified ? "verified" : "pending"
35
+ value :customer_emails, input.orders.customer.email # Structured access
36
+ value :response_codes, fn(:fetch, input.api_responses.response_data, "status") # Dynamic access
21
37
  end
22
38
  ```
23
39
 
@@ -42,7 +42,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
42
42
  (InputDeclaration :age :integer)
43
43
  (InputDeclaration :name :string)
44
44
  ]
45
- attributes: [
45
+ values: [
46
46
  (ValueDeclaration :greeting
47
47
  (CallExpression :concat
48
48
  (Literal "Hello ")
@@ -65,7 +65,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
65
65
 
66
66
  The printer handles all Kumi AST node types:
67
67
 
68
- - **Root** - Schema container with inputs, attributes, and traits
68
+ - **Root** - Schema container with inputs, values, and traits
69
69
  - **Declarations** - InputDeclaration, ValueDeclaration, TraitDeclaration
70
70
  - **Expressions** - CallExpression, ArrayExpression, CascadeExpression, CaseExpression
71
71
  - **References** - InputReference, InputElementReference, DeclarationReference
data/lib/kumi/analyzer.rb CHANGED
@@ -7,15 +7,19 @@ module Kumi
7
7
  DEFAULT_PASSES = [
8
8
  Core::Analyzer::Passes::NameIndexer, # 1. Finds all names and checks for duplicates.
9
9
  Core::Analyzer::Passes::InputCollector, # 2. Collects field metadata from input declarations.
10
- Core::Analyzer::Passes::DeclarationValidator, # 3. Checks the basic structure of each rule.
11
- Core::Analyzer::Passes::SemanticConstraintValidator, # 4. Validates DSL semantic constraints at AST level.
12
- Core::Analyzer::Passes::DependencyResolver, # 5. Builds the dependency graph with conditional dependencies.
13
- Core::Analyzer::Passes::UnsatDetector, # 6. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
14
- Core::Analyzer::Passes::Toposorter, # 7. Creates the final evaluation order, allowing safe cycles.
15
- Core::Analyzer::Passes::BroadcastDetector, # 8. Detects which operations should be broadcast over arrays (must run before type inference).
16
- Core::Analyzer::Passes::TypeInferencer, # 9. Infers types for all declarations (uses vectorization metadata).
17
- Core::Analyzer::Passes::TypeConsistencyChecker, # 10. Validates declared vs inferred type consistency.
18
- Core::Analyzer::Passes::TypeChecker # 11. Validates types using inferred information.
10
+ Core::Analyzer::Passes::DeclarationValidator, # 4. Checks the basic structure of each rule.
11
+ Core::Analyzer::Passes::SemanticConstraintValidator, # 5. Validates DSL semantic constraints at AST level.
12
+ Core::Analyzer::Passes::DependencyResolver, # 6. Builds the dependency graph with conditional dependencies.
13
+ Core::Analyzer::Passes::UnsatDetector, # 7. Detects unsatisfiable constraints and analyzes cascade mutual exclusion.
14
+ Core::Analyzer::Passes::Toposorter, # 8. Creates the final evaluation order, allowing safe cycles.
15
+ Core::Analyzer::Passes::BroadcastDetector, # 9. Detects which operations should be broadcast over arrays.
16
+ Core::Analyzer::Passes::TypeInferencerPass, # 10. Infers types for all declarations (uses vectorization metadata).
17
+ Core::Analyzer::Passes::TypeConsistencyChecker, # 11. Validates declared vs inferred type consistency.
18
+ Core::Analyzer::Passes::TypeChecker, # 12. Validates types using inferred information.
19
+ Core::Analyzer::Passes::InputAccessPlannerPass, # 13. Plans access strategies for input fields.
20
+ Core::Analyzer::Passes::ScopeResolutionPass, # 14. Plans execution scope and lifting needs for declarations.
21
+ Core::Analyzer::Passes::JoinReducePlanningPass, # 15. Plans join/reduce operations (Generates IR Structs)
22
+ Core::Analyzer::Passes::LowerToIRPass # 16. Lowers the schema to IR (Generates IR Structs)
19
23
  ].freeze
20
24
 
21
25
  def self.analyze!(schema, passes: DEFAULT_PASSES, **opts)
@@ -33,7 +37,13 @@ module Kumi
33
37
  begin
34
38
  state = pass_instance.run(errors)
35
39
  rescue StandardError => e
36
- errors << Core::ErrorReporter.create_error(e.message, location: nil, type: :semantic)
40
+ # TODO: - GREATLY improve this, need to capture the context of the error
41
+ # and the pass that failed and line number if relevant
42
+ pass_name = pass_class.name.split("::").last
43
+ message = "Error in Analysis Pass(#{pass_name}): #{e.message}"
44
+ errors << Core::ErrorReporter.create_error(message, location: nil, type: :semantic, backtrace: e.backtrace)
45
+
46
+ raise
37
47
  end
38
48
  end
39
49
  state
@@ -41,11 +51,14 @@ module Kumi
41
51
 
42
52
  def self.handle_analysis_errors(errors)
43
53
  type_errors = errors.select { |e| e.type == :type }
54
+ semantic_errors = errors.select { |e| e.type == :semantic }
44
55
  first_error_location = errors.first.location
45
56
 
46
57
  raise Errors::TypeError.new(format_errors(errors), first_error_location) if type_errors.any?
47
58
 
48
- raise Errors::SemanticError.new(format_errors(errors), first_error_location)
59
+ raise Errors::SemanticError.new(format_errors(errors), first_error_location) if first_error_location || semantic_errors
60
+
61
+ raise Errors::AnalysisError.new(format_errors(errors))
49
62
  end
50
63
 
51
64
  def self.create_analysis_result(state)
@@ -63,7 +76,16 @@ module Kumi
63
76
  def self.format_errors(errors)
64
77
  return "" if errors.empty?
65
78
 
66
- errors.map(&:to_s).join("\n")
79
+ backtrace = errors.first.backtrace
80
+
81
+ message = errors.map(&:to_s).join("\n")
82
+
83
+ message.tap do |msg|
84
+ if backtrace && !backtrace.empty?
85
+ msg << "\n\nBacktrace:\n"
86
+ msg << backtrace[0..10].join("\n") # Keep at most the first 11 lines (indices 0..10) for readability
87
+ end
88
+ end
67
89
  end
68
90
  end
69
91
  end
data/lib/kumi/compiler.rb CHANGED
@@ -3,11 +3,6 @@
3
3
  module Kumi
4
4
  # Compiles an analyzed schema into executable lambdas
5
5
  class Compiler < Core::CompilerBase
6
- include Kumi::Core::Compiler::ReferenceCompiler
7
- include Kumi::Core::Compiler::PathTraversalCompiler
8
- include Kumi::Core::Compiler::ExpressionCompiler
9
- include Kumi::Core::Compiler::FunctionInvoker
10
-
11
6
  def self.compile(schema, analyzer:)
12
7
  new(schema, analyzer).compile
13
8
  end
@@ -18,13 +13,8 @@ module Kumi
18
13
  end
19
14
 
20
15
  def compile
21
- build_index
22
- @analysis.topo_order.each do |name|
23
- decl = @index[name] or raise("Unknown binding #{name}")
24
- compile_declaration(decl)
25
- end
26
-
27
- Core::CompiledSchema.new(@bindings.freeze)
16
+ # Switch to LIR: Use the analysis state instead of old compilation
17
+ Runtime::Executable.from_analysis(@analysis.state)
28
18
  end
29
19
  end
30
20
  end