kumi 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/CLAUDE.md +7 -231
  4. data/README.md +1 -1
  5. data/docs/VECTOR_SEMANTICS.md +286 -0
  6. data/docs/features/hierarchical-broadcasting.md +1 -1
  7. data/docs/features/s-expression-printer.md +2 -2
  8. data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
  9. data/lib/kumi/analyzer.rb +34 -12
  10. data/lib/kumi/compiler.rb +2 -12
  11. data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
  12. data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
  13. data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
  14. data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -101
  15. data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
  16. data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
  17. data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
  18. data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
  19. data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
  20. data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
  21. data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
  22. data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
  23. data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
  24. data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
  25. data/lib/kumi/core/analyzer/plans.rb +52 -0
  26. data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
  27. data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
  28. data/lib/kumi/core/compiler/access_builder.rb +36 -0
  29. data/lib/kumi/core/compiler/access_planner.rb +219 -0
  30. data/lib/kumi/core/compiler/accessors/base.rb +69 -0
  31. data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
  32. data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
  33. data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
  34. data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
  35. data/lib/kumi/core/compiler_base.rb +2 -2
  36. data/lib/kumi/core/error_reporter.rb +6 -5
  37. data/lib/kumi/core/errors.rb +4 -0
  38. data/lib/kumi/core/explain.rb +157 -205
  39. data/lib/kumi/core/export/node_builders.rb +2 -2
  40. data/lib/kumi/core/export/node_serializers.rb +1 -1
  41. data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
  42. data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
  43. data/lib/kumi/core/function_registry/function_builder.rb +142 -55
  44. data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
  45. data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
  46. data/lib/kumi/core/function_registry.rb +126 -108
  47. data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
  48. data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
  49. data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
  50. data/lib/kumi/core/ir/execution_engine.rb +50 -0
  51. data/lib/kumi/core/ir.rb +58 -0
  52. data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
  53. data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
  54. data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
  55. data/lib/kumi/core/ruby_parser/input_builder.rb +5 -5
  56. data/lib/kumi/core/ruby_parser/parser.rb +1 -1
  57. data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
  58. data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
  59. data/lib/kumi/registry.rb +14 -79
  60. data/lib/kumi/runtime/executable.rb +213 -0
  61. data/lib/kumi/schema.rb +14 -3
  62. data/lib/kumi/schema_metadata.rb +2 -2
  63. data/lib/kumi/support/ir_dump.rb +491 -0
  64. data/lib/kumi/support/s_expression_printer.rb +1 -1
  65. data/lib/kumi/syntax/location.rb +5 -0
  66. data/lib/kumi/syntax/node.rb +0 -1
  67. data/lib/kumi/syntax/root.rb +2 -2
  68. data/lib/kumi/version.rb +1 -1
  69. data/lib/kumi.rb +6 -15
  70. metadata +26 -15
  71. data/lib/kumi/core/cascade_executor_builder.rb +0 -132
  72. data/lib/kumi/core/compiled_schema.rb +0 -43
  73. data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
  74. data/lib/kumi/core/compiler/function_invoker.rb +0 -55
  75. data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
  76. data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
  77. data/lib/kumi/core/evaluation_wrapper.rb +0 -40
  78. data/lib/kumi/core/nested_structure_utils.rb +0 -78
  79. data/lib/kumi/core/schema_instance.rb +0 -115
  80. data/lib/kumi/core/vectorized_function_builder.rb +0 -88
  81. data/lib/kumi/js/compiler.rb +0 -878
  82. data/lib/kumi/js/function_registry.rb +0 -333
  83. data/migrate_to_core_iterative.rb +0 -938
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 98308b4c27cb40488f14215c8da9700c62a51fc54d11b9d822a1eb25d396a724
4
- data.tar.gz: 3fbbb50dc0c74ba14d83fbcf71c5866efa814b71b4d2679c45b57d59a1f778da
3
+ metadata.gz: 3eb46e14716bf14c3d9165ffac957211f41ad5a21a74ad37c47b37c37e01b312
4
+ data.tar.gz: fd0e36d65ac41079c27cf9699a3e092ff762bc76b3af8fc90df2ec763fed3806
5
5
  SHA512:
6
- metadata.gz: 4453ea76de50c433696c3ae5ebe4d39ae049cb35e40f34cf9a3cce0e52e5d0967a8ccfd9ca7627eafe33444ea4da186015a59796a59e022fdf5b5edc04c50444
7
- data.tar.gz: dfdb9f118ee7c0c16fa30392b3591b88c8320a2d19ff331253a4344e49fa773967e6b8d2451c0fd490f200171846fec2b408ecd000e6ad111e3c2d89bd92c000
6
+ metadata.gz: 54ed5e72d6acf863e0f7ed0986c8db83fab22526bcffb97b4c1b96343e724b0ae505804baed5554933c452478ced8c46ec24a6568426813e69c4007994588de6
7
+ data.tar.gz: '09aabb772643aab71d957060c934f05be5838a1056b7c635822b75759106a3119844cc7e20f96b8990ec658d969f2d2e0143ceb43d36f8d48da061c2bb80cb6a'
data/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ ## [0.0.11] – 2025-08-13
2
+ ### Added
3
+ - Intermediate Representation (IR) and slot-based VM interpreter.
4
+ - Scope-aware vector semantics (alignment, lift, hierarchical indices).
5
+ - Debug tooling: IR dump, VM/lowering traces via DEBUG_* flags.
6
+
7
+ ### Changed
8
+ - Analyzer now lowers to IR via `LowerToIRPass`.
9
+ - Access modes: `:read`, `:ravel`, `:each_indexed`, `:materialize`.
10
+
11
+ ### Removed (BREAKING)
12
+ - JavaScript transpiler (legacy compiler).
13
+
14
+ ### Requirements
15
+ - Ruby >= 3.1 (was >= 3.0)
16
+
17
+ ### Notes
18
+ - No expected DSL changes for typical schemas; report regressions.
data/CLAUDE.md CHANGED
@@ -18,11 +18,6 @@ Kumi is a Declarative logic and rules engine framework with static analysis for
18
18
  - `bundle exec rspec spec/path/to/specific_spec.rb` - Run specific test file
19
19
  - `bundle exec rspec spec/path/to/specific_spec.rb:123` - Run specific test at line
20
20
 
21
- ### Gem Management
22
- - `bundle install` - Install dependencies
23
- - `gem build kumi.gemspec` - Build the gem
24
- - `gem install ./kumi-*.gem` - Install locally built gem
25
-
26
21
  ## Architecture Overview
27
22
 
28
23
  ### Core Components
@@ -72,50 +67,8 @@ Kumi is a Declarative logic and rules engine framework with static analysis for
72
67
  - Supports custom function registration with type metadata
73
68
  - Each function includes param_types, return_type, arity, and description
74
69
  - Core functions include: `==`, `>`, `<`, `add`, `multiply`, `and`, `or`, `clamp`, etc.
75
- - Maintains backward compatibility with legacy type checking system
76
70
  - Function documents are generated by the script ./scripts/generate_function_docs.rb
77
71
 
78
- **Input Validation System** (`lib/kumi/input/` and `lib/kumi/domain/`):
79
- - `input/validator.rb` - Main validation coordinator for type and domain checking
80
- - `input/type_matcher.rb` - Type validation logic for primitive and complex types
81
- - `input/violation_creator.rb` - Creates standardized violation objects with detailed messages
82
- - `domain/validator.rb` - Domain constraint validation (ranges, arrays, procs)
83
- - `domain/range_analyzer.rb` - Range domain analysis and validation
84
- - `domain/enum_analyzer.rb` - Enumeration domain analysis and validation
85
- - `domain/violation_formatter.rb` - Formats domain violation error messages
86
-
87
- ## DSL Syntax Requirements
88
-
89
- ### Critical Syntax Rules
90
-
91
- **Module Definition Structure**
92
- ```ruby
93
- # CORRECT - CLI can find and load this
94
- module SchemaName
95
- extend Kumi::Schema
96
-
97
- schema do
98
- # schema definition here
99
- end
100
- end
101
- ```
102
-
103
- **Function Call Syntax**:
104
- - **Symbol style**: `fn(:function_name, arg1, arg2, ...)` - The only supported function call syntax
105
-
106
- **Arithmetic Operations**:
107
- - **Sugar Syntax**: `input.field1 + input.field2` - Works for input fields and value references
108
- - **Function Syntax**: `fn(:add, input.field1, input.field2)` - Always works, more explicit
109
- - **Mixed**: Sugar syntax for basic operations, function syntax for complex ones
110
-
111
- **Cascade Condition Syntax**:
112
- ```ruby
113
- value :status do
114
- on trait_name, "Result"
115
- base "Default"
116
- end
117
- ```
118
-
119
72
  ### Key Patterns
120
73
  **DSL Structure**:
121
74
  ```ruby
@@ -133,17 +86,15 @@ schema do
133
86
  string :category
134
87
  end
135
88
 
136
-
137
-
138
89
  # Fields with no declared type
139
90
  any :misc_field
140
91
  end
141
92
 
142
- trait :name, (expression) # Boolean conditions with new syntax
93
+ trait :name, (expression) # Boolean conditions
143
94
  value :name, expression # Computed values
144
95
  value :name do # Conditional logic
145
- on condition, result
146
- base default_result
96
+ on condition, result # on <trait>[, <trait> ...], <expr>
97
+ base default_result # base <expr>
147
98
  end
148
99
  end
149
100
  ```
@@ -151,13 +102,6 @@ end
151
102
  **IMPORTANT CASCADE CONDITION SYNTAX:**
152
103
  In cascade expressions (`value :name do ... end`), trait references use bare identifiers:
153
104
 
154
- **Input Block System**:
155
- - **Required**: All schemas must have an `input` block declaring expected fields
156
- - **Type Declarations**: Preferred via type-specific methods (e.g. `integer :field`, `string :name`, `any :field` for untyped fields)
157
- - **Complex Types**: Use helper functions: `array(:element_type)` and `hash(:key_type, :value_type)`
158
- - **Domain Constraints**: Fields can have domains: `integer :age, domain: 18..65` (validated at runtime)
159
- - **Field Access**: Use `input.field_name` to reference input fields in expressions
160
- - **Separation**: Input metadata (types, domains) is separate from business logic
161
105
 
162
106
  **Expression Types**:
163
107
  - `input.field_name` - Access input data with operator methods (>=, <=, >, <, ==, !=)
@@ -179,30 +123,6 @@ In cascade expressions (`value :name do ... end`), trait references use bare ide
179
123
  - Type inference for all declarations based on expression analysis
180
124
  - Type primitives: `:string`, `:integer`, `:float`, `:boolean`, `:any`, `:symbol`, `:regexp`, `:time`, `:date`, `:datetime`
181
125
  - Collection types: `array(:element_type)` and `hash(:key_type, :value_type)` helper functions
182
- - Type compatibility checking and unification algorithms for numeric types
183
- - Enhanced error messages showing type provenance (declared vs inferred)
184
- - Legacy compatibility constants maintained for backward compatibility
185
-
186
- ### Examples Directory
187
-
188
- The `examples/` directory contains examples showing Kumi usage patterns:
189
- - `cascade_demonstration.rb` - Demonstrates cascade logic with UnsatDetector fixes (working)
190
- - `working_comprehensive_schema.rb` - Feature showcase (current best practices, working)
191
- - Mathematical predicate examples - Safe mutual recursion patterns using cascade mutual exclusion
192
- - `federal_tax_calculator_2024.rb` - Real-world tax calculation example (working)
193
- - `tax_2024.rb` - Tax example with explain functionality (working)
194
- - `wide_schema_compilation_and_evaluation_benchmark.rb` - Benchmark for wide schemas (compilation and evaluation)
195
- - `deep_schema_compilation_and_evaluation_benchmark.rb` - Performance benchmark for deep dependency chains (stack-safe evaluation)
196
- - `comprehensive_god_schema.rb` - Complex example (currently has UnsatDetector semantic errors)
197
-
198
- *Note: Some examples may use deprecated syntax and should be updated to use the new input block system.*
199
-
200
- ## Test Structure
201
-
202
- - `spec/kumi/` - Unit tests for core components
203
- - `spec/integration/` - Integration tests for full workflows
204
- - `spec/fixtures/` - Test fixtures and sample schemas
205
- - `spec/support/` - Test helpers (`ast_factory.rb`, `schema_generator.rb`)
206
126
 
207
127
  ## Files for Understanding
208
128
 
@@ -210,46 +130,8 @@ The `examples/` directory contains examples showing Kumi usage patterns:
210
130
  - `examples/*` Random examples of diverse contexts.
211
131
 
212
132
  ### Troubleshooting Schema Issues
213
- - **Parse Errors**: Check function syntax (avoid empty `fn()` calls)
214
- - **Module Not Found**: Check module structure and naming, see examples
215
- - **UnsatDetector Errors**: Review trait logic for contradictions, add debugs!
216
- - **Type Errors**: Check input block type declarations match usage, add debugs!
217
- - **Runtime Errors**: Use explain to trace computation dependencies, add debugs!
133
+ DEBUG, DEBUG. DEBUG LOGS!
218
134
 
219
- ## Input Block System Details
220
-
221
- ### Required Input Blocks
222
- - **All schemas must have an input block** -
223
- - Input blocks declare expected fields with optional type and domain constraints
224
- - **Empty input blocks are allowed** -`input {}` Even if not useful.
225
- - Fields are accessed via `input.field_name` or `input.field.nested_field.nested_nested_field` which
226
- works for referencing nested array input declarations.
227
-
228
- ### Type System Integration
229
- - **Declared Types**: Explicit type declarations in input blocks (e.g. `integer :field`, `string :name`, `any :field`)
230
- - **Inferred Types**: Types automatically inferred from expression analysis
231
- - **Type Checking**: Validates compatibility between declared and inferred types
232
- - **Enhanced Errors**: Error messages show type provenance (declared vs inferred)
233
- - **Helper Functions**: Use `array(:type)` and `hash(:key_type, :value_type)` for complex types
234
-
235
- ### Parser Components
236
- See `lib/kumi/ruby_parser/parser.rb`
237
-
238
- ### Domain Constraints
239
- - Can be declared: `integer :age, domain: 18..65`
240
- - Supports Range domains (`18..65`), Array domains (`%w[active inactive]`), and Proc domains for custom validation
241
- - Analyzer do some limited domain UNSAT detection, and its used to validated against input at Runtime
242
- ### Type Examples
243
- ```ruby
244
- input do
245
- string :name
246
- integer :age, domain: 18..65
247
- hash :metadata, key: { type: :string }, val: { type: :any }
248
-
249
- #generic type
250
- any :misc # this reduces Kumi's analyze/inference capabilities
251
- end
252
- ```
253
135
 
254
136
  ### Array Broadcasting System
255
137
 
@@ -262,6 +144,9 @@ input do
262
144
  float :price
263
145
  integer :quantity
264
146
  string :category
147
+ array :prices do
148
+ element :integer, :val
149
+ end
265
150
  end
266
151
  end
267
152
 
@@ -269,112 +154,3 @@ end
269
154
  value :subtotals, input.line_items.price * input.line_items.quantity
270
155
  trait :is_taxable, (input.line_items.category != "digital")
271
156
  ```
272
-
273
- **Aggregation Operations**: Functions consuming arrays are detected:
274
- ```ruby
275
- value :total_subtotal, fn(:sum, subtotals)
276
- value :avg_price, fn(:avg, input.line_items.price)
277
- value :max_quantity, fn(:max, input.line_items.quantity)
278
- ```
279
-
280
- **Implementation Components**:
281
- - **InputElementReference** AST nodes for nested field access paths
282
- - **BroadcastDetector** analyzer pass identifies vectorized vs scalar operations
283
- - **Compiler** generates appropriate map/reduce functions based on usage context
284
- - **Type Inference** infers types for array element operations
285
- - Supports arbitrary depth field access with nested arrays and hashes
286
-
287
- ### Trait Syntax Evolution
288
-
289
- **Current Syntax** (recommended):
290
- ```ruby
291
- trait :adult, (input.age >= 18)
292
- trait :qualified, (input.age >= 21) & (input.score > 80) & (input.verified == true)
293
- ```
294
-
295
- **Composite Trait Syntax** (NEW - bare identifier references):
296
- ```ruby
297
- # Base traits
298
- trait :adult, (input.age >= 18)
299
- trait :verified, (input.verified == true)
300
- trait :high_score, (input.score > 80)
301
-
302
- # Composite traits using bare identifier syntax
303
- trait :eligible, adult & verified & high_score
304
- trait :mixed, adult & (input.income > 50_000) & verified
305
-
306
- # Backward compatibility - both syntaxes work together
307
- trait :legacy_mix, adult & ref(:verified) & (input.score > 90)
308
- ```
309
-
310
- **Deprecated Syntax** (with warnings):
311
- ```ruby
312
- trait :adult, input.age, :>=, 18 # OLD - shows deprecation warning
313
- trait :qualified, input.age, :>=, 21, input.score # OLD - shows deprecation warning
314
- ```
315
-
316
- **Key Changes**:
317
- - **NEW**: Bare identifier syntax for direct trait reference: `adult` instead of `ref(:adult)`
318
- - New syntax uses parenthesized expressions: `trait :name, (expression)`
319
- - FieldRef nodes have operator methods that create CallExpression nodes
320
- - Logical AND chaining via `&` operator (Ruby limitation prevents `&&`)
321
- - Only AND operations supported to maintain constraint satisfaction system
322
- - **Backward Compatible**: Both `trait_name` and `ref(:trait_name)` work together
323
- - Old syntax maintained with deprecation warnings for backward compatibility
324
-
325
- ## Common Development Tasks
326
-
327
- ### Adding New Analyzer Passes
328
- 1. Create pass class inheriting from `PassBase` in `lib/kumi/analyzer/passes/`
329
- 2. Implement `run(errors)` method that calls `set_state(key, value)` to store results
330
- 3. Add pass to `PASSES` array in `lib/kumi/analyzer.rb` in correct order
331
- 4. Consider dependencies on other passes (e.g., TypeChecker needs TypeInferencer)
332
-
333
- ## Architecture Design Principles
334
-
335
- - **Multi-pass Analysis**: Each analysis pass has a single responsibility and builds on previous passes
336
- - **Immutable Syntax Tree**: AST nodes are immutable; analysis results stored separately in analyzer state
337
- - **Dependency-driven Evaluation**: All computation follows dependency graph for correct order
338
- - **Type Safety**: Optional type checking without breaking existing schemas
339
- - **Ruby Integration**: Leverages Ruby's metaprogramming with structured analysis
340
- - **Unified Error Reporting**: Consistent, localized error messages throughout the system with clear interface patterns
341
-
342
- ## Code Organization Patterns
343
-
344
- ### Testing Best Practices
345
- - **Spec Organization**: Tests organized by component with clear separation between unit and integration tests
346
- - **Error Variable Extraction**: RSpec patterns avoid multiline block chains by extracting error variables for assertion
347
-
348
- ## Development Guides and Standards
349
-
350
- ### Error Reporting Standards
351
- **For Parser Classes**:
352
- ```ruby
353
- class MyParser
354
- include ErrorReporting
355
-
356
- def parse_something
357
- # Error raising
358
- raise_syntax_error("Invalid syntax", location: current_location)
359
- end
360
- end
361
- ```
362
-
363
- **For Analyzer Passes**:
364
- ```ruby
365
- class MyAnalyzerPass < PassBase
366
- def run(errors)
367
- # Error accumulation with enhanced location
368
- report_error(errors, "semantic error", location: node.loc, type: :semantic)
369
-
370
- # Backward compatible method
371
- add_error(errors, node.loc, "legacy format error")
372
- end
373
- end
374
- ```
375
- ### Testing Error Scenarios
376
- - Use `spec/integration/dsl_breakage_spec.rb` patterns for error testing
377
- - Use `spec/integration/potential_breakage_spec.rb` for edge cases break
378
- - Use `spec/fixtures/location_tracking_test_schema.rb` fixture for testing different syntax error types
379
-
380
- #
data/README.md CHANGED
@@ -66,7 +66,7 @@ Validation happens during schema definition.
66
66
  ## Installation
67
67
 
68
68
  ```bash
69
- # Requires Ruby 3.0+
69
+ # Requires Ruby 3.1+
70
70
  # No external dependencies
71
71
  gem install kumi
72
72
  ```
data/docs/VECTOR_SEMANTICS.md ADDED
@@ -0,0 +1,286 @@
1
+ # Kumi Vector Semantics — Short Guide
2
+
3
+ This note documents how Kumi handles **vectorized traversal** over **arbitrary nested objects**, how **alignment/broadcasting** works, and how **reducers** and **structure functions** behave. It’s intentionally concise but hits all the sharp edges.
4
+
5
+ ---
6
+
7
+ ## Terminology
8
+
9
+ * **Path** – a dot-separated traversal, e.g. `input.regions.offices.employees.salary`.
10
+ * **Scope (axes)** – the list of array segments encountered along a path.
11
+ Example: for `regions.offices.employees.salary` the scope is `[:regions, :offices, :employees]`.
12
+ * **Rank** – number of axes = `scope.length`.
13
+ * **Index tuple** – one coordinate per axis, e.g. `[region_i, office_j, employee_k]`; tuples are ordered lexicographically.
14
+
15
+ **Three Laws (think of them as invariants):**
16
+
17
+ 1. **Enumeration**
18
+ `each_indexed(path).map(&:first) == ravel(path)`
19
+
20
+ 2. **Reconstruction**
21
+ `lift(to_scope, each_indexed(path))` regroups by `to_scope` (must be a prefix of `scope(path)`).
22
+
23
+ 3. **Counting**
24
+ `size(path) == ravel(path).length == each_indexed(path).count`
25
+
26
+ These laws are the mental model. Everything else is just mechanics.
27
+
28
+ ---
29
+
30
+ ## Access Modes
31
+
32
+ Kumi’s Access Planner emits low-level ops (`enter_hash`, `enter_array`) and supports three vector modes per path (the fourth access mode, `:read`, is not covered in this guide):
33
+
34
+ ### 1) `:materialize`
35
+
36
+ Return the **original nested structure** down to that path (no enumeration).
37
+ Good for “give me the data shaped like the input.”
38
+
39
+ ```ruby
40
+ # Input (object mode)
41
+ {
42
+ regions: [
43
+ { name: "E", offices: [{ employees: [{salary: 100}, {salary: 120}] }] },
44
+ { name: "D", offices: [{ employees: [{salary: 90}] }] }
45
+ ]
46
+ }
47
+
48
+ materialize("regions.offices.employees.salary")
49
+ # => [[ [100,120] ], [ [90] ]]
50
+ ```
51
+
52
+ ### 2) `:ravel`
53
+
54
+ **Enumerate elements at the next array boundary** for that path, i.e., “collect the items at this depth.”
55
+ It is **not** NumPy’s “flatten everything.” It collects the next level.
56
+
57
+ ```ruby
58
+ ravel("regions") # => [ {…E…}, {…D…} ] (enumerate regions)
59
+ ravel("regions.offices") # => [ {employees:[…]}, {employees:[…]} ] (each office)
60
+ ravel("regions.offices.employees.salary") # => [ [100,120], [90] ] (each employee group at that depth)
61
+ ```
62
+
63
+ ### 3) `:each_indexed`
64
+
65
+ Enumerate leaf values **with** their index tuple (authoritative for `lift` and alignment):
66
+
67
+ ```ruby
68
+ each_indexed("regions.offices.employees.salary")
69
+ # => [
70
+ # [100, [0,0,0]], [120, [0,0,1]],
71
+ # [ 90, [1,0,0]]
72
+ # ]
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Lift (Regroup by prefix)
78
+
79
+ `lift(to_scope)` turns a vector-of-rows (from `each_indexed`) into a nested array grouped by `to_scope`.
80
+
81
+ ```ruby
82
+ # Given values from each_indexed above:
83
+ lift([:regions], …) # => [ [100,120], [90] ]
84
+ lift([:regions,:offices], …) # => [ [[100,120]], [[90]] ]
85
+ lift([:regions,:offices,:employees], …) # => [ [[[100,120]]], [[[90]]] ]
86
+ ```
87
+
88
+ * `to_scope` must be a **prefix** of the vector’s `scope`.
89
+ * Depth is derived mechanically from index arity; VM doesn’t guess.
90
+
91
+ ---
92
+
93
+ ## Alignment & Broadcasting
94
+
95
+ When mapping a function over multiple arguments, Kumi:
96
+
97
+ 1. Picks a **carrier** vector (the one with the longest scope).
98
+ 2. **Aligns** other vectors to the carrier if they are **prefix-compatible** (their scope is a prefix of the carrier’s scope).
99
+ 3. **Broadcasts** scalars across the carrier.
100
+
101
+ If scopes aren’t prefix-compatible, lowering raises:
102
+ `cross-scope map without join: [:a] vs [:b,:c]`
103
+
104
+ ```ruby
105
+ # price, quantity both scope [:items]
106
+ final = price * quantity # zip by position (same scope)
107
+
108
+ # Broadcast scalar across [:items]
109
+ discounted = price * 0.9
110
+
111
+ # Align prefix [:regions] to carrier [:regions,:offices]
112
+ aligned_tax = align_to(offices_subtotals, regions_tax)
113
+ total = offices_subtotals * (1 - aligned_tax)
114
+ ```
115
+
116
+ ---
117
+
118
+ ## Structure Functions vs Reducers
119
+
120
+ * **Reducers** collapse a vector to a **scalar** (e.g., `sum`, `min`, `avg`).
121
+ Lowering selects a vector argument and emits a `Reduce`.
122
+
123
+ * **Structure functions** observe or reshape **structure** (e.g., `size`, `flatten`, `count_across`).
124
+ Lowering usually uses a `:ravel` plan and a plain `Map` (no indices required).
125
+
126
+ ### Laws for `size` and `flatten`
127
+
128
+ * `size(path) == ravel(path).length` (Counting Law)
129
+ * `flatten(path)` flattens nested arrays (by default all levels; use `flatten_one` for one level).
130
+
131
+ ---
132
+
133
+ ## End-to-End Mini Examples
134
+
135
+ ### A. Simple vector math + reducers (object access)
136
+
137
+ ```ruby
138
+ module Cart
139
+ extend Kumi::Schema
140
+ schema do
141
+ input do
142
+ array :items do
143
+ float :price
144
+ integer :qty
145
+ end
146
+ float :shipping_threshold
147
+ end
148
+
149
+ value :subtotals, input.items.price * input.items.qty
150
+ value :subtotal, fn(:sum, subtotals)
151
+ value :shipping, subtotal > input.shipping_threshold ? 0.0 : 9.99
152
+ value :total, subtotal + shipping
153
+ end
154
+ end
155
+
156
+ data = {
157
+ items: [{price: 100.0, qty: 2}, {price: 200.0, qty: 1}],
158
+ shipping_threshold: 50.0
159
+ }
160
+
161
+ r = Cart.from(data)
162
+ r[:subtotals] # => [200.0, 200.0] (vector map)
163
+ r[:subtotal] # => 400.0 (reducer)
164
+ r[:shipping] # => 0.0
165
+ r[:total] # => 400.0
166
+ ```
167
+
168
+ **Internal truths**:
169
+
170
+ * `each_indexed(input.items.price)` → `[[100.0,[0]],[200.0,[1]]]`
171
+ * `size(input.items)` → `2` because `ravel(input.items)` has length 2.
172
+
173
+ ### B. Mixed scopes + alignment
174
+
175
+ ```ruby
176
+ module Regions
177
+ extend Kumi::Schema
178
+ schema do
179
+ input do
180
+ array :regions do
181
+ float :tax
182
+ array :offices do
183
+ array :employees do
184
+ float :salary
185
+ end
186
+ end
187
+ end
188
+ end
189
+
190
+ value :office_payrolls, fn(:sum, input.regions.offices.employees.salary) # vector reduce per office
191
+ value :taxed, office_payrolls * (1 - input.regions.tax) # tax (align regions.tax to [:regions,:offices])
192
+ end
193
+ end
194
+
195
+ # Alignment rule: regions.tax (scope [:regions]) aligns to office_payrolls (scope [:regions,:offices])
196
+ ```
197
+
198
+ ### C. Element access (pure arrays) + structure functions
199
+
200
+ ```ruby
201
+ module Cube
202
+ extend Kumi::Schema
203
+ schema do
204
+ input do
205
+ array :cube do
206
+ element :array, :layer do
207
+ element :array, :row do
208
+ element :float, :cell
209
+ end
210
+ end
211
+ end
212
+ end
213
+
214
+ value :layers, fn(:size, input.cube) # == ravel(input.cube).length
215
+ value :matrices, fn(:size, input.cube.layer) # enumerate at next depth
216
+ value :rows, fn(:size, input.cube.layer.row)
217
+ value :all_values, fn(:flatten, input.cube.layer.row.cell)
218
+ value :total, fn(:sum, all_values)
219
+ end
220
+ end
221
+
222
+ data = { cube: [ [[1,2],[3]], [[4]] ] }
223
+
224
+ # ravel views (intuition)
225
+ # ravel(cube) => [ [[1,2],[3]], [[4]] ]
226
+ # ravel(cube.layer) => [ [1,2], [3], [4] ]
227
+ # ravel(cube.layer.row) => [ 1, 2, 3, 4 ]
228
+ # ravel(cube.layer.row.cell) => [ 1, 2, 3, 4 ] (same leaf)
229
+
230
+ c = Cube.from(data)
231
+ c[:layers] # => 2
232
+ c[:matrices] # => 3
233
+ c[:rows] # => 4
234
+ c[:all_values] # => [1,2,3,4]
235
+ c[:total] # => 10
236
+ ```
237
+
238
+ ---
239
+
240
+ ## Planner & VM: Who does what?
241
+
242
+ * **Planner**: Emits deterministic `enter_hash`/`enter_array` sequences per path and mode.
243
+
244
+ * For element edges (inline array aliases), it **does not** emit `enter_hash`.
245
+ * For `:each_indexed` / `:ravel`, it appends a terminal `enter_array` **only if** the final node is an array.
246
+ * **Lowerer**: Decides plans (`:ravel`, `:each_indexed`, `:materialize`), inserts `align_to`, emits `lift` at declaration boundary when a vector result should be exposed as a scalar nested array.
247
+ * **VM**: Purely mechanical:
248
+
249
+ * `broadcast_scalar` for scalar→vec expansion,
250
+ * `zip_same_scope` when scopes match,
251
+ * `align_to` for prefix alignment,
252
+ * `group_rows` inside `lift` to reconstruct prefixes.
253
+
254
+ No type sniffing or guesses: the IR is the source of truth.
255
+
256
+ ---
257
+
258
+ ## Jagged & Sparse Arrays
259
+
260
+ * Ordering is **lexicographic by index tuple** (stable).
261
+ * No padding is introduced; missing branches are just… missing.
262
+ * `align_to(..., on_missing: :error|:nil)` enforces policy.
263
+
264
+ ---
265
+
266
+ ## Error Policies
267
+
268
+ For missing keys/arrays, accessors obey policy:
269
+
270
+ * `:error` (default) – raise descriptive error with the path/mode.
271
+ * `:skip` – drop the missing branch (useful in ravels).
272
+ * `:yield_nil` – emit `nil` in place (preserves cardinality).
273
+
274
+ Document these on any user-facing accessor.
275
+
276
+ ---
277
+
278
+ ## Quick Cheatsheet
279
+
280
+ * Use **`ravel(path)`** to “list the things at this level.”
281
+ * Use **`each_indexed(path)`** when you need `(value, idx)` pairs for joins/regroup.
282
+ * Use **`lift(to_scope, each_indexed(path))`** to reconstruct nested structure.
283
+ * **Reducers** (e.g., `sum`, `avg`, `min`) consume the raveled view of their argument.
284
+ * **Structure functions** (e.g., `size`, `flatten`, `flatten_one`, `count_across`) operate on structure at that depth and usually compile via `:ravel`.
285
+
286
+ Keep the three laws in mind and Kumi’s behavior is predictable—even over deeply nested, heterogeneous data.
data/docs/features/hierarchical-broadcasting.md CHANGED
@@ -193,7 +193,7 @@ The type system automatically infers appropriate types for broadcasted operation
193
193
 
194
194
  ### Analysis Layer
195
195
  - **BroadcastDetector** - Identifies vectorized vs scalar operations
196
- - **TypeInferencer** - Infers types for array element access patterns
196
+ - **TypeInferencerPass** - Infers types for array element access patterns
197
197
 
198
198
  ### Compilation Layer
199
199
  - **Automatic Dispatch** - Maps element-wise operations to array map functions
data/docs/features/s-expression-printer.md CHANGED
@@ -42,7 +42,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
42
42
  (InputDeclaration :age :integer)
43
43
  (InputDeclaration :name :string)
44
44
  ]
45
- attributes: [
45
+ values: [
46
46
  (ValueDeclaration :greeting
47
47
  (CallExpression :concat
48
48
  (Literal "Hello ")
@@ -65,7 +65,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
65
65
 
66
66
  The printer handles all Kumi AST node types:
67
67
 
68
- - **Root** - Schema container with inputs, attributes, and traits
68
+ - **Root** - Schema container with inputs, values, and traits
69
69
  - **Declarations** - InputDeclaration, ValueDeclaration, TraitDeclaration
70
70
  - **Expressions** - CallExpression, ArrayExpression, CascadeExpression, CaseExpression
71
71
  - **References** - InputReference, InputElementReference, DeclarationReference
data/examples/deep_schema_compilation_and_evaluation_benchmark.rb CHANGED
@@ -86,21 +86,27 @@ puts
86
86
  # ------------------------------------------------------------------
87
87
  Benchmark.ips do |x|
88
88
  schemas.each do |d, schema|
89
- runner = schema.from(seed: 0) # memoised runner
90
- x.report("eval #{d}-deep") { runner[:final_result] }
89
+ # 1) HOT (memoized): expect ~flat, nanosecond-level if cached
90
+ hot = schema.from(seed: 0)
91
+ x.report("HOT fetch #{d}-deep") do
92
+ hot[:final_result]
93
+ end
94
+
95
+ # 2) COLD via UPDATE (no memoized result): change a dependent input each iter
96
+ upd = schema.from(seed: 0)
97
+ i = 0
98
+ x.report("COLD update #{d}-deep") do
99
+ i += 1
100
+ upd.update(seed: i) # invalidates v0..vN; forces recompute
101
+ upd[:final_result]
102
+ end
103
+
104
+ # 3) COLD new runner (includes construction)
105
+ prng = Random.new(42)
106
+ x.report("COLD new #{d}-deep") do
107
+ r = schema.from(seed: prng.rand(1_000_000))
108
+ r[:final_result]
109
+ end
91
110
  end
92
111
  x.compare!
93
112
  end
94
- # Warming up --------------------------------------
95
- # eval 50-deep 222.000 i/100ms
96
- # eval 100-deep 57.000 i/100ms
97
- # eval 150-deep 26.000 i/100ms
98
- # Calculating -------------------------------------
99
- # eval 50-deep 2.166k (± 1.9%) i/s (461.70 μs/i) - 10.878k in 5.024320s
100
- # eval 100-deep 561.698 (± 1.4%) i/s (1.78 ms/i) - 2.850k in 5.075057s
101
- # eval 150-deep 253.732 (± 0.8%) i/s (3.94 ms/i) - 1.274k in 5.021499s
102
-
103
- # Comparison:
104
- # eval 50-deep: 2165.9 i/s
105
- # eval 100-deep: 561.7 i/s - 3.86x slower
106
- # eval 150-deep: 253.7 i/s - 8.54x slower