kumi 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/CLAUDE.md +7 -231
- data/README.md +1 -1
- data/docs/VECTOR_SEMANTICS.md +286 -0
- data/docs/features/hierarchical-broadcasting.md +1 -1
- data/docs/features/s-expression-printer.md +2 -2
- data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
- data/lib/kumi/analyzer.rb +34 -12
- data/lib/kumi/compiler.rb +2 -12
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +157 -64
- data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
- data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
- data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -101
- data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
- data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +2 -1
- data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
- data/lib/kumi/core/analyzer/passes/type_checker.rb +3 -3
- data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +2 -2
- data/lib/kumi/core/analyzer/plans.rb +52 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
- data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
- data/lib/kumi/core/compiler/access_builder.rb +36 -0
- data/lib/kumi/core/compiler/access_planner.rb +219 -0
- data/lib/kumi/core/compiler/accessors/base.rb +69 -0
- data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
- data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
- data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
- data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
- data/lib/kumi/core/compiler_base.rb +2 -2
- data/lib/kumi/core/error_reporter.rb +6 -5
- data/lib/kumi/core/errors.rb +4 -0
- data/lib/kumi/core/explain.rb +157 -205
- data/lib/kumi/core/export/node_builders.rb +2 -2
- data/lib/kumi/core/export/node_serializers.rb +1 -1
- data/lib/kumi/core/function_registry/collection_functions.rb +21 -10
- data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
- data/lib/kumi/core/function_registry/function_builder.rb +142 -55
- data/lib/kumi/core/function_registry/logical_functions.rb +5 -5
- data/lib/kumi/core/function_registry/stat_functions.rb +2 -2
- data/lib/kumi/core/function_registry.rb +126 -108
- data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
- data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
- data/lib/kumi/core/ir/execution_engine.rb +50 -0
- data/lib/kumi/core/ir.rb +58 -0
- data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
- data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +36 -15
- data/lib/kumi/core/ruby_parser/input_builder.rb +5 -5
- data/lib/kumi/core/ruby_parser/parser.rb +1 -1
- data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
- data/lib/kumi/registry.rb +14 -79
- data/lib/kumi/runtime/executable.rb +213 -0
- data/lib/kumi/schema.rb +14 -3
- data/lib/kumi/schema_metadata.rb +2 -2
- data/lib/kumi/support/ir_dump.rb +491 -0
- data/lib/kumi/support/s_expression_printer.rb +1 -1
- data/lib/kumi/syntax/location.rb +5 -0
- data/lib/kumi/syntax/node.rb +0 -1
- data/lib/kumi/syntax/root.rb +2 -2
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +6 -15
- metadata +26 -15
- data/lib/kumi/core/cascade_executor_builder.rb +0 -132
- data/lib/kumi/core/compiled_schema.rb +0 -43
- data/lib/kumi/core/compiler/expression_compiler.rb +0 -146
- data/lib/kumi/core/compiler/function_invoker.rb +0 -55
- data/lib/kumi/core/compiler/path_traversal_compiler.rb +0 -158
- data/lib/kumi/core/compiler/reference_compiler.rb +0 -46
- data/lib/kumi/core/evaluation_wrapper.rb +0 -40
- data/lib/kumi/core/nested_structure_utils.rb +0 -78
- data/lib/kumi/core/schema_instance.rb +0 -115
- data/lib/kumi/core/vectorized_function_builder.rb +0 -88
- data/lib/kumi/js/compiler.rb +0 -878
- data/lib/kumi/js/function_registry.rb +0 -333
- data/migrate_to_core_iterative.rb +0 -938
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3eb46e14716bf14c3d9165ffac957211f41ad5a21a74ad37c47b37c37e01b312
|
4
|
+
data.tar.gz: fd0e36d65ac41079c27cf9699a3e092ff762bc76b3af8fc90df2ec763fed3806
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 54ed5e72d6acf863e0f7ed0986c8db83fab22526bcffb97b4c1b96343e724b0ae505804baed5554933c452478ced8c46ec24a6568426813e69c4007994588de6
|
7
|
+
data.tar.gz: '09aabb772643aab71d957060c934f05be5838a1056b7c635822b75759106a3119844cc7e20f96b8990ec658d969f2d2e0143ceb43d36f8d48da061c2bb80cb6a'
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
## [0.0.11] – 2025-08-13
|
2
|
+
### Added
|
3
|
+
- Intermediate Representation (IR) and slot-based VM interpreter.
|
4
|
+
- Scope-aware vector semantics (alignment, lift, hierarchical indices).
|
5
|
+
- Debug tooling: IR dump, VM/lowering traces via DEBUG_* flags.
|
6
|
+
|
7
|
+
### Changed
|
8
|
+
- Analyzer now lowers to IR via `LowerToIRPass`.
|
9
|
+
- Access modes: `:read`, `:ravel`, `:each_indexed`, `:materialize`.
|
10
|
+
|
11
|
+
### Removed (BREAKING)
|
12
|
+
- JavaScript transpiler (legacy compiler).
|
13
|
+
|
14
|
+
### Requirements
|
15
|
+
- Ruby >= 3.1 (Was >= 3.0)
|
16
|
+
|
17
|
+
### Notes
|
18
|
+
- No expected DSL changes for typical schemas; report regressions.
|
data/CLAUDE.md
CHANGED
@@ -18,11 +18,6 @@ Kumi is a Declarative logic and rules engine framework with static analysis for
|
|
18
18
|
- `bundle exec rspec spec/path/to/specific_spec.rb` - Run specific test file
|
19
19
|
- `bundle exec rspec spec/path/to/specific_spec.rb:123` - Run specific test at line
|
20
20
|
|
21
|
-
### Gem Management
|
22
|
-
- `bundle install` - Install dependencies
|
23
|
-
- `gem build kumi.gemspec` - Build the gem
|
24
|
-
- `gem install ./kumi-*.gem` - Install locally built gem
|
25
|
-
|
26
21
|
## Architecture Overview
|
27
22
|
|
28
23
|
### Core Components
|
@@ -72,50 +67,8 @@ Kumi is a Declarative logic and rules engine framework with static analysis for
|
|
72
67
|
- Supports custom function registration with type metadata
|
73
68
|
- Each function includes param_types, return_type, arity, and description
|
74
69
|
- Core functions include: `==`, `>`, `<`, `add`, `multiply`, `and`, `or`, `clamp`, etc.
|
75
|
-
- Maintains backward compatibility with legacy type checking system
|
76
70
|
- Function documents are generated by the script ./scripts/generate_function_docs.rb
|
77
71
|
|
78
|
-
**Input Validation System** (`lib/kumi/input/` and `lib/kumi/domain/`):
|
79
|
-
- `input/validator.rb` - Main validation coordinator for type and domain checking
|
80
|
-
- `input/type_matcher.rb` - Type validation logic for primitive and complex types
|
81
|
-
- `input/violation_creator.rb` - Creates standardized violation objects with detailed messages
|
82
|
-
- `domain/validator.rb` - Domain constraint validation (ranges, arrays, procs)
|
83
|
-
- `domain/range_analyzer.rb` - Range domain analysis and validation
|
84
|
-
- `domain/enum_analyzer.rb` - Enumeration domain analysis and validation
|
85
|
-
- `domain/violation_formatter.rb` - Formats domain violation error messages
|
86
|
-
|
87
|
-
## DSL Syntax Requirements
|
88
|
-
|
89
|
-
### Critical Syntax Rules
|
90
|
-
|
91
|
-
**Module Definition Structure**
|
92
|
-
```ruby
|
93
|
-
# CORRECT - CLI can find and load this
|
94
|
-
module SchemaName
|
95
|
-
extend Kumi::Schema
|
96
|
-
|
97
|
-
schema do
|
98
|
-
# schema definition here
|
99
|
-
end
|
100
|
-
end
|
101
|
-
```
|
102
|
-
|
103
|
-
**Function Call Syntax**:
|
104
|
-
- **Symbol style**: `fn(:function_name, arg1, arg2, ...)` - The only supported function call syntax
|
105
|
-
|
106
|
-
**Arithmetic Operations**:
|
107
|
-
- **Sugar Syntax**: `input.field1 + input.field2` - Works for input fields and value references
|
108
|
-
- **Function Syntax**: `fn(:add, input.field1, input.field2)` - Always works, more explicit
|
109
|
-
- **Mixed**: Sugar syntax for basic operations, function syntax for complex ones
|
110
|
-
|
111
|
-
**Cascade Condition Syntax**:
|
112
|
-
```ruby
|
113
|
-
value :status do
|
114
|
-
on trait_name, "Result"
|
115
|
-
base "Default"
|
116
|
-
end
|
117
|
-
```
|
118
|
-
|
119
72
|
### Key Patterns
|
120
73
|
**DSL Structure**:
|
121
74
|
```ruby
|
@@ -133,17 +86,15 @@ schema do
|
|
133
86
|
string :category
|
134
87
|
end
|
135
88
|
|
136
|
-
|
137
|
-
|
138
89
|
# Fields with no declared type
|
139
90
|
any :misc_field
|
140
91
|
end
|
141
92
|
|
142
|
-
trait :name, (expression) # Boolean conditions
|
93
|
+
trait :name, (expression) # Boolean conditions
|
143
94
|
value :name, expression # Computed values
|
144
95
|
value :name do # Conditional logic
|
145
|
-
on condition, result
|
146
|
-
base default_result
|
96
|
+
on condition, result # on <trait>[, <trait> ...], <expr>
|
97
|
+
base default_result # base <expr>
|
147
98
|
end
|
148
99
|
end
|
149
100
|
```
|
@@ -151,13 +102,6 @@ end
|
|
151
102
|
**IMPORTANT CASCADE CONDITION SYNTAX:**
|
152
103
|
In cascade expressions (`value :name do ... end`), trait references use bare identifiers:
|
153
104
|
|
154
|
-
**Input Block System**:
|
155
|
-
- **Required**: All schemas must have an `input` block declaring expected fields
|
156
|
-
- **Type Declarations**: Preferred via type-specific methods (e.g. `integer :field`, `string :name`, `any :field` for untyped fields)
|
157
|
-
- **Complex Types**: Use helper functions: `array(:element_type)` and `hash(:key_type, :value_type)`
|
158
|
-
- **Domain Constraints**: Fields can have domains: `integer :age, domain: 18..65` (validated at runtime)
|
159
|
-
- **Field Access**: Use `input.field_name` to reference input fields in expressions
|
160
|
-
- **Separation**: Input metadata (types, domains) is separate from business logic
|
161
105
|
|
162
106
|
**Expression Types**:
|
163
107
|
- `input.field_name` - Access input data with operator methods (>=, <=, >, <, ==, !=)
|
@@ -179,30 +123,6 @@ In cascade expressions (`value :name do ... end`), trait references use bare ide
|
|
179
123
|
- Type inference for all declarations based on expression analysis
|
180
124
|
- Type primitives: `:string`, `:integer`, `:float`, `:boolean`, `:any`, `:symbol`, `:regexp`, `:time`, `:date`, `:datetime`
|
181
125
|
- Collection types: `array(:element_type)` and `hash(:key_type, :value_type)` helper functions
|
182
|
-
- Type compatibility checking and unification algorithms for numeric types
|
183
|
-
- Enhanced error messages showing type provenance (declared vs inferred)
|
184
|
-
- Legacy compatibility constants maintained for backward compatibility
|
185
|
-
|
186
|
-
### Examples Directory
|
187
|
-
|
188
|
-
The `examples/` directory contains examples showing Kumi usage patterns:
|
189
|
-
- `cascade_demonstration.rb` - Demonstrates cascade logic with UnsatDetector fixes (working)
|
190
|
-
- `working_comprehensive_schema.rb` - Feature showcase (current best practices, working)
|
191
|
-
- Mathematical predicate examples - Safe mutual recursion patterns using cascade mutual exclusion
|
192
|
-
- `federal_tax_calculator_2024.rb` - Real-world tax calculation example (working)
|
193
|
-
- `tax_2024.rb` - Tax example with explain functionality (working)
|
194
|
-
- `wide_schema_compilation_and_evaluation_benchmark.rb` - Benchmark for wide schemas (compilation and evaluation)
|
195
|
-
- `deep_schema_compilation_and_evaluation_benchmark.rb` - Performance benchmark for deep dependency chains (stack-safe evaluation)
|
196
|
-
- `comprehensive_god_schema.rb` - Complex example (currently has UnsatDetector semantic errors)
|
197
|
-
|
198
|
-
*Note: Some examples may use deprecated syntax and should be updated to use the new input block system.*
|
199
|
-
|
200
|
-
## Test Structure
|
201
|
-
|
202
|
-
- `spec/kumi/` - Unit tests for core components
|
203
|
-
- `spec/integration/` - Integration tests for full workflows
|
204
|
-
- `spec/fixtures/` - Test fixtures and sample schemas
|
205
|
-
- `spec/support/` - Test helpers (`ast_factory.rb`, `schema_generator.rb`)
|
206
126
|
|
207
127
|
## Files for Understanding
|
208
128
|
|
@@ -210,46 +130,8 @@ The `examples/` directory contains examples showing Kumi usage patterns:
|
|
210
130
|
- `examples/*` Random examples of diverse contexts.
|
211
131
|
|
212
132
|
### Troubleshooting Schema Issues
|
213
|
-
|
214
|
-
- **Module Not Found**: Check module structure and naming, see examples
|
215
|
-
- **UnsatDetector Errors**: Review trait logic for contradictions, add debugs!
|
216
|
-
- **Type Errors**: Check input block type declarations match usage, add debugs!
|
217
|
-
- **Runtime Errors**: Use explain to trace computation dependencies, add debugs!
|
133
|
+
DEBUG, DEBUG. DEBUG LOGS!
|
218
134
|
|
219
|
-
## Input Block System Details
|
220
|
-
|
221
|
-
### Required Input Blocks
|
222
|
-
- **All schemas must have an input block** -
|
223
|
-
- Input blocks declare expected fields with optional type and domain constraints
|
224
|
-
- **Empty input blocks are allowed** -`input {}` Even if not useful.
|
225
|
-
- Fields are accessed via `input.field_name` or `input.field.nested_field.nested_nested_field` which
|
226
|
-
works for referencing nested array input declarations.
|
227
|
-
|
228
|
-
### Type System Integration
|
229
|
-
- **Declared Types**: Explicit type declarations in input blocks (e.g. `integer :field`, `string :name`, `any :field`)
|
230
|
-
- **Inferred Types**: Types automatically inferred from expression analysis
|
231
|
-
- **Type Checking**: Validates compatibility between declared and inferred types
|
232
|
-
- **Enhanced Errors**: Error messages show type provenance (declared vs inferred)
|
233
|
-
- **Helper Functions**: Use `array(:type)` and `hash(:key_type, :value_type)` for complex types
|
234
|
-
|
235
|
-
### Parser Components
|
236
|
-
See `lib/kumi/ruby_parser/parser.rb`
|
237
|
-
|
238
|
-
### Domain Constraints
|
239
|
-
- Can be declared: `integer :age, domain: 18..65`
|
240
|
-
- Supports Range domains (`18..65`), Array domains (`%w[active inactive]`), and Proc domains for custom validation
|
241
|
-
- Analyzer do some limited domain UNSAT detection, and its used to validated against input at Runtime
|
242
|
-
### Type Examples
|
243
|
-
```ruby
|
244
|
-
input do
|
245
|
-
string :name
|
246
|
-
integer :age, domain: 18..65
|
247
|
-
hash :metadata, key: { type: :string }, val: { type: :any }
|
248
|
-
|
249
|
-
#generic type
|
250
|
-
any :misc # this reduces Kumi's analyze/inference capabilities
|
251
|
-
end
|
252
|
-
```
|
253
135
|
|
254
136
|
### Array Broadcasting System
|
255
137
|
|
@@ -262,6 +144,9 @@ input do
|
|
262
144
|
float :price
|
263
145
|
integer :quantity
|
264
146
|
string :category
|
147
|
+
array :prices do
|
148
|
+
element :integer, :val
|
149
|
+
end
|
265
150
|
end
|
266
151
|
end
|
267
152
|
|
@@ -269,112 +154,3 @@ end
|
|
269
154
|
value :subtotals, input.line_items.price * input.line_items.quantity
|
270
155
|
trait :is_taxable, (input.line_items.category != "digital")
|
271
156
|
```
|
272
|
-
|
273
|
-
**Aggregation Operations**: Functions consuming arrays are detected:
|
274
|
-
```ruby
|
275
|
-
value :total_subtotal, fn(:sum, subtotals)
|
276
|
-
value :avg_price, fn(:avg, input.line_items.price)
|
277
|
-
value :max_quantity, fn(:max, input.line_items.quantity)
|
278
|
-
```
|
279
|
-
|
280
|
-
**Implementation Components**:
|
281
|
-
- **InputElementReference** AST nodes for nested field access paths
|
282
|
-
- **BroadcastDetector** analyzer pass identifies vectorized vs scalar operations
|
283
|
-
- **Compiler** generates appropriate map/reduce functions based on usage context
|
284
|
-
- **Type Inference** infers types for array element operations
|
285
|
-
- Supports arbitrary depth field access with nested arrays and hashes
|
286
|
-
|
287
|
-
### Trait Syntax Evolution
|
288
|
-
|
289
|
-
**Current Syntax** (recommended):
|
290
|
-
```ruby
|
291
|
-
trait :adult, (input.age >= 18)
|
292
|
-
trait :qualified, (input.age >= 21) & (input.score > 80) & (input.verified == true)
|
293
|
-
```
|
294
|
-
|
295
|
-
**Composite Trait Syntax** (NEW - bare identifier references):
|
296
|
-
```ruby
|
297
|
-
# Base traits
|
298
|
-
trait :adult, (input.age >= 18)
|
299
|
-
trait :verified, (input.verified == true)
|
300
|
-
trait :high_score, (input.score > 80)
|
301
|
-
|
302
|
-
# Composite traits using bare identifier syntax
|
303
|
-
trait :eligible, adult & verified & high_score
|
304
|
-
trait :mixed, adult & (input.income > 50_000) & verified
|
305
|
-
|
306
|
-
# Backward compatibility - both syntaxes work together
|
307
|
-
trait :legacy_mix, adult & ref(:verified) & (input.score > 90)
|
308
|
-
```
|
309
|
-
|
310
|
-
**Deprecated Syntax** (with warnings):
|
311
|
-
```ruby
|
312
|
-
trait :adult, input.age, :>=, 18 # OLD - shows deprecation warning
|
313
|
-
trait :qualified, input.age, :>=, 21, input.score # OLD - shows deprecation warning
|
314
|
-
```
|
315
|
-
|
316
|
-
**Key Changes**:
|
317
|
-
- **NEW**: Bare identifier syntax for direct trait reference: `adult` instead of `ref(:adult)`
|
318
|
-
- New syntax uses parenthesized expressions: `trait :name, (expression)`
|
319
|
-
- FieldRef nodes have operator methods that create CallExpression nodes
|
320
|
-
- Logical AND chaining via `&` operator (Ruby limitation prevents `&&`)
|
321
|
-
- Only AND operations supported to maintain constraint satisfaction system
|
322
|
-
- **Backward Compatible**: Both `trait_name` and `ref(:trait_name)` work together
|
323
|
-
- Old syntax maintained with deprecation warnings for backward compatibility
|
324
|
-
|
325
|
-
## Common Development Tasks
|
326
|
-
|
327
|
-
### Adding New Analyzer Passes
|
328
|
-
1. Create pass class inheriting from `PassBase` in `lib/kumi/analyzer/passes/`
|
329
|
-
2. Implement `run(errors)` method that calls `set_state(key, value)` to store results
|
330
|
-
3. Add pass to `PASSES` array in `lib/kumi/analyzer.rb` in correct order
|
331
|
-
4. Consider dependencies on other passes (e.g., TypeChecker needs TypeInferencer)
|
332
|
-
|
333
|
-
## Architecture Design Principles
|
334
|
-
|
335
|
-
- **Multi-pass Analysis**: Each analysis pass has a single responsibility and builds on previous passes
|
336
|
-
- **Immutable Syntax Tree**: AST nodes are immutable; analysis results stored separately in analyzer state
|
337
|
-
- **Dependency-driven Evaluation**: All computation follows dependency graph for correct order
|
338
|
-
- **Type Safety**: Optional type checking without breaking existing schemas
|
339
|
-
- **Ruby Integration**: Leverages Ruby's metaprogramming with structured analysis
|
340
|
-
- **Unified Error Reporting**: Consistent, localized error messages throughout the system with clear interface patterns
|
341
|
-
|
342
|
-
## Code Organization Patterns
|
343
|
-
|
344
|
-
### Testing Best Practices
|
345
|
-
- **Spec Organization**: Tests organized by component with clear separation between unit and integration tests
|
346
|
-
- **Error Variable Extraction**: RSpec patterns avoid multiline block chains by extracting error variables for assertion
|
347
|
-
|
348
|
-
## Development Guides and Standards
|
349
|
-
|
350
|
-
### Error Reporting Standards
|
351
|
-
**For Parser Classes**:
|
352
|
-
```ruby
|
353
|
-
class MyParser
|
354
|
-
include ErrorReporting
|
355
|
-
|
356
|
-
def parse_something
|
357
|
-
# Error raising
|
358
|
-
raise_syntax_error("Invalid syntax", location: current_location)
|
359
|
-
end
|
360
|
-
end
|
361
|
-
```
|
362
|
-
|
363
|
-
**For Analyzer Passes**:
|
364
|
-
```ruby
|
365
|
-
class MyAnalyzerPass < PassBase
|
366
|
-
def run(errors)
|
367
|
-
# Error accumulation with enhanced location
|
368
|
-
report_error(errors, "semantic error", location: node.loc, type: :semantic)
|
369
|
-
|
370
|
-
# Backward compatible method
|
371
|
-
add_error(errors, node.loc, "legacy format error")
|
372
|
-
end
|
373
|
-
end
|
374
|
-
```
|
375
|
-
### Testing Error Scenarios
|
376
|
-
- Use `spec/integration/dsl_breakage_spec.rb` patterns for error testing
|
377
|
-
- Use `spec/integration/potential_breakage_spec.rb` for edge cases break
|
378
|
-
- Use `spec/fixtures/location_tracking_test_schema.rb` fixture for testing different syntax error types
|
379
|
-
|
380
|
-
#
|
data/README.md
CHANGED
data/docs/VECTOR_SEMANTICS.md
ADDED
@@ -0,0 +1,286 @@
|
|
1
|
+
# Kumi Vector Semantics — Short Guide
|
2
|
+
|
3
|
+
This note documents how Kumi handles **vectorized traversal** over **arbitrary nested objects**, how **alignment/broadcasting** works, and how **reducers** and **structure functions** behave. It’s intentionally concise but hits all the sharp edges.
|
4
|
+
|
5
|
+
---
|
6
|
+
|
7
|
+
## Terminology
|
8
|
+
|
9
|
+
* **Path** – a dot-separated traversal, e.g. `input.regions.offices.employees.salary`.
|
10
|
+
* **Scope (axes)** – the list of array segments encountered along a path.
|
11
|
+
Example: for `regions.offices.employees.salary` the scope is `[:regions, :offices, :employees]`.
|
12
|
+
* **Rank** – number of axes = `scope.length`.
|
13
|
+
* **Index tuple** – lexicographic coordinates per axis, e.g. `[region_i, office_j, employee_k]`.
|
14
|
+
|
15
|
+
**Three Laws (think of them as invariants):**
|
16
|
+
|
17
|
+
1. **Enumeration**
|
18
|
+
`each_indexed(path).map(&:first) == ravel(path)`
|
19
|
+
|
20
|
+
2. **Reconstruction**
|
21
|
+
`lift(to_scope, each_indexed(path))` regroups by `to_scope` (must be a prefix of `scope(path)`).
|
22
|
+
|
23
|
+
3. **Counting**
|
24
|
+
`size(path) == ravel(path).length == each_indexed(path).count`
|
25
|
+
|
26
|
+
These laws are the mental model. Everything else is just mechanics.
|
27
|
+
|
28
|
+
---
|
29
|
+
|
30
|
+
## Access Modes
|
31
|
+
|
32
|
+
Kumi’s Access Planner emits low-level ops (`enter_hash`, `enter_array`) and supports three vector modes per path:
|
33
|
+
|
34
|
+
### 1) `:materialize`
|
35
|
+
|
36
|
+
Return the **original nested structure** down to that path (no enumeration).
|
37
|
+
Good for “give me the data shaped like the input.”
|
38
|
+
|
39
|
+
```ruby
|
40
|
+
# Input (object mode)
|
41
|
+
{
|
42
|
+
regions: [
|
43
|
+
{ name: "E", offices: [{ employees: [{salary: 100}, {salary: 120}] }] },
|
44
|
+
{ name: "D", offices: [{ employees: [{salary: 90}] }] }
|
45
|
+
]
|
46
|
+
}
|
47
|
+
|
48
|
+
materialize("regions.offices.employees.salary")
|
49
|
+
# => [[ [100,120] ], [ [90] ]]
|
50
|
+
```
|
51
|
+
|
52
|
+
### 2) `:ravel`
|
53
|
+
|
54
|
+
**Enumerate elements at the next array boundary** for that path, i.e., “collect the items at this depth.”
|
55
|
+
It is **not** NumPy’s “flatten everything.” It collects the next level.
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
ravel("regions") # => [ {…E…}, {…D…} ] (enumerate regions)
|
59
|
+
ravel("regions.offices") # => [ {employees:[…]}, {employees:[…]} ] (each office)
|
60
|
+
ravel("regions.offices.employees.salary") # => [ [100,120], [90] ] (each employee group at that depth)
|
61
|
+
```
|
62
|
+
|
63
|
+
### 3) `:each_indexed`
|
64
|
+
|
65
|
+
Enumerate leaf values **with** their index tuple (authoritative for `lift` and alignment):
|
66
|
+
|
67
|
+
```ruby
|
68
|
+
each_indexed("regions.offices.employees.salary")
|
69
|
+
# => [
|
70
|
+
# [100, [0,0,0]], [120, [0,0,1]],
|
71
|
+
# [ 90, [1,0,0]]
|
72
|
+
# ]
|
73
|
+
```
|
74
|
+
|
75
|
+
---
|
76
|
+
|
77
|
+
## Lift (Regroup by prefix)
|
78
|
+
|
79
|
+
`lift(to_scope)` turns a vector-of-rows (from `each_indexed`) into a nested array grouped by `to_scope`.
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
# Given values from each_indexed above:
|
83
|
+
lift([:regions], …) # => [ [100,120], [90] ]
|
84
|
+
lift([:regions,:offices], …) # => [ [[100,120]], [[90]] ]
|
85
|
+
lift([:regions,:offices,:employees], …) # => [ [[[100,120]]], [[[90]]] ]
|
86
|
+
```
|
87
|
+
|
88
|
+
* `to_scope` must be a **prefix** of the vector’s `scope`.
|
89
|
+
* Depth is derived mechanically from index arity; VM doesn’t guess.
|
90
|
+
|
91
|
+
---
|
92
|
+
|
93
|
+
## Alignment & Broadcasting
|
94
|
+
|
95
|
+
When mapping a function over multiple arguments, Kumi:
|
96
|
+
|
97
|
+
1. Picks a **carrier** vector (the one with the longest scope).
|
98
|
+
2. **Aligns** other vectors to the carrier if they are **prefix-compatible** (same axes prefix).
|
99
|
+
3. **Broadcasts** scalars across the carrier.
|
100
|
+
|
101
|
+
If scopes aren’t prefix-compatible, lowering raises:
|
102
|
+
`cross-scope map without join: [:a] vs [:b,:c]`
|
103
|
+
|
104
|
+
```ruby
|
105
|
+
# price, quantity both scope [:items]
|
106
|
+
final = price * quantity # zip by position (same scope)
|
107
|
+
|
108
|
+
# Broadcast scalar across [:items]
|
109
|
+
discounted = price * 0.9
|
110
|
+
|
111
|
+
# Align prefix [:regions] to carrier [:regions,:offices]
|
112
|
+
aligned_tax = align_to(offices_subtotals, regions_tax)
|
113
|
+
total = offices_subtotals * (1 - aligned_tax)
|
114
|
+
```
|
115
|
+
|
116
|
+
---
|
117
|
+
|
118
|
+
## Structure Functions vs Reducers
|
119
|
+
|
120
|
+
* **Reducers** collapse a vector to a **scalar** (e.g., `sum`, `min`, `avg`).
|
121
|
+
Lowering selects a vector argument and emits a `Reduce`.
|
122
|
+
|
123
|
+
* **Structure functions** observe or reshape **structure** (e.g., `size`, `flatten`, `count_across`).
|
124
|
+
Lowering usually uses a `:ravel` plan and a plain `Map` (no indices required).
|
125
|
+
|
126
|
+
### Laws for `size` and `flatten`
|
127
|
+
|
128
|
+
* `size(path) == ravel(path).length` (Counting Law)
|
129
|
+
* `flatten(path)` flattens nested arrays (by default all levels; use `flatten_one` for one level).
|
130
|
+
|
131
|
+
---
|
132
|
+
|
133
|
+
## End-to-End Mini Examples
|
134
|
+
|
135
|
+
### A. Simple vector math + reducers (object access)
|
136
|
+
|
137
|
+
```ruby
|
138
|
+
module Cart
|
139
|
+
extend Kumi::Schema
|
140
|
+
schema do
|
141
|
+
input do
|
142
|
+
array :items do
|
143
|
+
float :price
|
144
|
+
integer :qty
|
145
|
+
end
|
146
|
+
float :shipping_threshold
|
147
|
+
end
|
148
|
+
|
149
|
+
value :subtotals, input.items.price * input.items.qty
|
150
|
+
value :subtotal, fn(:sum, subtotals)
|
151
|
+
value :shipping, subtotal > input.shipping_threshold ? 0.0 : 9.99
|
152
|
+
value :total, subtotal + shipping
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
data = {
|
157
|
+
items: [{price: 100.0, qty: 2}, {price: 200.0, qty: 1}],
|
158
|
+
shipping_threshold: 50.0
|
159
|
+
}
|
160
|
+
|
161
|
+
r = Cart.from(data)
|
162
|
+
r[:subtotals] # => [200.0, 200.0] (vector map)
|
163
|
+
r[:subtotal] # => 400.0 (reducer)
|
164
|
+
r[:shipping] # => 0.0
|
165
|
+
r[:total] # => 400.0
|
166
|
+
```
|
167
|
+
|
168
|
+
**Internal truths**:
|
169
|
+
|
170
|
+
* `each_indexed(input.items.price)` → `[[100.0,[0]],[200.0,[1]]]`
|
171
|
+
* `size(input.items)` → `2` because `ravel(input.items)` has length 2.
|
172
|
+
|
173
|
+
### B. Mixed scopes + alignment
|
174
|
+
|
175
|
+
```ruby
|
176
|
+
module Regions
|
177
|
+
extend Kumi::Schema
|
178
|
+
schema do
|
179
|
+
input do
|
180
|
+
array :regions do
|
181
|
+
float :tax
|
182
|
+
array :offices do
|
183
|
+
array :employees do
|
184
|
+
float :salary
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
value :office_payrolls, fn(:sum, input.regions.offices.employees.salary) # vector reduce per office
|
191
|
+
value :taxed, office_payrolls * (1 - input.regions.tax) # tax (align regions.tax to [:regions,:offices])
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
# Alignment rule: regions.tax (scope [:regions]) aligns to office_payrolls (scope [:regions,:offices])
|
196
|
+
```
|
197
|
+
|
198
|
+
### C. Element access (pure arrays) + structure functions
|
199
|
+
|
200
|
+
```ruby
|
201
|
+
module Cube
|
202
|
+
extend Kumi::Schema
|
203
|
+
schema do
|
204
|
+
input do
|
205
|
+
array :cube do
|
206
|
+
element :array, :layer do
|
207
|
+
element :array, :row do
|
208
|
+
element :float, :cell
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
value :layers, fn(:size, input.cube) # == ravel(input.cube).length
|
215
|
+
value :matrices, fn(:size, input.cube.layer) # enumerate at next depth
|
216
|
+
value :rows, fn(:size, input.cube.layer.row)
|
217
|
+
value :all_values, fn(:flatten, input.cube.layer.row.cell)
|
218
|
+
value :total, fn(:sum, all_values)
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
data = { cube: [ [[1,2],[3]], [[4]] ] }
|
223
|
+
|
224
|
+
# ravel views (intuition)
|
225
|
+
# ravel(cube) => [ [[1,2],[3]], [[4]] ]
|
226
|
+
# ravel(cube.layer) => [ [1,2], [3], [4] ]
|
227
|
+
# ravel(cube.layer.row) => [ 1, 2, 3, 4 ]
|
228
|
+
# ravel(cube.layer.row.cell) => [ 1, 2, 3, 4 ] (same leaf)
|
229
|
+
|
230
|
+
c = Cube.from(data)
|
231
|
+
c[:layers] # => 2
|
232
|
+
c[:matrices] # => 3
|
233
|
+
c[:rows] # => 4
|
234
|
+
c[:all_values] # => [1,2,3,4]
|
235
|
+
c[:total] # => 10
|
236
|
+
```
|
237
|
+
|
238
|
+
---
|
239
|
+
|
240
|
+
## Planner & VM: Who does what?
|
241
|
+
|
242
|
+
* **Planner**: Emits deterministic `enter_hash`/`enter_array` sequences per path and mode.
|
243
|
+
|
244
|
+
* For element edges (inline array aliases), it **does not** emit `enter_hash`.
|
245
|
+
* For `:each_indexed` / `:ravel`, it appends a terminal `enter_array` **only if** the final node is an array.
|
246
|
+
* **Lowerer**: Decides plans (`:ravel`, `:each_indexed`, `:materialize`), inserts `align_to`, emits `lift` at declaration boundary when a vector result should be exposed as a scalar nested array.
|
247
|
+
* **VM**: Purely mechanical:
|
248
|
+
|
249
|
+
* `broadcast_scalar` for scalar→vec expansion,
|
250
|
+
* `zip_same_scope` when scopes match,
|
251
|
+
* `align_to` for prefix alignment,
|
252
|
+
* `group_rows` inside `lift` to reconstruct prefixes.
|
253
|
+
|
254
|
+
No type sniffing or guesses: the IR is the source of truth.
|
255
|
+
|
256
|
+
---
|
257
|
+
|
258
|
+
## Jagged & Sparse Arrays
|
259
|
+
|
260
|
+
* Ordering is **lexicographic by index tuple** (stable).
|
261
|
+
* No padding is introduced; missing branches are just… missing.
|
262
|
+
* `align_to(..., on_missing: :error|:nil)` enforces policy.
|
263
|
+
|
264
|
+
---
|
265
|
+
|
266
|
+
## Error Policies
|
267
|
+
|
268
|
+
For missing keys/arrays, accessors obey policy:
|
269
|
+
|
270
|
+
* `:error` (default) – raise descriptive error with the path/mode.
|
271
|
+
* `:skip` – drop the missing branch (useful in ravels).
|
272
|
+
* `:yield_nil` – emit `nil` in place (preserves cardinality).
|
273
|
+
|
274
|
+
Document these on any user-facing accessor.
|
275
|
+
|
276
|
+
---
|
277
|
+
|
278
|
+
## Quick Cheatsheet
|
279
|
+
|
280
|
+
* Use **`ravel(path)`** to “list the things at this level.”
|
281
|
+
* Use **`each_indexed(path)`** when you need `(value, idx)` pairs for joins/regroup.
|
282
|
+
* Use **`lift(to_scope, each_indexed(path))`** to reconstruct nested structure.
|
283
|
+
* **Reducers** (e.g., `sum`, `avg`, `min`) consume the raveled view of their argument.
|
284
|
+
* **Structure functions** (e.g., `size`, `flatten`, `flatten_one`, `count_across`) operate on structure at that depth and usually compile via `:ravel`.
|
285
|
+
|
286
|
+
Keep the three laws in mind and Kumi’s behavior is predictable—even over deeply nested, heterogeneous data.
|
data/docs/features/hierarchical-broadcasting.md
CHANGED
@@ -193,7 +193,7 @@ The type system automatically infers appropriate types for broadcasted operation
|
|
193
193
|
|
194
194
|
### Analysis Layer
|
195
195
|
- **BroadcastDetector** - Identifies vectorized vs scalar operations
|
196
|
-
- **TypeInferencer** - Infers types for array element access patterns
|
196
|
+
- **TypeInferencerPass** - Infers types for array element access patterns
|
197
197
|
|
198
198
|
### Compilation Layer
|
199
199
|
- **Automatic Dispatch** - Maps element-wise operations to array map functions
|
data/docs/features/s-expression-printer.md
CHANGED
@@ -42,7 +42,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
|
|
42
42
|
(InputDeclaration :age :integer)
|
43
43
|
(InputDeclaration :name :string)
|
44
44
|
]
|
45
|
-
|
45
|
+
values: [
|
46
46
|
(ValueDeclaration :greeting
|
47
47
|
(CallExpression :concat
|
48
48
|
(Literal "Hello ")
|
@@ -65,7 +65,7 @@ The printer produces indented S-expressions that clearly show the hierarchical s
|
|
65
65
|
|
66
66
|
The printer handles all Kumi AST node types:
|
67
67
|
|
68
|
-
- **Root** - Schema container with inputs,
|
68
|
+
- **Root** - Schema container with inputs, values, and traits
|
69
69
|
- **Declarations** - InputDeclaration, ValueDeclaration, TraitDeclaration
|
70
70
|
- **Expressions** - CallExpression, ArrayExpression, CascadeExpression, CaseExpression
|
71
71
|
- **References** - InputReference, InputElementReference, DeclarationReference
|
data/examples/deep_schema_compilation_and_evaluation_benchmark.rb
CHANGED
@@ -86,21 +86,27 @@ puts
|
|
86
86
|
# ------------------------------------------------------------------
|
87
87
|
Benchmark.ips do |x|
|
88
88
|
schemas.each do |d, schema|
|
89
|
-
|
90
|
-
|
89
|
+
# 1) HOT (memoized): expect ~flat, nanosecond-level if cached
|
90
|
+
hot = schema.from(seed: 0)
|
91
|
+
x.report("HOT fetch #{d}-deep") do
|
92
|
+
hot[:final_result]
|
93
|
+
end
|
94
|
+
|
95
|
+
# 2) COLD via UPDATE (no memoized result): change a dependent input each iter
|
96
|
+
upd = schema.from(seed: 0)
|
97
|
+
i = 0
|
98
|
+
x.report("COLD update #{d}-deep") do
|
99
|
+
i += 1
|
100
|
+
upd.update(seed: i) # invalidates v0..vN; forces recompute
|
101
|
+
upd[:final_result]
|
102
|
+
end
|
103
|
+
|
104
|
+
# 3) COLD new runner (includes construction)
|
105
|
+
prng = Random.new(42)
|
106
|
+
x.report("COLD new #{d}-deep") do
|
107
|
+
r = schema.from(seed: prng.rand(1_000_000))
|
108
|
+
r[:final_result]
|
109
|
+
end
|
91
110
|
end
|
92
111
|
x.compare!
|
93
112
|
end
|
94
|
-
# Warming up --------------------------------------
|
95
|
-
# eval 50-deep 222.000 i/100ms
|
96
|
-
# eval 100-deep 57.000 i/100ms
|
97
|
-
# eval 150-deep 26.000 i/100ms
|
98
|
-
# Calculating -------------------------------------
|
99
|
-
# eval 50-deep 2.166k (± 1.9%) i/s (461.70 μs/i) - 10.878k in 5.024320s
|
100
|
-
# eval 100-deep 561.698 (± 1.4%) i/s (1.78 ms/i) - 2.850k in 5.075057s
|
101
|
-
# eval 150-deep 253.732 (± 0.8%) i/s (3.94 ms/i) - 1.274k in 5.021499s
|
102
|
-
|
103
|
-
# Comparison:
|
104
|
-
# eval 50-deep: 2165.9 i/s
|
105
|
-
# eval 100-deep: 561.7 i/s - 3.86x slower
|
106
|
-
# eval 150-deep: 253.7 i/s - 8.54x slower
|