kumi 0.0.9 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/CLAUDE.md +18 -258
- data/README.md +188 -121
- data/docs/AST.md +1 -1
- data/docs/FUNCTIONS.md +52 -8
- data/docs/VECTOR_SEMANTICS.md +286 -0
- data/docs/compiler_design_principles.md +86 -0
- data/docs/features/README.md +15 -2
- data/docs/features/hierarchical-broadcasting.md +349 -0
- data/docs/features/javascript-transpiler.md +148 -0
- data/docs/features/performance.md +1 -3
- data/docs/features/s-expression-printer.md +2 -2
- data/docs/schema_metadata.md +7 -7
- data/examples/deep_schema_compilation_and_evaluation_benchmark.rb +21 -15
- data/examples/game_of_life.rb +2 -4
- data/lib/kumi/analyzer.rb +34 -14
- data/lib/kumi/compiler.rb +4 -283
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +717 -66
- data/lib/kumi/core/analyzer/passes/dependency_resolver.rb +1 -1
- data/lib/kumi/core/analyzer/passes/input_access_planner_pass.rb +47 -0
- data/lib/kumi/core/analyzer/passes/input_collector.rb +118 -99
- data/lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb +293 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +993 -0
- data/lib/kumi/core/analyzer/passes/pass_base.rb +2 -2
- data/lib/kumi/core/analyzer/passes/scope_resolution_pass.rb +346 -0
- data/lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb +28 -0
- data/lib/kumi/core/analyzer/passes/toposorter.rb +9 -3
- data/lib/kumi/core/analyzer/passes/type_checker.rb +9 -5
- data/lib/kumi/core/analyzer/passes/type_consistency_checker.rb +2 -2
- data/lib/kumi/core/analyzer/passes/{type_inferencer.rb → type_inferencer_pass.rb} +4 -4
- data/lib/kumi/core/analyzer/passes/unsat_detector.rb +92 -48
- data/lib/kumi/core/analyzer/plans.rb +52 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +20 -0
- data/lib/kumi/core/analyzer/structs/input_meta.rb +29 -0
- data/lib/kumi/core/compiler/access_builder.rb +36 -0
- data/lib/kumi/core/compiler/access_planner.rb +219 -0
- data/lib/kumi/core/compiler/accessors/base.rb +69 -0
- data/lib/kumi/core/compiler/accessors/each_indexed_accessor.rb +84 -0
- data/lib/kumi/core/compiler/accessors/materialize_accessor.rb +55 -0
- data/lib/kumi/core/compiler/accessors/ravel_accessor.rb +73 -0
- data/lib/kumi/core/compiler/accessors/read_accessor.rb +41 -0
- data/lib/kumi/core/compiler_base.rb +137 -0
- data/lib/kumi/core/error_reporter.rb +6 -5
- data/lib/kumi/core/errors.rb +4 -0
- data/lib/kumi/core/explain.rb +157 -205
- data/lib/kumi/core/export/node_builders.rb +2 -2
- data/lib/kumi/core/export/node_serializers.rb +1 -1
- data/lib/kumi/core/function_registry/collection_functions.rb +100 -6
- data/lib/kumi/core/function_registry/conditional_functions.rb +14 -4
- data/lib/kumi/core/function_registry/function_builder.rb +142 -53
- data/lib/kumi/core/function_registry/logical_functions.rb +173 -3
- data/lib/kumi/core/function_registry/stat_functions.rb +156 -0
- data/lib/kumi/core/function_registry.rb +138 -98
- data/lib/kumi/core/ir/execution_engine/combinators.rb +117 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +336 -0
- data/lib/kumi/core/ir/execution_engine/values.rb +46 -0
- data/lib/kumi/core/ir/execution_engine.rb +50 -0
- data/lib/kumi/core/ir.rb +58 -0
- data/lib/kumi/core/ruby_parser/build_context.rb +2 -2
- data/lib/kumi/core/ruby_parser/declaration_reference_proxy.rb +0 -12
- data/lib/kumi/core/ruby_parser/dsl_cascade_builder.rb +37 -16
- data/lib/kumi/core/ruby_parser/input_builder.rb +61 -8
- data/lib/kumi/core/ruby_parser/parser.rb +1 -1
- data/lib/kumi/core/ruby_parser/schema_builder.rb +2 -2
- data/lib/kumi/core/ruby_parser/sugar.rb +7 -0
- data/lib/kumi/errors.rb +2 -0
- data/lib/kumi/js.rb +23 -0
- data/lib/kumi/registry.rb +17 -22
- data/lib/kumi/runtime/executable.rb +213 -0
- data/lib/kumi/schema.rb +15 -4
- data/lib/kumi/schema_metadata.rb +2 -2
- data/lib/kumi/support/ir_dump.rb +491 -0
- data/lib/kumi/support/s_expression_printer.rb +17 -16
- data/lib/kumi/syntax/array_expression.rb +6 -6
- data/lib/kumi/syntax/call_expression.rb +4 -4
- data/lib/kumi/syntax/cascade_expression.rb +4 -4
- data/lib/kumi/syntax/case_expression.rb +4 -4
- data/lib/kumi/syntax/declaration_reference.rb +4 -4
- data/lib/kumi/syntax/hash_expression.rb +4 -4
- data/lib/kumi/syntax/input_declaration.rb +6 -5
- data/lib/kumi/syntax/input_element_reference.rb +5 -5
- data/lib/kumi/syntax/input_reference.rb +5 -5
- data/lib/kumi/syntax/literal.rb +4 -4
- data/lib/kumi/syntax/location.rb +5 -0
- data/lib/kumi/syntax/node.rb +33 -34
- data/lib/kumi/syntax/root.rb +6 -6
- data/lib/kumi/syntax/trait_declaration.rb +4 -4
- data/lib/kumi/syntax/value_declaration.rb +4 -4
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +6 -15
- data/scripts/analyze_broadcast_methods.rb +68 -0
- data/scripts/analyze_cascade_methods.rb +74 -0
- data/scripts/check_broadcasting_coverage.rb +51 -0
- data/scripts/find_dead_code.rb +114 -0
- metadata +36 -9
- data/docs/features/array-broadcasting.md +0 -170
- data/lib/kumi/cli.rb +0 -449
- data/lib/kumi/core/compiled_schema.rb +0 -43
- data/lib/kumi/core/evaluation_wrapper.rb +0 -40
- data/lib/kumi/core/schema_instance.rb +0 -111
- data/lib/kumi/core/vectorization_metadata.rb +0 -110
- data/migrate_to_core_iterative.rb +0 -938
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kumi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- André Muta
|
@@ -31,6 +31,7 @@ extra_rdoc_files: []
|
|
31
31
|
files:
|
32
32
|
- ".rspec"
|
33
33
|
- ".rubocop.yml"
|
34
|
+
- CHANGELOG.md
|
34
35
|
- CLAUDE.md
|
35
36
|
- LICENSE.txt
|
36
37
|
- README.md
|
@@ -39,14 +40,17 @@ files:
|
|
39
40
|
- docs/DSL.md
|
40
41
|
- docs/FUNCTIONS.md
|
41
42
|
- docs/SYNTAX.md
|
43
|
+
- docs/VECTOR_SEMANTICS.md
|
44
|
+
- docs/compiler_design_principles.md
|
42
45
|
- docs/development/README.md
|
43
46
|
- docs/development/error-reporting.md
|
44
47
|
- docs/features/README.md
|
45
48
|
- docs/features/analysis-cascade-mutual-exclusion.md
|
46
49
|
- docs/features/analysis-type-inference.md
|
47
50
|
- docs/features/analysis-unsat-detection.md
|
48
|
-
- docs/features/array-broadcasting.md
|
51
|
+
- docs/features/hierarchical-broadcasting.md
|
49
52
|
- docs/features/input-declaration-system.md
|
53
|
+
- docs/features/javascript-transpiler.md
|
50
54
|
- docs/features/performance.md
|
51
55
|
- docs/features/s-expression-printer.md
|
52
56
|
- docs/schema_metadata.md
|
@@ -66,25 +70,38 @@ files:
|
|
66
70
|
- examples/wide_schema_compilation_and_evaluation_benchmark.rb
|
67
71
|
- lib/kumi.rb
|
68
72
|
- lib/kumi/analyzer.rb
|
69
|
-
- lib/kumi/cli.rb
|
70
73
|
- lib/kumi/compiler.rb
|
71
74
|
- lib/kumi/core/analyzer/analysis_state.rb
|
72
75
|
- lib/kumi/core/analyzer/constant_evaluator.rb
|
73
76
|
- lib/kumi/core/analyzer/passes/broadcast_detector.rb
|
74
77
|
- lib/kumi/core/analyzer/passes/declaration_validator.rb
|
75
78
|
- lib/kumi/core/analyzer/passes/dependency_resolver.rb
|
79
|
+
- lib/kumi/core/analyzer/passes/input_access_planner_pass.rb
|
76
80
|
- lib/kumi/core/analyzer/passes/input_collector.rb
|
81
|
+
- lib/kumi/core/analyzer/passes/join_reduce_planning_pass.rb
|
82
|
+
- lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb
|
77
83
|
- lib/kumi/core/analyzer/passes/name_indexer.rb
|
78
84
|
- lib/kumi/core/analyzer/passes/pass_base.rb
|
85
|
+
- lib/kumi/core/analyzer/passes/scope_resolution_pass.rb
|
79
86
|
- lib/kumi/core/analyzer/passes/semantic_constraint_validator.rb
|
80
87
|
- lib/kumi/core/analyzer/passes/toposorter.rb
|
81
88
|
- lib/kumi/core/analyzer/passes/type_checker.rb
|
82
89
|
- lib/kumi/core/analyzer/passes/type_consistency_checker.rb
|
83
|
-
- lib/kumi/core/analyzer/passes/type_inferencer.rb
|
90
|
+
- lib/kumi/core/analyzer/passes/type_inferencer_pass.rb
|
84
91
|
- lib/kumi/core/analyzer/passes/unsat_detector.rb
|
85
92
|
- lib/kumi/core/analyzer/passes/visitor_pass.rb
|
93
|
+
- lib/kumi/core/analyzer/plans.rb
|
94
|
+
- lib/kumi/core/analyzer/structs/access_plan.rb
|
95
|
+
- lib/kumi/core/analyzer/structs/input_meta.rb
|
86
96
|
- lib/kumi/core/atom_unsat_solver.rb
|
87
|
-
- lib/kumi/core/compiled_schema.rb
|
97
|
+
- lib/kumi/core/compiler/access_builder.rb
|
98
|
+
- lib/kumi/core/compiler/access_planner.rb
|
99
|
+
- lib/kumi/core/compiler/accessors/base.rb
|
100
|
+
- lib/kumi/core/compiler/accessors/each_indexed_accessor.rb
|
101
|
+
- lib/kumi/core/compiler/accessors/materialize_accessor.rb
|
102
|
+
- lib/kumi/core/compiler/accessors/ravel_accessor.rb
|
103
|
+
- lib/kumi/core/compiler/accessors/read_accessor.rb
|
104
|
+
- lib/kumi/core/compiler_base.rb
|
88
105
|
- lib/kumi/core/constraint_relationship_solver.rb
|
89
106
|
- lib/kumi/core/domain/enum_analyzer.rb
|
90
107
|
- lib/kumi/core/domain/range_analyzer.rb
|
@@ -93,7 +110,6 @@ files:
|
|
93
110
|
- lib/kumi/core/error_reporter.rb
|
94
111
|
- lib/kumi/core/error_reporting.rb
|
95
112
|
- lib/kumi/core/errors.rb
|
96
|
-
- lib/kumi/core/evaluation_wrapper.rb
|
97
113
|
- lib/kumi/core/explain.rb
|
98
114
|
- lib/kumi/core/export.rb
|
99
115
|
- lib/kumi/core/export/deserializer.rb
|
@@ -109,11 +125,17 @@ files:
|
|
109
125
|
- lib/kumi/core/function_registry/function_builder.rb
|
110
126
|
- lib/kumi/core/function_registry/logical_functions.rb
|
111
127
|
- lib/kumi/core/function_registry/math_functions.rb
|
128
|
+
- lib/kumi/core/function_registry/stat_functions.rb
|
112
129
|
- lib/kumi/core/function_registry/string_functions.rb
|
113
130
|
- lib/kumi/core/function_registry/type_functions.rb
|
114
131
|
- lib/kumi/core/input/type_matcher.rb
|
115
132
|
- lib/kumi/core/input/validator.rb
|
116
133
|
- lib/kumi/core/input/violation_creator.rb
|
134
|
+
- lib/kumi/core/ir.rb
|
135
|
+
- lib/kumi/core/ir/execution_engine.rb
|
136
|
+
- lib/kumi/core/ir/execution_engine/combinators.rb
|
137
|
+
- lib/kumi/core/ir/execution_engine/interpreter.rb
|
138
|
+
- lib/kumi/core/ir/execution_engine/values.rb
|
117
139
|
- lib/kumi/core/json_schema.rb
|
118
140
|
- lib/kumi/core/json_schema/generator.rb
|
119
141
|
- lib/kumi/core/json_schema/validator.rb
|
@@ -131,7 +153,6 @@ files:
|
|
131
153
|
- lib/kumi/core/ruby_parser/parser.rb
|
132
154
|
- lib/kumi/core/ruby_parser/schema_builder.rb
|
133
155
|
- lib/kumi/core/ruby_parser/sugar.rb
|
134
|
-
- lib/kumi/core/schema_instance.rb
|
135
156
|
- lib/kumi/core/types.rb
|
136
157
|
- lib/kumi/core/types/builder.rb
|
137
158
|
- lib/kumi/core/types/compatibility.rb
|
@@ -139,11 +160,13 @@ files:
|
|
139
160
|
- lib/kumi/core/types/inference.rb
|
140
161
|
- lib/kumi/core/types/normalizer.rb
|
141
162
|
- lib/kumi/core/types/validator.rb
|
142
|
-
- lib/kumi/core/vectorization_metadata.rb
|
143
163
|
- lib/kumi/errors.rb
|
164
|
+
- lib/kumi/js.rb
|
144
165
|
- lib/kumi/registry.rb
|
166
|
+
- lib/kumi/runtime/executable.rb
|
145
167
|
- lib/kumi/schema.rb
|
146
168
|
- lib/kumi/schema_metadata.rb
|
169
|
+
- lib/kumi/support/ir_dump.rb
|
147
170
|
- lib/kumi/support/s_expression_printer.rb
|
148
171
|
- lib/kumi/syntax/array_expression.rb
|
149
172
|
- lib/kumi/syntax/call_expression.rb
|
@@ -155,12 +178,16 @@ files:
|
|
155
178
|
- lib/kumi/syntax/input_element_reference.rb
|
156
179
|
- lib/kumi/syntax/input_reference.rb
|
157
180
|
- lib/kumi/syntax/literal.rb
|
181
|
+
- lib/kumi/syntax/location.rb
|
158
182
|
- lib/kumi/syntax/node.rb
|
159
183
|
- lib/kumi/syntax/root.rb
|
160
184
|
- lib/kumi/syntax/trait_declaration.rb
|
161
185
|
- lib/kumi/syntax/value_declaration.rb
|
162
186
|
- lib/kumi/version.rb
|
163
|
-
- migrate_to_core_iterative.rb
|
187
|
+
- scripts/analyze_broadcast_methods.rb
|
188
|
+
- scripts/analyze_cascade_methods.rb
|
189
|
+
- scripts/check_broadcasting_coverage.rb
|
190
|
+
- scripts/find_dead_code.rb
|
164
191
|
- scripts/generate_function_docs.rb
|
165
192
|
homepage: https://github.com/amuta/kumi
|
166
193
|
licenses:
|
@@ -1,170 +0,0 @@
|
|
1
|
-
# Array Broadcasting
|
2
|
-
|
3
|
-
Automatic vectorization of operations over array fields with element-wise computation and aggregation.
|
4
|
-
|
5
|
-
## Overview
|
6
|
-
|
7
|
-
The array broadcasting system enables natural field access syntax on array inputs (`input.items.price`) that automatically applies operations element-wise across the array, with intelligent detection of map vs reduce operations.
|
8
|
-
|
9
|
-
## Core Mechanism
|
10
|
-
|
11
|
-
The system uses a three-stage pipeline:
|
12
|
-
|
13
|
-
1. **Parser** - Creates InputElementReference AST nodes for nested field access
|
14
|
-
2. **BroadcastDetector** - Identifies which operations should be vectorized vs scalar
|
15
|
-
3. **Compiler** - Generates appropriate map/reduce functions based on usage context
|
16
|
-
|
17
|
-
## Basic Broadcasting
|
18
|
-
|
19
|
-
```ruby
|
20
|
-
schema do
|
21
|
-
input do
|
22
|
-
array :line_items do
|
23
|
-
float :price
|
24
|
-
integer :quantity
|
25
|
-
string :category
|
26
|
-
end
|
27
|
-
float :tax_rate, type: :float
|
28
|
-
end
|
29
|
-
|
30
|
-
# Element-wise computation - broadcasts over each item
|
31
|
-
value :subtotals, input.line_items.price * input.line_items.quantity
|
32
|
-
|
33
|
-
# Element-wise traits - applied to each item
|
34
|
-
trait :is_taxable, (input.line_items.category != "digital")
|
35
|
-
|
36
|
-
# Conditional logic - element-wise evaluation
|
37
|
-
value :taxes, fn(:if, is_taxable, subtotals * input.tax_rate, 0.0)
|
38
|
-
end
|
39
|
-
```
|
40
|
-
|
41
|
-
## Aggregation Operations
|
42
|
-
|
43
|
-
Operations that consume arrays to produce scalars are automatically detected:
|
44
|
-
|
45
|
-
```ruby
|
46
|
-
schema do
|
47
|
-
# These aggregate the vectorized results
|
48
|
-
value :total_subtotal, fn(:sum, subtotals)
|
49
|
-
value :total_tax, fn(:sum, taxes)
|
50
|
-
value :grand_total, total_subtotal + total_tax
|
51
|
-
|
52
|
-
# Statistics over arrays
|
53
|
-
value :avg_price, fn(:avg, input.line_items.price)
|
54
|
-
value :max_quantity, fn(:max, input.line_items.quantity)
|
55
|
-
end
|
56
|
-
```
|
57
|
-
|
58
|
-
## Field Access Nesting
|
59
|
-
|
60
|
-
Supports arbitrary depth field access with path building:
|
61
|
-
|
62
|
-
```ruby
|
63
|
-
schema do
|
64
|
-
input do
|
65
|
-
array :orders do
|
66
|
-
array :items do
|
67
|
-
hash :product do
|
68
|
-
string :name
|
69
|
-
float :base_price
|
70
|
-
end
|
71
|
-
integer :quantity
|
72
|
-
end
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
# Deep field access - automatically broadcasts over nested arrays
|
77
|
-
value :all_product_names, input.orders.items.product.name
|
78
|
-
value :total_values, input.orders.items.product.base_price * input.orders.items.quantity
|
79
|
-
end
|
80
|
-
```
|
81
|
-
|
82
|
-
## Type Inference
|
83
|
-
|
84
|
-
The type system automatically infers appropriate types for broadcasted operations:
|
85
|
-
|
86
|
-
- `input.items.price` (float array) → inferred as `:float` per element
|
87
|
-
- `input.items.price * input.items.quantity` → element-wise `:float` result
|
88
|
-
- `fn(:sum, input.items.price)` → scalar `:float` result
|
89
|
-
|
90
|
-
## Implementation Details
|
91
|
-
|
92
|
-
### Parser Layer
|
93
|
-
- **InputFieldProxy** - Handles `input.field.subfield...` with path building
|
94
|
-
- **InputElementReference** - AST node representing array field access paths
|
95
|
-
|
96
|
-
### Analysis Layer
|
97
|
-
- **BroadcastDetector** - Identifies vectorized vs scalar operations
|
98
|
-
- **TypeInferencer** - Infers types for array element access patterns
|
99
|
-
|
100
|
-
### Compilation Layer
|
101
|
-
- **Automatic Dispatch** - Maps element-wise operations to array map functions
|
102
|
-
- **Reduction Detection** - Converts aggregation functions to array reduce operations
|
103
|
-
|
104
|
-
## Usage Patterns
|
105
|
-
|
106
|
-
### Element-wise Operations
|
107
|
-
```ruby
|
108
|
-
# All of these broadcast element-wise
|
109
|
-
value :discounted_prices, input.items.price * 0.9
|
110
|
-
trait :expensive, (input.items.price > 100.0)
|
111
|
-
value :categories, input.items.category
|
112
|
-
```
|
113
|
-
|
114
|
-
### Aggregation Operations
|
115
|
-
```ruby
|
116
|
-
# These consume arrays to produce scalars
|
117
|
-
value :item_count, fn(:size, input.items)
|
118
|
-
value :total_price, fn(:sum, input.items.price)
|
119
|
-
value :has_expensive, fn(:any?, expensive)
|
120
|
-
```
|
121
|
-
|
122
|
-
### Mixed Operations
|
123
|
-
```ruby
|
124
|
-
# Element-wise computation followed by aggregation
|
125
|
-
value :line_totals, input.items.price * input.items.quantity
|
126
|
-
value :order_total, fn(:sum, line_totals)
|
127
|
-
value :avg_line_total, fn(:avg, line_totals)
|
128
|
-
```
|
129
|
-
|
130
|
-
## Error Handling
|
131
|
-
|
132
|
-
### Dimension Mismatch Detection
|
133
|
-
|
134
|
-
Array broadcasting operations are only valid within the same array source. Attempting to broadcast across different arrays generates detailed error messages:
|
135
|
-
|
136
|
-
```ruby
|
137
|
-
schema do
|
138
|
-
input do
|
139
|
-
array :items do
|
140
|
-
string :name
|
141
|
-
end
|
142
|
-
array :logs do
|
143
|
-
string :user_name
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
# This will generate a dimension mismatch error
|
148
|
-
trait :same_name, input.items.name == input.logs.user_name
|
149
|
-
end
|
150
|
-
|
151
|
-
# Error:
|
152
|
-
# Cannot broadcast operation across arrays from different sources: items, logs.
|
153
|
-
# Problem: Multiple operands are arrays from different sources:
|
154
|
-
# - Operand 1 resolves to array(string) from array 'items'
|
155
|
-
# - Operand 2 resolves to array(string) from array 'logs'
|
156
|
-
# Direct operations on arrays from different sources is ambiguous and not supported.
|
157
|
-
# Vectorized operations can only work on fields from the same array input.
|
158
|
-
```
|
159
|
-
|
160
|
-
The error messages provide:
|
161
|
-
- **Quick Summary**: Identifies the conflicting array sources
|
162
|
-
- **Type Information**: Shows the resolved types of each operand
|
163
|
-
- **Clear Explanation**: Why the operation is ambiguous and not supported
|
164
|
-
|
165
|
-
## Performance Characteristics
|
166
|
-
|
167
|
-
- **Single Pass** - Each array is traversed once per computation chain
|
168
|
-
- **Lazy Evaluation** - Operations are composed into efficient pipelines
|
169
|
-
- **Memory Efficient** - No intermediate array allocations for simple operations
|
170
|
-
- **Type Safe** - Full compile-time type checking for array element operations
|