kumi 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/README.md +0 -27
- data/docs/dev/vm-profiling.md +95 -0
- data/docs/features/README.md +0 -7
- data/lib/kumi/analyzer.rb +5 -2
- data/lib/kumi/compiler.rb +6 -5
- data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +67 -0
- data/lib/kumi/core/analyzer/passes/toposorter.rb +3 -35
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +42 -30
- data/lib/kumi/core/ir/execution_engine/profiler.rb +139 -11
- data/lib/kumi/core/ir/execution_engine.rb +6 -15
- data/lib/kumi/dev/profile_aggregator.rb +301 -0
- data/lib/kumi/dev/profile_runner.rb +199 -0
- data/lib/kumi/dev/runner.rb +3 -1
- data/lib/kumi/dev.rb +14 -0
- data/lib/kumi/runtime/executable.rb +61 -29
- data/lib/kumi/schema.rb +9 -3
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +1 -0
- metadata +6 -2
- data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5405d7d0612a81e5154bd1d452fdfc150691b022137fc0ee132c47ede1a58e2e
|
4
|
+
data.tar.gz: '093cf7a6d305c02f92de600b06f62be39f8af90d798a0f93ed3ef59f539ada9b'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b3ea711bf465e0c11cc95fabb3809dd632ebbfcc8c36297b161fb1f179fffdda5df1e5c033968e837dd8ed3f983639416de08bd371f30be9f2cefd5543efe1ff
|
7
|
+
data.tar.gz: 0a63fe824fb604639b4efb9cfc2ce24a93429110adc75cfd3edc905c58b3501273ad07a3cf66415eabce4e099c5fd4d202bd2c4064875d7a73e0c2a26f0689cb
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,38 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.0.15] – 2025-08-21
|
4
|
+
### Added
|
5
|
+
- (DX) Schema-aware VM profiling with multi-schema performance analysis
|
6
|
+
- DAG-based execution optimization with pre-computed dependency resolution
|
7
|
+
|
8
|
+
### Performance
|
9
|
+
- Reference operations eliminated as VM bottleneck via O(1) hash lookups
|
10
|
+
|
11
|
+
## [0.0.14] – 2025-08-21
|
12
|
+
### Added
|
13
|
+
- Text schema frontend with `.kumi` file format support
|
14
|
+
- `bin/kumi parse` command for schema analysis and golden file testing
|
15
|
+
- LoadInputCSE optimization pass to eliminate redundant load operations
|
16
|
+
- Runtime accessor caching with precise field-based invalidation
|
17
|
+
- VM profiler with wall time, CPU time, and cache hit rate analysis
|
18
|
+
- Structured analyzer debug system with state inspection
|
19
|
+
- Checkpoint system for capturing and comparing analyzer states
|
20
|
+
- State serialization (StateSerde) for golden testing and regression detection
|
21
|
+
- Debug object printers with configurable truncation
|
22
|
+
- Multi-run averaging for stable performance benchmarking
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
- VM targeting for `__vec` twin declarations that were failing to resolve
|
26
|
+
- Demand-driven reference resolution with proper name indexing and cycle detection
|
27
|
+
- Accessor cache invalidation now uses precise field dependencies instead of clearing all caches
|
28
|
+
- StateSerde JSON serialization issues with frozen hashes, Sets, and Symbols
|
29
|
+
|
30
|
+
### Performance
|
31
|
+
- 14x improvement on update-heavy workloads (1.88k → 26.88k iterations/second)
|
32
|
+
- 30-40% reduction in IR module size for schemas with repeated field access
|
33
|
+
- Eliminated load_input performance bottleneck that was consuming ~99% of execution time
|
34
|
+
- Optional caching system (enabled via KUMI_VM_CACHE=1) for performance-critical scenarios
|
35
|
+
|
3
36
|
## [0.0.13] – 2025-08-14
|
4
37
|
### Added
|
5
38
|
- Runtime performance optimizations for interpreter execution
|
data/README.md
CHANGED
@@ -207,33 +207,6 @@ end
|
|
207
207
|
# ❌ Function arity error: divide expects 2 arguments, got 1
|
208
208
|
```
|
209
209
|
|
210
|
-
**Mutual Recursion**: Kumi supports mutual recursion when cascade conditions are mutually exclusive:
|
211
|
-
|
212
|
-
```ruby
|
213
|
-
trait :is_forward, input.operation == "forward"
|
214
|
-
trait :is_reverse, input.operation == "reverse"
|
215
|
-
|
216
|
-
# Safe mutual recursion - conditions are mutually exclusive
|
217
|
-
value :forward_processor do
|
218
|
-
on is_forward, input.value * 2 # Direct calculation
|
219
|
-
on is_reverse, reverse_processor + 10 # Delegates to reverse (safe)
|
220
|
-
base "invalid operation"
|
221
|
-
end
|
222
|
-
|
223
|
-
value :reverse_processor do
|
224
|
-
on is_forward, forward_processor - 5 # Delegates to forward (safe)
|
225
|
-
on is_reverse, input.value / 2 # Direct calculation
|
226
|
-
base "invalid operation"
|
227
|
-
end
|
228
|
-
|
229
|
-
# Usage examples:
|
230
|
-
# operation="forward", value=10 => forward: 20, reverse: 15
|
231
|
-
# operation="reverse", value=10 => forward: 15, reverse: 5
|
232
|
-
# operation="unknown", value=10 => both: "invalid operation"
|
233
|
-
```
|
234
|
-
|
235
|
-
This compiles because `operation` can only be "forward" or "reverse", never both. Each recursion executes one step before hitting a direct calculation.
|
236
|
-
|
237
210
|
#### **Runtime Introspection: Debug and Understand**
|
238
211
|
|
239
212
|
**Explainability**: Trace exactly how any value is computed, step-by-step. This is invaluable for debugging complex logic and auditing results.
|
data/docs/dev/vm-profiling.md
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
# VM Profiling with Schema Differentiation
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
Profiles VM operation execution with schema-level differentiation. Tracks operations by schema type for multi-schema performance analysis.
|
6
|
+
|
7
|
+
## Core Components
|
8
|
+
|
9
|
+
**Profiler**: `lib/kumi/core/ir/execution_engine/profiler.rb`
|
10
|
+
- Streams VM operation events with schema identification
|
11
|
+
- Supports persistent mode for cross-run analysis
|
12
|
+
- JSONL event format with operation metadata
|
13
|
+
|
14
|
+
**Profile Aggregator**: `lib/kumi/dev/profile_aggregator.rb`
|
15
|
+
- Analyzes profiling data by schema type
|
16
|
+
- Generates summary and detailed performance reports
|
17
|
+
- Schema breakdown showing operations and timing per schema
|
18
|
+
|
19
|
+
**CLI Integration**: `bin/kumi profile`
|
20
|
+
- Processes JSONL profiling data files
|
21
|
+
- Multiple output formats: summary, detailed, raw
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
### Basic Profiling
|
26
|
+
|
27
|
+
```bash
|
28
|
+
# Single schema with operations
|
29
|
+
KUMI_PROFILE=1 KUMI_PROFILE_OPS=1 KUMI_PROFILE_FILE=profile.jsonl ruby script.rb
|
30
|
+
|
31
|
+
# Persistent mode across multiple runs
|
32
|
+
KUMI_PROFILE=1 KUMI_PROFILE_PERSISTENT=1 KUMI_PROFILE_OPS=1 KUMI_PROFILE_FILE=profile.jsonl ruby script.rb
|
33
|
+
|
34
|
+
# Streaming mode for real-time analysis
|
35
|
+
KUMI_PROFILE=1 KUMI_PROFILE_STREAM=1 KUMI_PROFILE_OPS=1 KUMI_PROFILE_FILE=profile.jsonl ruby script.rb
|
36
|
+
```
|
37
|
+
|
38
|
+
### CLI Analysis
|
39
|
+
|
40
|
+
```bash
|
41
|
+
# Summary report with schema breakdown
|
42
|
+
kumi profile profile.jsonl --summary
|
43
|
+
|
44
|
+
# Detailed per-operation analysis
|
45
|
+
kumi profile profile.jsonl --detailed
|
46
|
+
|
47
|
+
# Raw event stream
|
48
|
+
kumi profile profile.jsonl --raw
|
49
|
+
```
|
50
|
+
|
51
|
+
## Environment Variables
|
52
|
+
|
53
|
+
**Core**:
|
54
|
+
- `KUMI_PROFILE=1` - Enable profiling
|
55
|
+
- `KUMI_PROFILE_FILE=path` - Output file (required)
|
56
|
+
- `KUMI_PROFILE_OPS=1` - Enable VM operation profiling
|
57
|
+
|
58
|
+
**Modes**:
|
59
|
+
- `KUMI_PROFILE_PERSISTENT=1` - Append to existing files across runs
|
60
|
+
- `KUMI_PROFILE_STREAM=1` - Stream individual events vs batch
|
61
|
+
- `KUMI_PROFILE_TRUNCATE=1` - Truncate existing files
|
62
|
+
|
63
|
+
## Event Format
|
64
|
+
|
65
|
+
JSONL with operation metadata:
|
66
|
+
|
67
|
+
```json
|
68
|
+
{"event":"vm_operation","schema":"TestSchema","operation":"LoadInput","duration_ms":0.001,"timestamp":"2025-01-20T10:30:45.123Z"}
|
69
|
+
{"event":"vm_operation","schema":"TestSchema","operation":"Map","duration_ms":0.002,"timestamp":"2025-01-20T10:30:45.125Z"}
|
70
|
+
```
|
71
|
+
|
72
|
+
## Schema Differentiation
|
73
|
+
|
74
|
+
Tracks operations by schema class name for multi-schema analysis:
|
75
|
+
|
76
|
+
**Implementation**:
|
77
|
+
- Schema name propagated through compilation pipeline
|
78
|
+
- Profiler tags each VM operation with schema identifier
|
79
|
+
- Aggregator groups operations by schema type
|
80
|
+
|
81
|
+
**Output Example**:
|
82
|
+
```
|
83
|
+
Total operations: 24 (0.8746ms)
|
84
|
+
Schemas analyzed: SchemaA, SchemaB
|
85
|
+
SchemaA: 12 operations, 0.3242ms
|
86
|
+
SchemaB: 12 operations, 0.0504ms
|
87
|
+
```
|
88
|
+
|
89
|
+
## Performance Analysis
|
90
|
+
|
91
|
+
**Reference Operations**: Typically dominate execution time in complex schemas
|
92
|
+
**Map Operations**: Element-wise computations on arrays
|
93
|
+
**LoadInput Operations**: Data access operations
|
94
|
+
|
95
|
+
Use schema breakdown to identify performance differences between schema types.
|
data/docs/features/README.md
CHANGED
@@ -9,13 +9,6 @@ Analyzes rule combinations to detect logical impossibilities across dependency c
|
|
9
9
|
- Validates domain constraints
|
10
10
|
- Reports multiple errors
|
11
11
|
|
12
|
-
### [Cascade Mutual Exclusion](analysis-cascade-mutual-exclusion.md)
|
13
|
-
Enables safe mutual recursion when cascade conditions are mutually exclusive.
|
14
|
-
|
15
|
-
- Allows mathematically sound recursive patterns
|
16
|
-
- Detects mutually exclusive conditions
|
17
|
-
- Prevents unsafe cycles while enabling safe ones
|
18
|
-
|
19
12
|
### [Type Inference](analysis-type-inference.md)
|
20
13
|
Determines types from expressions and propagates them through dependencies.
|
21
14
|
|
data/lib/kumi/analyzer.rb
CHANGED
@@ -21,7 +21,8 @@ module Kumi
|
|
21
21
|
Core::Analyzer::Passes::ScopeResolutionPass, # 15. Plans execution scope and lifting needs for declarations.
|
22
22
|
Core::Analyzer::Passes::JoinReducePlanningPass, # 16. Plans join/reduce operations (Generates IR Structs)
|
23
23
|
Core::Analyzer::Passes::LowerToIRPass, # 17. Lowers the schema to IR (Generates IR Structs)
|
24
|
-
Core::Analyzer::Passes::LoadInputCSE
|
24
|
+
Core::Analyzer::Passes::LoadInputCSE, # 18. Eliminates redundant load_input operations
|
25
|
+
Core::Analyzer::Passes::IRDependencyPass # 19. Extracts IR-level dependencies for VM execution optimization
|
25
26
|
].freeze
|
26
27
|
|
27
28
|
def self.analyze!(schema, passes: DEFAULT_PASSES, **opts)
|
@@ -58,7 +59,9 @@ module Kumi
|
|
58
59
|
t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
59
60
|
pass_instance = pass_class.new(schema, state)
|
60
61
|
begin
|
61
|
-
state =
|
62
|
+
state = Dev::Profiler.phase("analyzer.pass", pass: pass_name) do
|
63
|
+
pass_instance.run(errors)
|
64
|
+
end
|
62
65
|
rescue StandardError => e
|
63
66
|
# TODO: - GREATLY improve this, need to capture the context of the error
|
64
67
|
# and the pass that failed and line number if relevant
|
data/lib/kumi/compiler.rb
CHANGED
@@ -3,18 +3,19 @@
|
|
3
3
|
module Kumi
|
4
4
|
# Compiles an analyzed schema into executable lambdas
|
5
5
|
class Compiler < Core::CompilerBase
|
6
|
-
def self.compile(schema, analyzer:)
|
7
|
-
new(schema, analyzer).compile
|
6
|
+
def self.compile(schema, analyzer:, schema_name: nil)
|
7
|
+
new(schema, analyzer, schema_name: schema_name).compile
|
8
8
|
end
|
9
9
|
|
10
|
-
def initialize(schema, analyzer)
|
11
|
-
super
|
10
|
+
def initialize(schema, analyzer, schema_name: nil)
|
11
|
+
super(schema, analyzer)
|
12
12
|
@bindings = {}
|
13
|
+
@schema_name = schema_name
|
13
14
|
end
|
14
15
|
|
15
16
|
def compile
|
16
17
|
# Switch to LIR: Use the analysis state instead of old compilation
|
17
|
-
Runtime::Executable.from_analysis(@analysis.state)
|
18
|
+
Runtime::Executable.from_analysis(@analysis.state, schema_name: @schema_name)
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Core
|
5
|
+
module Analyzer
|
6
|
+
module Passes
|
7
|
+
# RESPONSIBILITY: Extract IR-level dependencies for VM execution optimization
|
8
|
+
# DEPENDENCIES: :ir_module from LowerToIRPass
|
9
|
+
# PRODUCES: :ir_dependencies - Hash mapping declaration names to referenced bindings
|
10
|
+
# :name_index - Hash mapping stored binding names to producing declarations
|
11
|
+
# INTERFACE: new(schema, state).run(errors)
|
12
|
+
#
|
13
|
+
# NOTE: This pass extracts actual IR-level dependencies by analyzing :ref operations
|
14
|
+
# in the generated IR, providing the dependency information needed for optimized VM scheduling.
|
15
|
+
class IRDependencyPass < PassBase
|
16
|
+
def run(errors)
|
17
|
+
ir_module = get_state(:ir_module, required: true)
|
18
|
+
|
19
|
+
ir_dependencies = build_ir_dependency_map(ir_module)
|
20
|
+
name_index = build_name_index(ir_module)
|
21
|
+
|
22
|
+
state.with(:ir_dependencies, ir_dependencies).with(:name_index, name_index)
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
# Build a map of declaration -> [stored_bindings_it_references] from the IR
|
28
|
+
def build_ir_dependency_map(ir_module)
|
29
|
+
deps_map = {}
|
30
|
+
|
31
|
+
ir_module.decls.each do |decl|
|
32
|
+
refs = []
|
33
|
+
decl.ops.each do |op|
|
34
|
+
if op.tag == :ref
|
35
|
+
refs << op.attrs[:name]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
deps_map[decl.name] = refs
|
39
|
+
end
|
40
|
+
|
41
|
+
deps_map.freeze
|
42
|
+
end
|
43
|
+
|
44
|
+
# Build name index to map stored binding names to their producing declarations
|
45
|
+
def build_name_index(ir_module)
|
46
|
+
name_index = {}
|
47
|
+
|
48
|
+
ir_module.decls.each do |decl|
|
49
|
+
# Map the primary declaration name
|
50
|
+
name_index[decl.name] = decl
|
51
|
+
|
52
|
+
# Also map any vectorized twin names produced by this declaration
|
53
|
+
decl.ops.each do |op|
|
54
|
+
if op.tag == :store
|
55
|
+
stored_name = op.attrs[:name]
|
56
|
+
name_index[stored_name] = decl
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
name_index.freeze
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
data/lib/kumi/core/analyzer/passes/toposorter.rb
CHANGED
@@ -5,8 +5,8 @@ module Kumi
|
|
5
5
|
module Core
|
6
6
|
module Analyzer
|
7
7
|
module Passes
|
8
|
-
# RESPONSIBILITY: Compute topological ordering of declarations,
|
9
|
-
# DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer
|
8
|
+
# RESPONSIBILITY: Compute topological ordering of declarations, blocking all cycles
|
9
|
+
# DEPENDENCIES: :dependencies from DependencyResolver, :declarations from NameIndexer
|
10
10
|
# PRODUCES: :evaluation_order - Array of declaration names in evaluation order
|
11
11
|
# :node_index - Hash mapping object_id to node metadata for later passes
|
12
12
|
# INTERFACE: new(schema, state).run(errors)
|
@@ -60,19 +60,13 @@ module Kumi
|
|
60
60
|
temp_marks = Set.new
|
61
61
|
perm_marks = Set.new
|
62
62
|
order = []
|
63
|
-
cascades = get_state(:cascades) || {}
|
64
63
|
|
65
64
|
visit_node = lambda do |node, path = []|
|
66
65
|
return if perm_marks.include?(node)
|
67
66
|
|
68
67
|
if temp_marks.include?(node)
|
69
|
-
#
|
70
|
-
cycle_path = path + [node]
|
71
|
-
return if safe_conditional_cycle?(cycle_path, graph, cascades)
|
72
|
-
|
73
|
-
# Allow this cycle - it's safe due to cascade mutual exclusion
|
68
|
+
# Block all cycles - no mutual recursion allowed
|
74
69
|
report_unexpected_cycle(temp_marks, node, errors)
|
75
|
-
|
76
70
|
return
|
77
71
|
end
|
78
72
|
|
@@ -102,32 +96,6 @@ module Kumi
|
|
102
96
|
order.freeze
|
103
97
|
end
|
104
98
|
|
105
|
-
def safe_conditional_cycle?(cycle_path, graph, cascades)
|
106
|
-
return false if cycle_path.nil? || cycle_path.size < 2
|
107
|
-
|
108
|
-
# Find where the cycle starts - look for the first occurrence of the repeated node
|
109
|
-
last_node = cycle_path.last
|
110
|
-
return false if last_node.nil?
|
111
|
-
|
112
|
-
cycle_start = cycle_path.index(last_node)
|
113
|
-
return false unless cycle_start && cycle_start < cycle_path.size - 1
|
114
|
-
|
115
|
-
cycle_nodes = cycle_path[cycle_start..]
|
116
|
-
|
117
|
-
# Check if all edges in the cycle are conditional
|
118
|
-
cycle_nodes.each_cons(2) do |from, to|
|
119
|
-
edges = graph[from] || []
|
120
|
-
edge = edges.find { |e| e.to == to }
|
121
|
-
|
122
|
-
return false unless edge&.conditional
|
123
|
-
|
124
|
-
# Check if the cascade has mutually exclusive conditions
|
125
|
-
cascade_meta = cascades[edge.cascade_owner]
|
126
|
-
return false unless cascade_meta&.dig(:all_mutually_exclusive)
|
127
|
-
end
|
128
|
-
|
129
|
-
true
|
130
|
-
end
|
131
99
|
|
132
100
|
def report_unexpected_cycle(temp_marks, current_node, errors)
|
133
101
|
cycle_path = temp_marks.to_a.join(" → ") + " → #{current_node}"
|
data/lib/kumi/core/ir/execution_engine/interpreter.rb
CHANGED
@@ -14,6 +14,7 @@ module Kumi
|
|
14
14
|
ir_module.decls.each do |decl|
|
15
15
|
decl.ops.each do |op|
|
16
16
|
next unless op.tag == :store
|
17
|
+
|
17
18
|
name = op.attrs[:name]
|
18
19
|
index[name] = decl if name
|
19
20
|
end
|
@@ -26,27 +27,39 @@ module Kumi
|
|
26
27
|
raise ArgumentError, "Registry cannot be nil" if registry.nil?
|
27
28
|
raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
|
28
29
|
|
29
|
-
# --- PROFILER: init per run ---
|
30
|
-
|
30
|
+
# --- PROFILER: init per run (but not in persistent mode) ---
|
31
|
+
if Profiler.enabled?
|
32
|
+
schema_name = ctx[:schema_name] || "UnknownSchema"
|
33
|
+
if Profiler.persistent?
|
34
|
+
# In persistent mode, just update schema name without full reset
|
35
|
+
Profiler.set_schema_name(schema_name)
|
36
|
+
else
|
37
|
+
# Normal mode: full reset with schema name
|
38
|
+
Profiler.reset!(meta: { decls: ir_module.decls&.size || 0, schema_name: schema_name })
|
39
|
+
end
|
40
|
+
end
|
31
41
|
|
32
42
|
outputs = {}
|
33
43
|
target = ctx[:target]
|
34
44
|
guard_stack = [true]
|
35
|
-
|
45
|
+
|
36
46
|
# Always ensure we have a declaration cache - either from caller or new for this VM run
|
37
47
|
declaration_cache = ctx[:declaration_cache] || {}
|
38
48
|
|
39
49
|
# Build name index for targeting by stored names
|
40
50
|
name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
|
41
51
|
|
42
|
-
# Choose declarations to execute
|
43
|
-
decls_to_run =
|
44
|
-
if target
|
52
|
+
# Choose declarations to execute - prefer explicit schedule if present
|
53
|
+
decls_to_run =
|
54
|
+
if ctx[:decls_to_run]
|
55
|
+
ctx[:decls_to_run] # array of decl objects
|
56
|
+
elsif target
|
45
57
|
# Prefer a decl that STORES the target (covers __vec twins)
|
46
58
|
d = name_index && name_index[target]
|
47
59
|
# Fallback: allow targeting by decl name (legacy behavior)
|
48
60
|
d ||= ir_module.decls.find { |dd| dd.name == target }
|
49
61
|
raise "Unknown target: #{target}" unless d
|
62
|
+
|
50
63
|
[d]
|
51
64
|
else
|
52
65
|
ir_module.decls
|
@@ -84,7 +97,10 @@ module Kumi
|
|
84
97
|
false
|
85
98
|
end
|
86
99
|
slots << nil # keep slot_id == op_index
|
87
|
-
|
100
|
+
if t0
|
101
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
|
102
|
+
note: "enter")
|
103
|
+
end
|
88
104
|
next
|
89
105
|
|
90
106
|
when :guard_pop
|
@@ -97,7 +113,10 @@ module Kumi
|
|
97
113
|
# Skip body when guarded off, but keep indices aligned
|
98
114
|
unless guard_stack.last
|
99
115
|
slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
|
100
|
-
|
116
|
+
if t0
|
117
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
|
118
|
+
note: "skipped")
|
119
|
+
end
|
101
120
|
next
|
102
121
|
end
|
103
122
|
|
@@ -149,41 +168,34 @@ module Kumi
|
|
149
168
|
end
|
150
169
|
rows_touched ||= 1
|
151
170
|
cache_note = hit ? "hit:#{plan_id}" : "miss:#{plan_id}"
|
152
|
-
|
153
|
-
|
171
|
+
if t0
|
172
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
|
173
|
+
rows: rows_touched, note: cache_note)
|
174
|
+
end
|
154
175
|
|
155
176
|
when :ref
|
156
177
|
name = op.attrs[:name]
|
157
|
-
|
178
|
+
|
158
179
|
if outputs.key?(name)
|
159
180
|
referenced = outputs[name]
|
181
|
+
hit = :outputs
|
160
182
|
elsif declaration_cache.key?(name)
|
161
183
|
referenced = declaration_cache[name]
|
184
|
+
hit = :cache
|
162
185
|
else
|
163
|
-
|
164
|
-
active = (ctx[:active] ||= {})
|
165
|
-
raise "cycle detected: #{name}" if active[name]
|
166
|
-
active[name] = true
|
167
|
-
|
168
|
-
subctx = {
|
169
|
-
input: ctx[:input] || ctx["input"],
|
170
|
-
target: name, # target is the STORED NAME
|
171
|
-
accessor_cache: ctx[:accessor_cache],
|
172
|
-
declaration_cache: ctx[:declaration_cache],
|
173
|
-
name_index: name_index, # reuse map
|
174
|
-
active: active
|
175
|
-
}
|
176
|
-
referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
|
177
|
-
active.delete(name)
|
186
|
+
raise "unscheduled ref #{name}: producer not executed or dependency analysis failed"
|
178
187
|
end
|
179
|
-
|
188
|
+
|
180
189
|
if ENV["DEBUG_VM_ARGS"]
|
181
190
|
puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
|
182
191
|
end
|
183
|
-
|
192
|
+
|
184
193
|
slots << referenced
|
185
|
-
rows_touched =
|
186
|
-
|
194
|
+
rows_touched = referenced[:k] == :vec ? (referenced[:rows]&.size || 0) : 1
|
195
|
+
if t0
|
196
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0,
|
197
|
+
rows: rows_touched, note: hit)
|
198
|
+
end
|
187
199
|
|
188
200
|
when :array
|
189
201
|
# Validate slot indices before accessing
|