kumi 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +0 -1
- data/BACKLOG.md +34 -0
- data/CLAUDE.md +4 -6
- data/README.md +0 -18
- data/config/functions.yaml +352 -0
- data/docs/dev/analyzer-debug.md +52 -0
- data/docs/dev/parse-command.md +64 -0
- data/docs/functions/analyzer_integration.md +199 -0
- data/docs/functions/signatures.md +171 -0
- data/examples/hash_objects_demo.rb +138 -0
- data/golden/array_operations/schema.kumi +17 -0
- data/golden/cascade_logic/schema.kumi +16 -0
- data/golden/mixed_nesting/schema.kumi +42 -0
- data/golden/simple_math/schema.kumi +10 -0
- data/lib/kumi/analyzer.rb +72 -21
- data/lib/kumi/core/analyzer/checkpoint.rb +72 -0
- data/lib/kumi/core/analyzer/debug.rb +167 -0
- data/lib/kumi/core/analyzer/passes/broadcast_detector.rb +1 -3
- data/lib/kumi/core/analyzer/passes/function_signature_pass.rb +199 -0
- data/lib/kumi/core/analyzer/passes/load_input_cse.rb +120 -0
- data/lib/kumi/core/analyzer/passes/lower_to_ir_pass.rb +72 -157
- data/lib/kumi/core/analyzer/passes/toposorter.rb +37 -1
- data/lib/kumi/core/analyzer/state_serde.rb +64 -0
- data/lib/kumi/core/analyzer/structs/access_plan.rb +12 -10
- data/lib/kumi/core/compiler/access_planner.rb +3 -2
- data/lib/kumi/core/function_registry/collection_functions.rb +3 -1
- data/lib/kumi/core/functions/dimension.rb +98 -0
- data/lib/kumi/core/functions/dtypes.rb +20 -0
- data/lib/kumi/core/functions/errors.rb +11 -0
- data/lib/kumi/core/functions/kernel_adapter.rb +45 -0
- data/lib/kumi/core/functions/loader.rb +119 -0
- data/lib/kumi/core/functions/registry_v2.rb +68 -0
- data/lib/kumi/core/functions/shape.rb +70 -0
- data/lib/kumi/core/functions/signature.rb +122 -0
- data/lib/kumi/core/functions/signature_parser.rb +86 -0
- data/lib/kumi/core/functions/signature_resolver.rb +272 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +98 -7
- data/lib/kumi/core/ir/execution_engine/profiler.rb +202 -0
- data/lib/kumi/dev/ir.rb +75 -0
- data/lib/kumi/dev/parse.rb +105 -0
- data/lib/kumi/dev/runner.rb +83 -0
- data/lib/kumi/frontends/ruby.rb +28 -0
- data/lib/kumi/frontends/text.rb +46 -0
- data/lib/kumi/frontends.rb +29 -0
- data/lib/kumi/kernels/ruby/aggregate_core.rb +105 -0
- data/lib/kumi/kernels/ruby/datetime_scalar.rb +21 -0
- data/lib/kumi/kernels/ruby/mask_scalar.rb +15 -0
- data/lib/kumi/kernels/ruby/scalar_core.rb +63 -0
- data/lib/kumi/kernels/ruby/string_scalar.rb +19 -0
- data/lib/kumi/kernels/ruby/vector_struct.rb +39 -0
- data/lib/kumi/runtime/executable.rb +57 -26
- data/lib/kumi/schema.rb +4 -4
- data/lib/kumi/support/diff.rb +22 -0
- data/lib/kumi/support/ir_render.rb +61 -0
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +2 -0
- data/performance_results.txt +63 -0
- data/scripts/test_mixed_nesting_performance.rb +206 -0
- metadata +45 -5
- data/docs/features/javascript-transpiler.md +0 -148
- data/lib/kumi/js.rb +0 -23
- data/lib/kumi/support/ir_dump.rb +0 -491
@@ -0,0 +1,272 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "errors"
|
4
|
+
require_relative "shape"
|
5
|
+
require_relative "signature"
|
6
|
+
|
7
|
+
module Kumi
|
8
|
+
module Core
|
9
|
+
module Functions
|
10
|
+
# Given a set of signatures and actual argument shapes, pick the best match.
|
11
|
+
# Supports NEP 20 extensions: fixed-size, flexible, and broadcastable dimensions.
|
12
|
+
#
|
13
|
+
# Inputs:
|
14
|
+
# signatures : Array<Signature> (with Dimension objects)
|
15
|
+
# arg_shapes : Array<Array<Symbol|Integer>> e.g., [[:i], [:i]] or [[], [3]] or [[2, :i]]
|
16
|
+
#
|
17
|
+
# Returns:
|
18
|
+
# { signature:, score:, result_axes:, join_policy:, dropped_axes:, env:, effective_signature: }
|
19
|
+
#
|
20
|
+
# NEP 20 Matching rules:
|
21
|
+
# - Arity must match exactly (before flexible dimension resolution).
|
22
|
+
# - Fixed-size dimensions (integers) must match exactly.
|
23
|
+
# - Flexible dimensions (?) can be omitted if not present in all operands.
|
24
|
+
# - Broadcastable dimensions (|1) can match scalar or size-1 dimensions.
|
25
|
+
# - For each param position, shapes are checked according to NEP 20 rules.
|
26
|
+
# - We prefer exact matches, then flexible matches, then broadcast matches.
|
27
|
+
class SignatureResolver
|
28
|
+
class << self
|
29
|
+
# Pick the best-matching signature for the given argument shapes.
#
# @param signatures [Array<Signature>] candidate signatures to try
# @param arg_shapes [Array<Array<Symbol, Integer>>, nil] one shape per argument;
#   nil is treated as "no arguments" (zero-arity call)
# @return [Hash] the winning candidate with keys :signature, :score, :result_axes,
#   :join_policy, :dropped_axes, :env and :effective_signature
# @raise [SignatureMatchError] when arg_shapes is malformed or nothing matches
def choose(signatures:, arg_shapes:)
  arg_shapes = [] if arg_shapes.nil?
  sanity_check_args!(arg_shapes)

  # The normalized shapes depend only on the arguments, so build them once
  # instead of once per candidate signature.
  normalized_args = arg_shapes.map { |shape| normalize_shape(shape) }

  candidates = signatures.filter_map do |sig|
    score = match_score(sig, arg_shapes)
    next if score.nil?

    env = build_dimension_environment(sig, normalized_args)
    next if env.nil? # a named dimension was bound to two different values

    {
      signature: sig,
      score: score,
      result_axes: sig.out_shape.map(&:name), # names rather than Dimension objects
      join_policy: sig.join_policy,
      dropped_axes: sig.dropped_axes.map(&:to_sym),
      env: env
    }
  end

  raise SignatureMatchError, mismatch_message(signatures, arg_shapes) if candidates.empty?

  # Lower score wins; min_by keeps the first candidate on ties.
  best = candidates.min_by { |candidate| candidate[:score] }
  winner = best[:signature]

  best[:effective_signature] = {
    in_shapes: winner.in_shapes.map { |dims| dims.map(&:name) },
    out_shape: winner.out_shape.map(&:name),
    join_policy: winner.join_policy
  }

  best
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
# Reject anything that is not an Array of Arrays of Symbols/Integers.
#
# @param arg_shapes [Object] value supplied by the caller of +choose+
# @return [nil] when the structure is acceptable
# @raise [SignatureMatchError] when the structure is malformed
def sanity_check_args!(arg_shapes)
  axis_ok = ->(axis) { axis.is_a?(Symbol) || axis.is_a?(Integer) }
  shape_ok = ->(shape) { shape.is_a?(Array) && shape.all?(&axis_ok) }

  return if arg_shapes.is_a?(Array) && arg_shapes.all?(&shape_ok)

  raise SignatureMatchError, "arg_shapes must be an array of dimension arrays (symbols or integers), got: #{arg_shapes.inspect}"
end
|
77
|
+
|
78
|
+
# Returns an integer "broadcast cost" or nil if not matchable.
|
79
|
+
# Lower score = better match: 0 = exact, then increasing cost for broadcasts/flexibility
|
80
|
+
# Total matching cost of +arg_shapes+ against +sig+, or nil when they cannot match.
# Per-argument costs are summed; the join-policy check runs only after every
# argument has matched.
#
# @param sig [Signature]
# @param arg_shapes [Array<Array<Symbol, Integer>>]
# @return [Integer, nil]
def match_score(sig, arg_shapes)
  return nil if sig.arity != arg_shapes.length

  dims_per_arg = arg_shapes.map { |shape| normalize_shape(shape) }

  total = 0
  sig.in_shapes.each_with_index do |wanted, position|
    partial = match_argument_cost(got: dims_per_arg[position], expected: wanted)
    return nil if partial.nil?

    total += partial
  end

  valid_join_policy?(sig, dims_per_arg) ? total : nil
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
# Convert a shape array (symbols/integers) to normalized Dimension array
|
105
|
+
# Wrap every axis of a raw shape in a Dimension.
#
# @param shape [Array<Symbol, Integer>]
# @return [Array<Dimension>]
# @raise [SignatureMatchError] when an axis is neither a Symbol nor an Integer
def normalize_shape(shape)
  shape.map do |axis|
    unless axis.is_a?(Symbol) || axis.is_a?(Integer)
      raise SignatureMatchError, "Invalid dimension type: #{axis.class}"
    end

    Dimension.new(axis)
  end
end
|
117
|
+
|
118
|
+
# Calculate cost of matching one argument against expected dimensions
|
119
|
+
# Cost of matching one argument's dimensions against the expected dimensions.
#
# @param got [Array] normalized dimensions of the actual argument
# @param expected [Array] dimensions declared by the signature
# @return [Integer, nil] nil when the argument cannot match
def match_argument_cost(got:, expected:)
  # A scalar argument: free against a scalar slot, 10 when the slot has a
  # flexible dimension, otherwise 1.
  if got.empty?
    return 0 if expected.empty?

    return expected.any?(&:flexible?) ? 10 : 1
  end

  same_rank_strict = got.length == expected.length && expected.none?(&:flexible?)
  return right_align_match(got: got, expected: expected) unless same_rank_strict

  # Position-by-position comparison; the first incompatible pair aborts.
  got.zip(expected).reduce(0) do |running, (have, want)|
    step = match_dimension_cost(got: have, expected: want)
    return nil if step.nil?

    running + step
  end
end
|
139
|
+
|
140
|
+
# Right-aligned matching for flexible dimensions (NEP 20 ? modifier)
|
141
|
+
# Match +got+ against +expected+ from the rightmost dimension leftwards.
# A flexible expected dimension that has no usable counterpart is skipped at a
# cost of 10; any other mismatch, or leftover +got+ dimensions, yields nil.
#
# @param got [Array] normalized dimensions of the actual argument
# @param expected [Array] dimensions declared by the signature
# @return [Integer, nil]
def right_align_match(got:, expected:)
  cursor = got.length - 1
  total = 0

  expected.reverse_each do |want|
    # With no actual dimension left there is nothing to compare against.
    step = cursor.negative? ? nil : match_dimension_cost(got: got[cursor], expected: want)

    if step
      total += step
      cursor -= 1 # consumed one actual dimension
    elsif want.flexible?
      total += 10 # drop the optional expected dimension, keep the cursor
    else
      return nil
    end
  end

  # Unconsumed actual dimensions mean the argument has higher rank than expected.
  cursor.negative? ? total : nil
end
|
181
|
+
|
182
|
+
# Calculate cost of matching one dimension against another
|
183
|
+
# Cost of matching a single actual dimension against a single expected one.
#
# Returns 0 for equal dimensions, equal fixed sizes, or equal names with no
# fixed size involved; 2 for equal names where either side is fixed-size;
# 3 when the expected dimension is broadcastable and the actual one is
# size-1 or named; nil otherwise.
#
# @param got [#fixed_size?, #named?, #name, #size]
# @param expected [#fixed_size?, #named?, #name, #size, #broadcastable?]
# @return [Integer, nil]
def match_dimension_cost(got:, expected:)
  return 0 if got == expected

  both_fixed = got.fixed_size? && expected.fixed_size?
  if both_fixed
    return got.size == expected.size ? 0 : nil
  end

  shared_name = got.named? && expected.named? && got.name == expected.name
  if shared_name
    return 0 unless got.fixed_size? || expected.fixed_size?

    return 2
  end

  return nil unless expected.broadcastable?

  unit_sized = got.fixed_size? && got.size == 1
  (unit_sized || got.named?) ? 3 : nil
end
|
207
|
+
|
208
|
+
# Check if join_policy constraints are satisfied
|
209
|
+
# Decide whether the arguments satisfy the signature's join policy.
#
# Any truthy join_policy is accepted outright. With a nil policy the call is
# valid when there are no non-scalar arguments, when all non-scalar arguments
# carry the same dimension names, or when a consistent dimension environment
# can still be built for the differing names.
#
# @param sig [Signature]
# @param normalized_args [Array<Array>] normalized dimensions per argument
# @return [Boolean]
def valid_join_policy?(sig, normalized_args)
  return true if sig.join_policy

  vector_args = normalized_args.reject { |dims| Shape.scalar?(dims) }
  return true if vector_args.empty?

  reference_names = vector_args.first.map(&:name)
  uniform = vector_args.all? { |dims| dims.map(&:name) == reference_names }

  # Differing names are tolerated only if the bindings stay consistent.
  uniform || !build_dimension_environment(sig, normalized_args).nil?
end
|
229
|
+
|
230
|
+
# Build the error text used when no signature accepts the argument shapes.
#
# @param signatures [Array] candidates that were tried (rendered with #inspect)
# @param arg_shapes [Array<Array>] the raw argument shapes
# @return [String]
def mismatch_message(signatures, arg_shapes)
  rendered_signatures = signatures.map(&:inspect).join(", ")
  rendered_shapes = pp_shapes(arg_shapes)

  "no matching signature for shapes #{rendered_shapes} among [#{rendered_signatures}]"
end
|
234
|
+
|
235
|
+
# Render shapes as "(a,b), (c)" for error messages.
#
# @param shapes [Array<Array>]
# @return [String]
def pp_shapes(shapes)
  rendered = shapes.map do |axes|
    "(" + axes.join(",") + ")"
  end
  rendered.join(", ")
end
|
238
|
+
|
239
|
+
# Build dimension environment by checking consistency of named dimensions across arguments
|
240
|
+
# Bind each named expected dimension to the actual dimension found at the same
# position, checking that a name is never bound to two different values.
#
# @param sig [Signature] supplies the expected shapes via #in_shapes
# @param normalized_args [Array<Array>] normalized dimensions per argument;
#   a missing argument is treated as having no dimensions
# @return [Hash, nil] name => actual dimension, or nil on a conflicting binding
def build_dimension_environment(sig, normalized_args)
  bindings = {}

  sig.in_shapes.each_with_index do |wanted_dims, position|
    actual_dims = normalized_args[position] || []

    wanted_dims.each_with_index do |wanted, axis|
      # Expected dimensions beyond the actual rank have nothing to bind to.
      break if axis >= actual_dims.length
      next unless wanted.named?

      actual = actual_dims[axis]
      label = wanted.name

      if !bindings.key?(label)
        bindings[label] = actual
      elsif bindings[label] != actual
        return nil
      end
    end
  end

  bindings
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
end
|
@@ -9,20 +9,57 @@ module Kumi
|
|
9
9
|
PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
|
10
10
|
NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze
|
11
11
|
|
12
|
+
# Map every name written by a :store op to the declaration containing that op.
# When several declarations store the same name, the last one visited wins.
#
# @param ir_module [#decls] module whose declarations expose #ops
# @return [Hash] stored name => declaration
def self.build_name_index(ir_module)
  ir_module.decls.each_with_object({}) do |declaration, by_name|
    declaration.ops.each do |operation|
      next if operation.tag != :store

      stored = operation.attrs[:name]
      by_name[stored] = declaration if stored
    end
  end
end
|
23
|
+
|
12
24
|
def self.run(ir_module, ctx, accessors:, registry:)
|
13
25
|
# Validate registry is properly initialized
|
14
26
|
raise ArgumentError, "Registry cannot be nil" if registry.nil?
|
15
27
|
raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
|
16
28
|
|
29
|
+
# --- PROFILER: init per run ---
|
30
|
+
Profiler.reset!(meta: { decls: ir_module.decls&.size || 0 }) if Profiler.enabled?
|
31
|
+
|
17
32
|
outputs = {}
|
18
33
|
target = ctx[:target]
|
19
34
|
guard_stack = [true]
|
35
|
+
|
36
|
+
# Always ensure we have a declaration cache - either from caller or new for this VM run
|
37
|
+
declaration_cache = ctx[:declaration_cache] || {}
|
38
|
+
|
39
|
+
# Build name index for targeting by stored names
|
40
|
+
name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
|
41
|
+
|
42
|
+
# Choose declarations to execute by stored name (not only decl name)
|
43
|
+
decls_to_run =
|
44
|
+
if target
|
45
|
+
# Prefer a decl that STORES the target (covers __vec twins)
|
46
|
+
d = name_index && name_index[target]
|
47
|
+
# Fallback: allow targeting by decl name (legacy behavior)
|
48
|
+
d ||= ir_module.decls.find { |dd| dd.name == target }
|
49
|
+
raise "Unknown target: #{target}" unless d
|
50
|
+
[d]
|
51
|
+
else
|
52
|
+
ir_module.decls
|
53
|
+
end
|
20
54
|
|
21
|
-
|
55
|
+
decls_to_run.each do |decl|
|
22
56
|
slots = []
|
23
57
|
guard_stack = [true] # reset per decl
|
24
58
|
|
25
59
|
decl.ops.each_with_index do |op, op_index|
|
60
|
+
t0 = Profiler.enabled? ? Profiler.t0 : nil
|
61
|
+
cpu_t0 = Profiler.enabled? ? Profiler.cpu_t0 : nil
|
62
|
+
rows_touched = nil
|
26
63
|
if ENV["ASSERT_VM_SLOTS"] == "1"
|
27
64
|
expected = op_index
|
28
65
|
unless slots.length == expected
|
@@ -47,17 +84,20 @@ module Kumi
|
|
47
84
|
false
|
48
85
|
end
|
49
86
|
slots << nil # keep slot_id == op_index
|
87
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "enter") if t0
|
50
88
|
next
|
51
89
|
|
52
90
|
when :guard_pop
|
53
91
|
guard_stack.pop
|
54
92
|
slots << nil
|
93
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "exit") if t0
|
55
94
|
next
|
56
95
|
end
|
57
96
|
|
58
97
|
# Skip body when guarded off, but keep indices aligned
|
59
98
|
unless guard_stack.last
|
60
99
|
slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
|
100
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0, note: "skipped") if t0
|
61
101
|
next
|
62
102
|
end
|
63
103
|
|
@@ -69,35 +109,81 @@ module Kumi
|
|
69
109
|
raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length
|
70
110
|
|
71
111
|
slots[dst] = slots[src]
|
112
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :assign, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
|
72
113
|
|
73
114
|
when :const
|
74
115
|
result = Values.scalar(op.attrs[:value])
|
75
116
|
puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
|
76
117
|
slots << result
|
118
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :const, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
|
77
119
|
|
78
120
|
when :load_input
|
79
121
|
plan_id = op.attrs[:plan_id]
|
80
122
|
scope = op.attrs[:scope] || []
|
81
123
|
scalar = op.attrs[:is_scalar]
|
82
124
|
indexed = op.attrs[:has_idx]
|
83
|
-
raw = accessors.fetch(plan_id).call(ctx[:input] || ctx["input"])
|
84
125
|
|
85
|
-
|
126
|
+
# NEW: consult runtime accessor cache
|
127
|
+
acc_cache = ctx[:accessor_cache] || {}
|
128
|
+
input_obj = ctx[:input] || ctx["input"]
|
129
|
+
cache_key = [plan_id, input_obj.object_id]
|
130
|
+
|
131
|
+
if acc_cache.key?(cache_key)
|
132
|
+
raw = acc_cache[cache_key]
|
133
|
+
hit = true
|
134
|
+
else
|
135
|
+
raw = accessors.fetch(plan_id).call(input_obj)
|
136
|
+
acc_cache[cache_key] = raw
|
137
|
+
hit = false
|
138
|
+
end
|
139
|
+
|
140
|
+
puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect} cache_hit: #{hit}" if ENV["DEBUG_VM_ARGS"]
|
86
141
|
slots << if scalar
|
87
142
|
Values.scalar(raw)
|
88
143
|
elsif indexed
|
144
|
+
rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
|
89
145
|
Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
|
90
146
|
else
|
147
|
+
rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
|
91
148
|
Values.vec(scope, raw.map { |v| { v: v } }, false)
|
92
149
|
end
|
150
|
+
rows_touched ||= 1
|
151
|
+
cache_note = hit ? "hit:#{plan_id}" : "miss:#{plan_id}"
|
152
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
|
153
|
+
rows: rows_touched, note: cache_note) if t0
|
93
154
|
|
94
155
|
when :ref
|
95
156
|
name = op.attrs[:name]
|
96
|
-
|
157
|
+
|
158
|
+
if outputs.key?(name)
|
159
|
+
referenced = outputs[name]
|
160
|
+
elsif declaration_cache.key?(name)
|
161
|
+
referenced = declaration_cache[name]
|
162
|
+
else
|
163
|
+
# demand-compute the producing decl up to the store of `name`
|
164
|
+
active = (ctx[:active] ||= {})
|
165
|
+
raise "cycle detected: #{name}" if active[name]
|
166
|
+
active[name] = true
|
167
|
+
|
168
|
+
subctx = {
|
169
|
+
input: ctx[:input] || ctx["input"],
|
170
|
+
target: name, # target is the STORED NAME
|
171
|
+
accessor_cache: ctx[:accessor_cache],
|
172
|
+
declaration_cache: ctx[:declaration_cache],
|
173
|
+
name_index: name_index, # reuse map
|
174
|
+
active: active
|
175
|
+
}
|
176
|
+
referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
|
177
|
+
active.delete(name)
|
178
|
+
end
|
179
|
+
|
97
180
|
if ENV["DEBUG_VM_ARGS"]
|
98
|
-
puts "DEBUG Ref #{name}: #{
|
181
|
+
puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
|
99
182
|
end
|
100
|
-
|
183
|
+
|
184
|
+
slots << referenced
|
185
|
+
rows_touched = (referenced[:k] == :vec) ? (referenced[:rows]&.size || 0) : 1
|
186
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0, rows: rows_touched) if t0
|
101
187
|
|
102
188
|
when :array
|
103
189
|
# Validate slot indices before accessing
|
@@ -216,7 +302,10 @@ module Kumi
|
|
216
302
|
raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
217
303
|
end
|
218
304
|
|
219
|
-
|
305
|
+
result = slots[src]
|
306
|
+
outputs[name] = result
|
307
|
+
# Also store in declaration cache for future ref operations
|
308
|
+
declaration_cache[name] = result
|
220
309
|
|
221
310
|
# keep slot_id == op_index invariant
|
222
311
|
slots << nil
|
@@ -327,6 +416,8 @@ module Kumi
|
|
327
416
|
end
|
328
417
|
end
|
329
418
|
|
419
|
+
# --- end-of-run summary ---
|
420
|
+
Profiler.emit_summary! if Profiler.enabled?
|
330
421
|
outputs
|
331
422
|
end
|
332
423
|
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "fileutils"
require "json"
require "set"
require "time"
|
6
|
+
|
7
|
+
module Kumi
|
8
|
+
module Core
|
9
|
+
module IR
|
10
|
+
module ExecutionEngine
|
11
|
+
module Profiler
|
12
|
+
class << self
|
13
|
+
# Profiling is active only while the KUMI_PROFILE environment variable is "1".
def enabled?
  ENV["KUMI_PROFILE"] == "1"
end
|
14
|
+
|
15
|
+
# Start a new profiling run: clear per-run events, bump the run counter and
# make sure the cross-run aggregate store exists. No-op when profiling is off.
#
# The aggregate buckets hold a Set of run ids, so this relies on "set" being
# required by the file.
#
# @param meta [Hash] free-form metadata echoed back by the summaries
# @return [void]
def reset!(meta: {})
  return unless enabled?

  @events = []
  @meta = meta
  @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
  @run_id = (@run_id || 0) + 1 # run number, used for multi-run averaging

  # Aggregates survive across runs; only create the store the first time.
  @aggregated_stats ||= Hash.new do |stats, key|
    stats[key] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new }
  end

  return unless ENV["KUMI_PROFILE_TRUNCATE"] == "1"

  # Truncation empties the output file and discards the aggregates as well.
  FileUtils.mkdir_p(File.dirname(@file))
  File.write(@file, "")
  @aggregated_stats.clear
end
|
29
|
+
|
30
|
+
# monotonic start time
|
31
|
+
# Current reading of the monotonic clock, used as a wall-time start marker.
def t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
34
|
+
|
35
|
+
# CPU time start (process-wide CPU clock; not per-thread)
|
36
|
+
# Current reading of the process CPU-time clock, used as a CPU-time start marker.
def cpu_t0 = Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
|
39
|
+
|
40
|
+
# Per-op record with both wall time and CPU time
|
41
|
+
# Record one executed op: build the event, fold it into the cross-run
# aggregates, keep it in the per-run event list and optionally stream it.
#
# @param decl [String, Symbol] declaration name
# @param idx [Integer] op index within the declaration
# @param tag [Symbol] op tag
# @param op [#attrs, #args] the op itself
# @param t0 [Float] monotonic start time
# @param cpu_t0 [Float, nil] CPU start time; wall time is reused when nil
# @param rows [Integer, nil] rows touched by the op
# @param note [String, nil] free-form annotation
# @return [Hash, nil] the event, or nil when profiling is disabled
def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
  return unless enabled?

  wall_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0
  cpu_ms = cpu_t0 ? (Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0 : wall_ms

  ev = {
    ts: Time.now.utc.iso8601(3),
    run: @run_id,
    decl: decl,
    i: idx,
    tag: tag,
    wall_ms: wall_ms.round(4),
    cpu_ms: cpu_ms.round(4),
    rows: rows,
    note: note,
    key: op_key(decl, idx, tag, op), # stable key for grep/diff
    attrs: compact_attrs(op.attrs)
  }

  # Like @events below, the aggregate store must work even if reset! has not
  # run yet; previously this dereferenced nil in that case.
  @aggregated_stats ||= Hash.new do |stats, key|
    stats[key] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new }
  end

  # Named so it does not shadow the op_key method used above.
  bucket_key = "#{decl}@#{idx}:#{tag}"
  agg = @aggregated_stats[bucket_key]
  agg[:count] += 1
  agg[:total_ms] += wall_ms
  agg[:total_cpu_ms] += cpu_ms
  agg[:rows] += (rows || 0)
  agg[:runs] << @run_id
  agg[:decl] = decl
  agg[:tag] = tag
  agg[:idx] = idx
  agg[:note] = note if note

  (@events ||= []) << ev
  stream(ev) if ENV["KUMI_PROFILE_STREAM"] == "1"
  ev
end
|
78
|
+
|
79
|
+
# Summarize the current run, grouped by [decl, tag] and ranked by wall time.
#
# @param top [Integer] maximum number of groups to return
# @return [Hash] {} when profiling is disabled, otherwise
#   { meta:, top:, total_ms:, op_count:, run_id: }. Each entry of :top carries
#   :rps, which is rows divided by milliseconds, or nil when either is zero.
def summary(top: 20)
  return {} unless enabled?

  events = @events || []

  per_group = Hash.new { |h, k| h[k] = { count: 0, ms: 0.0, rows: 0 } }
  events.each do |event|
    bucket = per_group[[event[:decl], event[:tag]]]
    bucket[:count] += 1
    bucket[:ms] += event[:wall_ms] || event[:ms] || 0
    bucket[:rows] += event[:rows] || 0
  end

  ranked = per_group.map do |(decl, tag), v|
    # A zero elapsed time would yield Infinity, which cannot be serialized to
    # JSON when the summary is streamed.
    measurable = v[:rows].positive? && v[:ms].positive?
    {
      decl: decl,
      tag: tag,
      count: v[:count],
      ms: v[:ms].round(3),
      rows: v[:rows],
      rps: measurable ? (v[:rows] / v[:ms]).round(1) : nil
    }
  end
  ranked = ranked.sort_by { |entry| -entry[:ms] }.first(top)

  {
    meta: @meta || {},
    top: ranked,
    total_ms: events.sum { |event| event[:wall_ms] || event[:ms] || 0 }.round(3),
    op_count: events.size,
    run_id: @run_id
  }
end
|
101
|
+
|
102
|
+
# Multi-run averaged analysis
|
103
|
+
# Per-op statistics accumulated over all recorded runs, ranked by total wall time.
#
# @param top [Integer] maximum number of ops to return
# @return [Hash] {} when profiling is disabled or nothing has been aggregated,
#   otherwise { meta:, total_runs:, averaged_ops:, total_operations: }
def averaged_analysis(top: 20)
  return {} unless enabled? && @aggregated_stats&.any?

  per_op = @aggregated_stats.map do |key, stats|
    calls = stats[:count]
    wall = stats[:total_ms]
    cpu = stats[:total_cpu_ms]
    run_count = stats[:runs].size
    efficiency = wall > 0 ? (cpu / wall * 100).round(1) : 100

    {
      op_key: key,
      decl: stats[:decl],
      idx: stats[:idx],
      tag: stats[:tag],
      runs: run_count,
      total_calls: calls,
      calls_per_run: calls / run_count.to_f,
      avg_wall_ms: (wall / calls).round(4),
      avg_cpu_ms: (cpu / calls).round(4),
      total_wall_ms: wall.round(3),
      total_cpu_ms: cpu.round(3),
      cpu_efficiency: efficiency,
      rows_total: stats[:rows],
      note: stats[:note]
    }
  end

  heaviest = per_op.sort_by { |entry| -entry[:total_wall_ms] }.first(top)
  run_counts = @aggregated_stats.values.map { |stats| stats[:runs].size }

  {
    meta: @meta || {},
    total_runs: run_counts.max || 0,
    averaged_ops: heaviest,
    total_operations: @aggregated_stats.size
  }
end
|
139
|
+
|
140
|
+
# Identify potential cache overhead operations
|
141
|
+
# Timing breakdown of the ops that can involve caching: :ref and :load_input
# ops, plus any op whose note mentions "cache".
#
# Selection uses the recorded tag rather than a substring search of the op key,
# so a declaration whose name merely contains "ref" or "load_input" is not
# misclassified.
#
# @return [Hash] {} when profiling is disabled or nothing has been aggregated,
#   otherwise { cache_operations:, total_cache_time: } with operations ranked
#   by total wall time
def cache_overhead_analysis
  return {} unless enabled? && @aggregated_stats&.any?

  cache_tags = %w[ref load_input]
  cache_ops = @aggregated_stats.select do |_key, stats|
    cache_tags.include?(stats[:tag].to_s) || stats[:note]&.include?("cache")
  end

  cache_analysis = cache_ops.map do |key, stats|
    avg_wall_ms = stats[:total_ms] / stats[:count]

    {
      op_key: key,
      decl: stats[:decl],
      tag: stats[:tag],
      avg_time_ms: avg_wall_ms.round(4),
      total_time_ms: stats[:total_ms].round(3),
      call_count: stats[:count],
      overhead_per_call: avg_wall_ms.round(6)
    }
  end
  cache_analysis = cache_analysis.sort_by { |entry| -entry[:total_time_ms] }

  {
    cache_operations: cache_analysis,
    total_cache_time: cache_analysis.sum { |entry| entry[:total_time_ms] }.round(3)
  }
end
|
169
|
+
|
170
|
+
# Stream a timestamped "summary" record for the current run.
# Does nothing when profiling is disabled.
def emit_summary!
  return unless enabled?

  payload = { ts: Time.now.utc.iso8601(3), kind: "summary", data: summary }
  stream(payload)
end
|
174
|
+
|
175
|
+
# Stable textual key for "match ops one by one"
|
176
|
+
# Build a textual key for an op: "decl@idx:tag|attr=value,...|args=[...]".
# Attributes are listed in order of their stringified names.
#
# @param decl [String, Symbol]
# @param idx [Integer]
# @param tag [Symbol]
# @param op [#attrs, #args]
# @return [String]
def op_key(decl, idx, tag, op)
  attrs = compact_attrs(op.attrs)
  arguments = op.args

  rendered_attrs = attrs.keys.sort_by(&:to_s).map do |attr_name|
    "#{attr_name}=#{attrs[attr_name].inspect}"
  end

  "#{decl}@#{idx}:#{tag}|#{rendered_attrs.join(",")}|args=#{arguments.inspect}"
end
|
181
|
+
|
182
|
+
# Copy an attrs hash, keeping values of plain built-in types as they are and
# replacing every other value with its #to_s.
#
# @param h [Hash, nil]
# @return [Hash] {} when +h+ is nil or false
def compact_attrs(h)
  return {} unless h

  passthrough = [Array, Hash, Symbol, String, Numeric, TrueClass, FalseClass, NilClass]

  h.transform_values do |value|
    keep = passthrough.any? { |klass| klass === value }
    keep ? value : value.to_s
  end
end
|
191
|
+
|
192
|
+
# Append +obj+ as one JSON line to the profile file, creating the parent
# directory when needed. Does nothing until an output file has been set.
#
# @param obj [#to_json]
def stream(obj)
  target = @file
  return unless target

  FileUtils.mkdir_p(File.dirname(target))
  File.open(target, "a") do |io|
    io.puts(obj.to_json)
  end
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|