kumi 0.0.14 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +40 -0
- data/README.md +0 -27
- data/docs/dev/vm-profiling.md +95 -0
- data/docs/features/README.md +0 -7
- data/lib/kumi/analyzer.rb +10 -2
- data/lib/kumi/compiler.rb +6 -5
- data/lib/kumi/core/analyzer/passes/ir_dependency_pass.rb +65 -0
- data/lib/kumi/core/analyzer/passes/ir_execution_schedule_pass.rb +67 -0
- data/lib/kumi/core/analyzer/passes/toposorter.rb +15 -50
- data/lib/kumi/core/compiler/access_builder.rb +22 -9
- data/lib/kumi/core/compiler/access_codegen.rb +61 -0
- data/lib/kumi/core/compiler/access_emit/base.rb +173 -0
- data/lib/kumi/core/compiler/access_emit/each_indexed.rb +56 -0
- data/lib/kumi/core/compiler/access_emit/materialize.rb +45 -0
- data/lib/kumi/core/compiler/access_emit/ravel.rb +50 -0
- data/lib/kumi/core/compiler/access_emit/read.rb +32 -0
- data/lib/kumi/core/ir/execution_engine/interpreter.rb +56 -189
- data/lib/kumi/core/ir/execution_engine/profiler.rb +139 -11
- data/lib/kumi/core/ir/execution_engine/values.rb +8 -8
- data/lib/kumi/core/ir/execution_engine.rb +5 -30
- data/lib/kumi/dev/parse.rb +12 -12
- data/lib/kumi/dev/profile_aggregator.rb +301 -0
- data/lib/kumi/dev/profile_runner.rb +199 -0
- data/lib/kumi/dev/runner.rb +3 -1
- data/lib/kumi/dev.rb +14 -0
- data/lib/kumi/runtime/executable.rb +32 -153
- data/lib/kumi/runtime/run.rb +105 -0
- data/lib/kumi/schema.rb +15 -14
- data/lib/kumi/version.rb +1 -1
- data/lib/kumi.rb +4 -2
- metadata +15 -3
- data/docs/features/analysis-cascade-mutual-exclusion.md +0 -89
@@ -8,57 +8,34 @@ module Kumi
|
|
8
8
|
module Interpreter
|
9
9
|
PRODUCES_SLOT = %i[const load_input ref array map reduce lift align_to switch].freeze
|
10
10
|
NON_PRODUCERS = %i[guard_push guard_pop assign store].freeze
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
EMPTY_ARY = [].freeze
|
12
|
+
|
13
|
+
def self.run(schedule, input:, runtime:, accessors:, registry:)
|
14
|
+
prof = Profiler.enabled?
|
15
|
+
# --- PROFILER: init per run (but not in persistent mode) ---
|
16
|
+
if prof
|
17
|
+
schema_name = runtime[:schema_name] || "UnknownSchema"
|
18
|
+
# In persistent mode, just update schema name without full reset
|
19
|
+
Profiler.set_schema_name(schema_name)
|
20
20
|
end
|
21
|
-
index
|
22
|
-
end
|
23
|
-
|
24
|
-
def self.run(ir_module, ctx, accessors:, registry:)
|
25
|
-
# Validate registry is properly initialized
|
26
|
-
raise ArgumentError, "Registry cannot be nil" if registry.nil?
|
27
|
-
raise ArgumentError, "Registry must be a Hash, got #{registry.class}" unless registry.is_a?(Hash)
|
28
|
-
|
29
|
-
# --- PROFILER: init per run ---
|
30
|
-
Profiler.reset!(meta: { decls: ir_module.decls&.size || 0 }) if Profiler.enabled?
|
31
21
|
|
32
22
|
outputs = {}
|
33
|
-
target =
|
23
|
+
target = runtime[:target]
|
34
24
|
guard_stack = [true]
|
35
|
-
|
36
|
-
# Always ensure we have a declaration cache - either from caller or new for this VM run
|
37
|
-
declaration_cache = ctx[:declaration_cache] || {}
|
38
|
-
|
39
|
-
# Build name index for targeting by stored names
|
40
|
-
name_index = ctx[:name_index] || (target ? build_name_index(ir_module) : nil)
|
41
|
-
|
42
|
-
# Choose declarations to execute by stored name (not only decl name)
|
43
|
-
decls_to_run =
|
44
|
-
if target
|
45
|
-
# Prefer a decl that STORES the target (covers __vec twins)
|
46
|
-
d = name_index && name_index[target]
|
47
|
-
# Fallback: allow targeting by decl name (legacy behavior)
|
48
|
-
d ||= ir_module.decls.find { |dd| dd.name == target }
|
49
|
-
raise "Unknown target: #{target}" unless d
|
50
|
-
[d]
|
51
|
-
else
|
52
|
-
ir_module.decls
|
53
|
-
end
|
54
25
|
|
55
|
-
|
26
|
+
# Caches live in runtime (engine frame), not input
|
27
|
+
declaration_cache = runtime[:declaration_cache]
|
28
|
+
|
29
|
+
# Choose declarations to execute - prefer explicit schedule if present
|
30
|
+
# decls_to_run = runtime[:decls_to_run] || ir_module.decls
|
31
|
+
|
32
|
+
schedule.each do |decl|
|
56
33
|
slots = []
|
57
34
|
guard_stack = [true] # reset per decl
|
58
35
|
|
59
36
|
decl.ops.each_with_index do |op, op_index|
|
60
|
-
t0 =
|
61
|
-
cpu_t0 =
|
37
|
+
t0 = prof ? Profiler.t0 : nil
|
38
|
+
cpu_t0 = prof ? Profiler.cpu_t0 : nil
|
62
39
|
rows_touched = nil
|
63
40
|
if ENV["ASSERT_VM_SLOTS"] == "1"
|
64
41
|
expected = op_index
|
@@ -84,7 +61,10 @@ module Kumi
|
|
84
61
|
false
|
85
62
|
end
|
86
63
|
slots << nil # keep slot_id == op_index
|
87
|
-
|
64
|
+
if prof
|
65
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
|
66
|
+
note: "enter")
|
67
|
+
end
|
88
68
|
next
|
89
69
|
|
90
70
|
when :guard_pop
|
@@ -97,105 +77,55 @@ module Kumi
|
|
97
77
|
# Skip body when guarded off, but keep indices aligned
|
98
78
|
unless guard_stack.last
|
99
79
|
slots << nil if PRODUCES_SLOT.include?(op.tag) || NON_PRODUCERS.include?(op.tag)
|
100
|
-
|
80
|
+
if t0
|
81
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: op.tag, op: op, t0: t0, cpu_t0: cpu_t0, rows: 0,
|
82
|
+
note: "skipped")
|
83
|
+
end
|
101
84
|
next
|
102
85
|
end
|
103
86
|
|
104
87
|
case op.tag
|
105
88
|
|
106
|
-
when :assign
|
107
|
-
dst = op.attrs[:dst]
|
108
|
-
src = op.attrs[:src]
|
109
|
-
raise "assign: dst/src OOB" if dst >= slots.length || src >= slots.length
|
110
|
-
|
111
|
-
slots[dst] = slots[src]
|
112
|
-
Profiler.record!(decl: decl.name, idx: op_index, tag: :assign, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
|
113
|
-
|
114
89
|
when :const
|
115
90
|
result = Values.scalar(op.attrs[:value])
|
116
|
-
puts "DEBUG Const #{op.attrs[:value].inspect}: result=#{result}" if ENV["DEBUG_VM_ARGS"]
|
117
91
|
slots << result
|
118
92
|
Profiler.record!(decl: decl.name, idx: op_index, tag: :const, op: op, t0: t0, cpu_t0: cpu_t0, rows: 1) if t0
|
119
93
|
|
120
94
|
when :load_input
|
121
95
|
plan_id = op.attrs[:plan_id]
|
122
|
-
scope = op.attrs[:scope] || []
|
96
|
+
scope = op.attrs[:scope] || EMPTY_ARY
|
123
97
|
scalar = op.attrs[:is_scalar]
|
124
98
|
indexed = op.attrs[:has_idx]
|
125
99
|
|
126
|
-
#
|
127
|
-
acc_cache = ctx[:accessor_cache] || {}
|
128
|
-
input_obj = ctx[:input] || ctx["input"]
|
129
|
-
cache_key = [plan_id, input_obj.object_id]
|
100
|
+
raw = accessors[plan_id].call(input) # <- memoized by ExecutionEngine
|
130
101
|
|
131
|
-
if acc_cache.key?(cache_key)
|
132
|
-
raw = acc_cache[cache_key]
|
133
|
-
hit = true
|
134
|
-
else
|
135
|
-
raw = accessors.fetch(plan_id).call(input_obj)
|
136
|
-
acc_cache[cache_key] = raw
|
137
|
-
hit = false
|
138
|
-
end
|
139
|
-
|
140
|
-
puts "DEBUG LoadInput plan_id: #{plan_id} raw_values: #{raw.inspect} cache_hit: #{hit}" if ENV["DEBUG_VM_ARGS"]
|
141
102
|
slots << if scalar
|
142
103
|
Values.scalar(raw)
|
143
104
|
elsif indexed
|
144
|
-
rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
|
105
|
+
rows_touched = prof && raw.respond_to?(:size) ? raw.size : raw.count
|
145
106
|
Values.vec(scope, raw.map { |v, idx| { v: v, idx: Array(idx) } }, true)
|
146
107
|
else
|
147
|
-
rows_touched = raw.respond_to?(:size) ? raw.size : raw.count
|
108
|
+
rows_touched = prof && raw.respond_to?(:size) ? raw.size : raw.count
|
148
109
|
Values.vec(scope, raw.map { |v| { v: v } }, false)
|
149
110
|
end
|
150
111
|
rows_touched ||= 1
|
151
|
-
|
152
|
-
|
153
|
-
|
112
|
+
if t0
|
113
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :load_input, op: op, t0: t0, cpu_t0: cpu_t0,
|
114
|
+
rows: rows_touched, note: "ok")
|
115
|
+
end
|
154
116
|
|
155
117
|
when :ref
|
156
118
|
name = op.attrs[:name]
|
157
|
-
|
158
|
-
if outputs.key?(name)
|
159
|
-
referenced = outputs[name]
|
160
|
-
elsif declaration_cache.key?(name)
|
161
|
-
referenced = declaration_cache[name]
|
162
|
-
else
|
163
|
-
# demand-compute the producing decl up to the store of `name`
|
164
|
-
active = (ctx[:active] ||= {})
|
165
|
-
raise "cycle detected: #{name}" if active[name]
|
166
|
-
active[name] = true
|
167
|
-
|
168
|
-
subctx = {
|
169
|
-
input: ctx[:input] || ctx["input"],
|
170
|
-
target: name, # target is the STORED NAME
|
171
|
-
accessor_cache: ctx[:accessor_cache],
|
172
|
-
declaration_cache: ctx[:declaration_cache],
|
173
|
-
name_index: name_index, # reuse map
|
174
|
-
active: active
|
175
|
-
}
|
176
|
-
referenced = self.run(ir_module, subctx, accessors: accessors, registry: registry).fetch(name)
|
177
|
-
active.delete(name)
|
178
|
-
end
|
179
|
-
|
180
|
-
if ENV["DEBUG_VM_ARGS"]
|
181
|
-
puts "DEBUG Ref #{name}: #{referenced[:k] == :scalar ? "scalar(#{referenced[:v].inspect})" : "#{referenced[:k]}(#{referenced[:rows]&.size || 0} rows)"}"
|
182
|
-
end
|
183
|
-
|
184
|
-
slots << referenced
|
185
|
-
rows_touched = (referenced[:k] == :vec) ? (referenced[:rows]&.size || 0) : 1
|
186
|
-
Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0, rows: rows_touched) if t0
|
119
|
+
referenced = outputs[name] { raise "unscheduled ref #{name}: producer not executed or dependency analysis failed" }
|
187
120
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
elsif slots[slot_idx].nil?
|
194
|
-
raise "Array operation: slot #{slot_idx} is nil " \
|
195
|
-
"(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
196
|
-
end
|
121
|
+
slots << referenced
|
122
|
+
rows_touched = referenced[:k] == :vec ? (referenced[:rows]&.size || 0) : 1
|
123
|
+
if prof
|
124
|
+
Profiler.record!(decl: decl.name, idx: op_index, tag: :ref, op: op, t0: t0, cpu_t0: cpu_t0,
|
125
|
+
rows: rows_touched, note: hit)
|
197
126
|
end
|
198
127
|
|
128
|
+
when :array
|
199
129
|
parts = op.args.map { |i| slots[i] }
|
200
130
|
if parts.all? { |p| p[:k] == :scalar }
|
201
131
|
slots << Values.scalar(parts.map { |p| p[:v] })
|
@@ -219,63 +149,43 @@ module Kumi
|
|
219
149
|
fn_name = op.attrs[:fn]
|
220
150
|
fn_entry = registry[fn_name] or raise "Function #{fn_name} not found in registry"
|
221
151
|
fn = fn_entry.fn
|
222
|
-
puts "DEBUG Map #{fn_name}: args=#{op.args.inspect}" if ENV["DEBUG_VM_ARGS"]
|
223
152
|
|
224
153
|
# Validate slot indices before accessing
|
225
|
-
op.args.each do |slot_idx|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
end
|
154
|
+
# op.args.each do |slot_idx|
|
155
|
+
# if slot_idx >= slots.length
|
156
|
+
# raise "Map operation #{fn_name}: slot index #{slot_idx} out of bounds (slots.length=#{slots.length})"
|
157
|
+
# elsif slots[slot_idx].nil?
|
158
|
+
# raise "Map operation #{fn_name}: slot #{slot_idx} is nil " \
|
159
|
+
# "(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
160
|
+
# end
|
161
|
+
# end
|
233
162
|
|
234
163
|
args = op.args.map { |slot_idx| slots[slot_idx] }
|
235
164
|
|
236
165
|
if args.all? { |a| a[:k] == :scalar }
|
237
|
-
puts "DEBUG Scalar call #{fn_name}: args=#{args.map { |a| a[:v] }.inspect}" if ENV["DEBUG_VM_ARGS"]
|
238
166
|
scalar_args = args.map { |a| a[:v] }
|
239
167
|
result = fn.call(*scalar_args)
|
240
168
|
slots << Values.scalar(result)
|
241
169
|
else
|
242
170
|
base = args.find { |a| a[:k] == :vec } or raise "Map needs a vec carrier"
|
243
|
-
puts "DEBUG Vec call #{fn_name}: base=#{base.inspect}" if ENV["DEBUG_VM_ARGS"]
|
244
171
|
# Preserve original order: broadcast scalars in-place
|
245
172
|
arg_vecs = args.map { |a| a[:k] == :scalar ? Combinators.broadcast_scalar(a, base) : a }
|
246
|
-
puts "DEBUG Vec call #{fn_name}: arg_vecs=#{arg_vecs.inspect}" if ENV["DEBUG_VM_ARGS"]
|
247
173
|
scopes = arg_vecs.map { |v| v[:scope] }.uniq
|
248
|
-
puts "DEBUG Vec call #{fn_name}: scopes=#{scopes.inspect}" if ENV["DEBUG_VM_ARGS"]
|
249
174
|
raise "Cross-scope Map without Join" unless scopes.size <= 1
|
250
175
|
|
251
176
|
zipped = Combinators.zip_same_scope(*arg_vecs)
|
252
177
|
|
253
|
-
# if ENV["DEBUG_VM_ARGS"] && fn_name == :if
|
254
|
-
# puts "DEBUG Vec call #{fn_name}: zipped rows:"
|
255
|
-
# zipped[:rows].each_with_index do |row, i|
|
256
|
-
# puts " [#{i}] args=#{Array(row[:v]).inspect}"
|
257
|
-
# end
|
258
|
-
# end
|
259
|
-
|
260
|
-
puts "DEBUG Vec call #{fn_name}: zipped rows=#{zipped[:rows].inspect}" if ENV["DEBUG_VM_ARGS"]
|
261
178
|
rows = zipped[:rows].map do |row|
|
262
179
|
row_args = Array(row[:v])
|
263
180
|
vr = fn.call(*row_args)
|
264
181
|
row.key?(:idx) ? { v: vr, idx: row[:idx] } : { v: vr }
|
265
182
|
end
|
266
|
-
puts "DEBUG Vec call #{fn_name}: result rows=#{rows.inspect}" if ENV["DEBUG_VM_ARGS"]
|
267
183
|
|
268
184
|
slots << Values.vec(base[:scope], rows, base[:has_idx])
|
269
185
|
end
|
270
186
|
|
271
187
|
when :switch
|
272
188
|
chosen = op.attrs[:cases].find do |(cond_slot, _)|
|
273
|
-
if cond_slot >= slots.length
|
274
|
-
raise "Switch operation: condition slot #{cond_slot} out of bounds (slots.length=#{slots.length})"
|
275
|
-
elsif slots[cond_slot].nil?
|
276
|
-
raise "Switch operation: condition slot #{cond_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
277
|
-
end
|
278
|
-
|
279
189
|
c = slots[cond_slot]
|
280
190
|
if c[:k] == :scalar
|
281
191
|
!!c[:v]
|
@@ -285,22 +195,12 @@ module Kumi
|
|
285
195
|
end
|
286
196
|
end
|
287
197
|
result_slot = chosen ? chosen[1] : op.attrs[:default]
|
288
|
-
if result_slot >= slots.length
|
289
|
-
raise "Switch operation: result slot #{result_slot} out of bounds (slots.length=#{slots.length})"
|
290
|
-
elsif slots[result_slot].nil?
|
291
|
-
raise "Switch operation: result slot #{result_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
292
|
-
end
|
293
198
|
|
294
199
|
slots << slots[result_slot]
|
295
200
|
|
296
201
|
when :store
|
297
202
|
name = op.attrs[:name]
|
298
203
|
src = op.args[0] or raise "store: missing source slot"
|
299
|
-
if src >= slots.length
|
300
|
-
raise "Store operation '#{name}': source slot #{src} out of bounds (slots.length=#{slots.length})"
|
301
|
-
elsif slots[src].nil?
|
302
|
-
raise "Store operation '#{name}': source slot #{src} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
303
|
-
end
|
304
204
|
|
305
205
|
result = slots[src]
|
306
206
|
outputs[name] = result
|
@@ -317,10 +217,8 @@ module Kumi
|
|
317
217
|
fn = fn_entry.fn
|
318
218
|
|
319
219
|
src = slots[op.args[0]]
|
320
|
-
|
321
|
-
|
322
|
-
result_scope = Array(op.attrs[:result_scope] || [])
|
323
|
-
axis = Array(op.attrs[:axis] || [])
|
220
|
+
result_scope = op.attrs[:result_scope]
|
221
|
+
axis = op.attrs[:axis]
|
324
222
|
|
325
223
|
if result_scope.empty?
|
326
224
|
# === GLOBAL REDUCE ===
|
@@ -328,12 +226,6 @@ module Kumi
|
|
328
226
|
vals = src[:rows].map { |r| r[:v] }
|
329
227
|
slots << Values.scalar(fn.call(vals))
|
330
228
|
else
|
331
|
-
# === GROUPED REDUCE ===
|
332
|
-
# Must have indices to group by prefix keys.
|
333
|
-
unless src[:has_idx]
|
334
|
-
raise "Grouped reduce requires indexed input (got ravel) for #{op.attrs[:fn]} at #{result_scope.inspect}"
|
335
|
-
end
|
336
|
-
|
337
229
|
group_len = result_scope.length
|
338
230
|
|
339
231
|
# Preserve stable source order so zips with other @result_scope vecs line up.
|
@@ -356,39 +248,17 @@ module Kumi
|
|
356
248
|
|
357
249
|
when :lift
|
358
250
|
src_slot = op.args[0]
|
359
|
-
if src_slot >= slots.length
|
360
|
-
raise "Lift operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
|
361
|
-
elsif slots[src_slot].nil?
|
362
|
-
raise "Lift operation: source slot #{src_slot} is nil (available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
363
|
-
end
|
364
251
|
|
365
252
|
v = slots[src_slot]
|
366
|
-
to_scope = op.attrs[:to_scope] || []
|
253
|
+
to_scope = op.attrs[:to_scope] || EMPTY_ARY
|
367
254
|
depth = [to_scope.length, v[:rank] || v[:rows].first&.dig(:idx)&.length || 0].min
|
368
255
|
slots << Values.scalar(Combinators.group_rows(v[:rows], depth))
|
369
256
|
|
370
257
|
when :align_to
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
if tgt_slot >= slots.length
|
375
|
-
raise "AlignTo operation: target slot #{tgt_slot} out of bounds (slots.length=#{slots.length})"
|
376
|
-
elsif slots[tgt_slot].nil?
|
377
|
-
raise "AlignTo operation: target slot #{tgt_slot} is nil " \
|
378
|
-
"(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
379
|
-
end
|
380
|
-
|
381
|
-
if src_slot >= slots.length
|
382
|
-
raise "AlignTo operation: source slot #{src_slot} out of bounds (slots.length=#{slots.length})"
|
383
|
-
elsif slots[src_slot].nil?
|
384
|
-
raise "AlignTo operation: source slot #{src_slot} is nil " \
|
385
|
-
"(available slots: #{slots.length}, non-nil slots: #{slots.compact.length})"
|
386
|
-
end
|
258
|
+
tgt = slots[op.args[0]]
|
259
|
+
src = slots[op.args[1]]
|
387
260
|
|
388
|
-
|
389
|
-
src = slots[src_slot]
|
390
|
-
|
391
|
-
to_scope = op.attrs[:to_scope] || []
|
261
|
+
to_scope = op.attrs[:to_scope] || EMPTY_ARY
|
392
262
|
require_unique = op.attrs[:require_unique] || false
|
393
263
|
on_missing = op.attrs[:on_missing] || :error
|
394
264
|
|
@@ -397,9 +267,6 @@ module Kumi
|
|
397
267
|
on_missing: on_missing)
|
398
268
|
slots << aligned
|
399
269
|
|
400
|
-
when :join
|
401
|
-
raise NotImplementedError, "Join not implemented yet"
|
402
|
-
|
403
270
|
else
|
404
271
|
raise "Unknown operation: #{op.tag}"
|
405
272
|
end
|
@@ -3,6 +3,7 @@
|
|
3
3
|
require "json"
|
4
4
|
require "fileutils"
|
5
5
|
require "time"
|
6
|
+
require "set"
|
6
7
|
|
7
8
|
module Kumi
|
8
9
|
module Core
|
@@ -11,19 +12,71 @@ module Kumi
|
|
11
12
|
module Profiler
|
12
13
|
class << self
|
13
14
|
def enabled? = ENV["KUMI_PROFILE"] == "1"
|
15
|
+
def ops_enabled? = ENV.fetch("KUMI_PROFILE_OPS", "1") == "1"
|
16
|
+
def sample_rate = (ENV["KUMI_PROFILE_SAMPLE"]&.to_i || 1)
|
17
|
+
def persistent? = ENV["KUMI_PROFILE_PERSISTENT"] == "1"
|
18
|
+
|
19
|
+
def set_schema_name(name)
|
20
|
+
@schema_name = name
|
21
|
+
|
22
|
+
# Ensure profiler is initialized in persistent mode
|
23
|
+
unless @initialized
|
24
|
+
@events = []
|
25
|
+
@meta = {}
|
26
|
+
@file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
|
27
|
+
@run_id ||= 1
|
28
|
+
@op_seq ||= 0
|
29
|
+
@aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
|
30
|
+
|
31
|
+
# Truncate file if needed
|
32
|
+
if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
|
33
|
+
FileUtils.mkdir_p(File.dirname(@file))
|
34
|
+
File.write(@file, "")
|
35
|
+
@aggregated_stats.clear
|
36
|
+
@persistent_initialized = true
|
37
|
+
end
|
38
|
+
|
39
|
+
@initialized = true
|
40
|
+
end
|
41
|
+
end
|
14
42
|
|
15
43
|
def reset!(meta: {})
|
44
|
+
set_schema_name(meta[:schema_name]) if meta[:schema_name]
|
16
45
|
return unless enabled?
|
17
|
-
@events = []
|
18
|
-
@meta = meta
|
19
|
-
@file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
|
20
|
-
@run_id = (@run_id || 0) + 1 # Track run number for averaging
|
21
|
-
@aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
|
22
46
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
@
|
47
|
+
# In persistent mode, don't reset aggregated stats or increment run_id
|
48
|
+
# This allows profiling across multiple schema creations
|
49
|
+
if persistent?
|
50
|
+
@events = []
|
51
|
+
@meta = (@meta || {}).merge(meta)
|
52
|
+
@schema_name = meta[:schema_name] if meta[:schema_name]
|
53
|
+
@file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
|
54
|
+
@run_id ||= 1
|
55
|
+
@op_seq ||= 0
|
56
|
+
@aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
|
57
|
+
|
58
|
+
# Only truncate on very first reset in persistent mode
|
59
|
+
if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
|
60
|
+
FileUtils.mkdir_p(File.dirname(@file))
|
61
|
+
File.write(@file, "")
|
62
|
+
@aggregated_stats.clear
|
63
|
+
@persistent_initialized = true
|
64
|
+
end
|
65
|
+
else
|
66
|
+
# Original behavior: full reset each time
|
67
|
+
@events = []
|
68
|
+
@meta = meta
|
69
|
+
@schema_name = meta[:schema_name]
|
70
|
+
@file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
|
71
|
+
@run_id = (@run_id || 0) + 1
|
72
|
+
@op_seq = 0
|
73
|
+
@aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
|
74
|
+
|
75
|
+
if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
|
76
|
+
FileUtils.mkdir_p(File.dirname(@file))
|
77
|
+
File.write(@file, "")
|
78
|
+
@aggregated_stats.clear
|
79
|
+
end
|
27
80
|
end
|
28
81
|
end
|
29
82
|
|
@@ -37,9 +90,54 @@ module Kumi
|
|
37
90
|
Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
|
38
91
|
end
|
39
92
|
|
40
|
-
#
|
41
|
-
def
|
93
|
+
# Phase timing for coarse-grained operations
|
94
|
+
def phase(name, tags = {})
|
95
|
+
return yield unless enabled?
|
96
|
+
p0 = t0; c0 = cpu_t0
|
97
|
+
result = yield
|
98
|
+
wall_ms = (t0 - p0) * 1000.0
|
99
|
+
cpu_ms = (cpu_t0 - c0) * 1000.0
|
100
|
+
stream({
|
101
|
+
ts: Time.now.utc.iso8601(3),
|
102
|
+
kind: "phase",
|
103
|
+
name: name,
|
104
|
+
wall_ms: wall_ms.round(3),
|
105
|
+
cpu_ms: cpu_ms.round(3),
|
106
|
+
tags: tags,
|
107
|
+
run: @run_id
|
108
|
+
})
|
109
|
+
result
|
110
|
+
end
|
111
|
+
|
112
|
+
# Memory snapshot with GC statistics
|
113
|
+
def memory_snapshot(label, extra: {})
|
42
114
|
return unless enabled?
|
115
|
+
s = GC.stat
|
116
|
+
stream({
|
117
|
+
ts: Time.now.utc.iso8601(3),
|
118
|
+
kind: "mem",
|
119
|
+
label: label,
|
120
|
+
heap_live: s[:heap_live_slots],
|
121
|
+
old_objects: s[:old_objects],
|
122
|
+
minor_gc: s[:minor_gc_count],
|
123
|
+
major_gc: s[:major_gc_count],
|
124
|
+
rss_mb: read_rss_mb,
|
125
|
+
run: @run_id,
|
126
|
+
**extra
|
127
|
+
})
|
128
|
+
end
|
129
|
+
|
130
|
+
def read_rss_mb
|
131
|
+
((File.read("/proc/#{$$}/status")[/VmRSS:\s+(\d+)\skB/, 1].to_i) / 1024.0).round(2)
|
132
|
+
rescue
|
133
|
+
nil
|
134
|
+
end
|
135
|
+
|
136
|
+
# Per-op record with both wall time and CPU time (with sampling support)
|
137
|
+
def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
|
138
|
+
return unless enabled? && ops_enabled?
|
139
|
+
@op_seq += 1
|
140
|
+
return unless sample_rate <= 1 || (@op_seq % sample_rate).zero?
|
43
141
|
|
44
142
|
wall_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0)
|
45
143
|
cpu_ms = cpu_t0 ? ((Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0) : wall_ms
|
@@ -47,6 +145,7 @@ module Kumi
|
|
47
145
|
ev = {
|
48
146
|
ts: Time.now.utc.iso8601(3),
|
49
147
|
run: @run_id,
|
148
|
+
schema: @schema_name, # schema identifier for multi-schema differentiation
|
50
149
|
decl: decl, # decl name (string/symbol)
|
51
150
|
i: idx, # op index
|
52
151
|
tag: tag, # op tag (symbol)
|
@@ -172,6 +271,35 @@ module Kumi
|
|
172
271
|
stream({ ts: Time.now.utc.iso8601(3), kind: "summary", data: summary })
|
173
272
|
end
|
174
273
|
|
274
|
+
def init_persistent!
|
275
|
+
return unless enabled? && persistent?
|
276
|
+
@persistent_initialized = false
|
277
|
+
reset!
|
278
|
+
end
|
279
|
+
|
280
|
+
def finalize!
|
281
|
+
return unless enabled?
|
282
|
+
|
283
|
+
# Emit final aggregated summary
|
284
|
+
if @aggregated_stats&.any?
|
285
|
+
stream({
|
286
|
+
ts: Time.now.utc.iso8601(3),
|
287
|
+
kind: "final_summary",
|
288
|
+
data: averaged_analysis
|
289
|
+
})
|
290
|
+
end
|
291
|
+
|
292
|
+
# Emit cache analysis if available
|
293
|
+
cache_analysis = cache_overhead_analysis
|
294
|
+
if cache_analysis[:cache_operations]&.any?
|
295
|
+
stream({
|
296
|
+
ts: Time.now.utc.iso8601(3),
|
297
|
+
kind: "cache_analysis",
|
298
|
+
data: cache_analysis
|
299
|
+
})
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
175
303
|
# Stable textual key for "match ops one by one"
|
176
304
|
def op_key(decl, idx, tag, op)
|
177
305
|
attrs = compact_attrs(op.attrs)
|
@@ -13,14 +13,14 @@ module Kumi
|
|
13
13
|
|
14
14
|
# Create a vector with scope and rows
|
15
15
|
def self.vec(scope, rows, has_idx)
|
16
|
-
if has_idx
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
16
|
+
rank = if has_idx
|
17
|
+
rows.empty? ? 0 : rows.first[:idx].length
|
18
|
+
# TODO: > Make sure this is not costly
|
19
|
+
# raise if rows.any? { |r| r[:idx].length != rank }
|
20
|
+
# rows = rows.sort_by { |r| r[:idx] } # one-time sort
|
21
|
+
else
|
22
|
+
0
|
23
|
+
end
|
24
24
|
|
25
25
|
{ k: :vec, scope: scope, rows: rows, has_idx: has_idx, rank: rank }
|
26
26
|
end
|
@@ -41,37 +41,12 @@ module Kumi
|
|
41
41
|
# - DEBUG_VM_ARGS=1 prints per-op execution and arguments.
|
42
42
|
# - DEBUG_GROUP_ROWS=1 prints grouping decisions during Lift.
|
43
43
|
module ExecutionEngine
|
44
|
-
def self.run(
|
45
|
-
|
46
|
-
if ctx[:accessor_cache]
|
47
|
-
# Include input data in cache key to avoid cross-context pollution
|
48
|
-
input_key = ctx[:input]&.hash || ctx["input"]&.hash || 0
|
49
|
-
memoized_accessors = add_persistent_memoization(accessors, ctx[:accessor_cache], input_key)
|
50
|
-
else
|
51
|
-
memoized_accessors = add_temporary_memoization(accessors)
|
52
|
-
end
|
53
|
-
|
54
|
-
Interpreter.run(ir_module, ctx, accessors: memoized_accessors, registry: registry)
|
55
|
-
end
|
56
|
-
|
57
|
-
private
|
44
|
+
def self.run(schedule, input:, accessors:, registry:, runtime: {})
|
45
|
+
runtime[:accessor_cache] ||= {}
|
58
46
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
cache_key = [plan_id, input_key]
|
63
|
-
cache[cache_key] ||= accessor_fn.call(input_data)
|
64
|
-
end]
|
65
|
-
end.to_h
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.add_temporary_memoization(accessors)
|
69
|
-
cache = {}
|
70
|
-
accessors.map do |plan_id, accessor_fn|
|
71
|
-
[plan_id, lambda do |input_data|
|
72
|
-
cache[plan_id] ||= accessor_fn.call(input_data)
|
73
|
-
end]
|
74
|
-
end.to_h
|
47
|
+
Dev::Profiler.phase("engine.interpreter") do
|
48
|
+
Interpreter.run(schedule, input: input, runtime: runtime, accessors: accessors, registry: registry)
|
49
|
+
end
|
75
50
|
end
|
76
51
|
end
|
77
52
|
end
|