kumi 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,7 @@
3
3
  require "json"
4
4
  require "fileutils"
5
5
  require "time"
6
+ require "set"
6
7
 
7
8
  module Kumi
8
9
  module Core
@@ -11,19 +12,71 @@ module Kumi
11
12
  module Profiler
12
13
  class << self
13
14
  def enabled? = ENV["KUMI_PROFILE"] == "1"
15
+ def ops_enabled? = ENV.fetch("KUMI_PROFILE_OPS", "1") == "1"
16
+ def sample_rate = (ENV["KUMI_PROFILE_SAMPLE"]&.to_i || 1)
17
+ def persistent? = ENV["KUMI_PROFILE_PERSISTENT"] == "1"
18
+
19
+ def set_schema_name(name)
20
+ @schema_name = name
21
+
22
+ # Ensure profiler is initialized in persistent mode
23
+ unless @initialized
24
+ @events = []
25
+ @meta = {}
26
+ @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
27
+ @run_id ||= 1
28
+ @op_seq ||= 0
29
+ @aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
30
+
31
+ # Truncate file if needed
32
+ if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
33
+ FileUtils.mkdir_p(File.dirname(@file))
34
+ File.write(@file, "")
35
+ @aggregated_stats.clear
36
+ @persistent_initialized = true
37
+ end
38
+
39
+ @initialized = true
40
+ end
41
+ end
14
42
 
15
43
  def reset!(meta: {})
44
+ set_schema_name(meta[:schema_name]) if meta[:schema_name]
16
45
  return unless enabled?
17
- @events = []
18
- @meta = meta
19
- @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
20
- @run_id = (@run_id || 0) + 1 # Track run number for averaging
21
- @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
22
46
 
23
- if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
24
- FileUtils.mkdir_p(File.dirname(@file))
25
- File.write(@file, "")
26
- @aggregated_stats.clear # Clear aggregated stats on truncate
47
+ # In persistent mode, don't reset aggregated stats or increment run_id
48
+ # This allows profiling across multiple schema creations
49
+ if persistent?
50
+ @events = []
51
+ @meta = (@meta || {}).merge(meta)
52
+ @schema_name = meta[:schema_name] if meta[:schema_name]
53
+ @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
54
+ @run_id ||= 1
55
+ @op_seq ||= 0
56
+ @aggregated_stats ||= Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } }
57
+
58
+ # Only truncate on very first reset in persistent mode
59
+ if ENV["KUMI_PROFILE_TRUNCATE"] == "1" && !@persistent_initialized
60
+ FileUtils.mkdir_p(File.dirname(@file))
61
+ File.write(@file, "")
62
+ @aggregated_stats.clear
63
+ @persistent_initialized = true
64
+ end
65
+ else
66
+ # Original behavior: full reset each time
67
+ @events = []
68
+ @meta = meta
69
+ @schema_name = meta[:schema_name]
70
+ @file = ENV["KUMI_PROFILE_FILE"] || "tmp/profile.jsonl"
71
+ @run_id = (@run_id || 0) + 1
72
+ @op_seq = 0
73
+ @aggregated_stats = (@aggregated_stats || Hash.new { |h, k| h[k] = { count: 0, total_ms: 0.0, total_cpu_ms: 0.0, rows: 0, runs: Set.new } })
74
+
75
+ if ENV["KUMI_PROFILE_TRUNCATE"] == "1"
76
+ FileUtils.mkdir_p(File.dirname(@file))
77
+ File.write(@file, "")
78
+ @aggregated_stats.clear
79
+ end
27
80
  end
28
81
  end
29
82
 
@@ -37,9 +90,54 @@ module Kumi
37
90
  Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID)
38
91
  end
39
92
 
40
- # Per-op record with both wall time and CPU time
41
- def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
93
+ # Phase timing for coarse-grained operations
94
+ def phase(name, tags = {})
95
+ return yield unless enabled?
96
+ p0 = t0; c0 = cpu_t0
97
+ result = yield
98
+ wall_ms = (t0 - p0) * 1000.0
99
+ cpu_ms = (cpu_t0 - c0) * 1000.0
100
+ stream({
101
+ ts: Time.now.utc.iso8601(3),
102
+ kind: "phase",
103
+ name: name,
104
+ wall_ms: wall_ms.round(3),
105
+ cpu_ms: cpu_ms.round(3),
106
+ tags: tags,
107
+ run: @run_id
108
+ })
109
+ result
110
+ end
111
+
112
+ # Memory snapshot with GC statistics
113
+ def memory_snapshot(label, extra: {})
42
114
  return unless enabled?
115
+ s = GC.stat
116
+ stream({
117
+ ts: Time.now.utc.iso8601(3),
118
+ kind: "mem",
119
+ label: label,
120
+ heap_live: s[:heap_live_slots],
121
+ old_objects: s[:old_objects],
122
+ minor_gc: s[:minor_gc_count],
123
+ major_gc: s[:major_gc_count],
124
+ rss_mb: read_rss_mb,
125
+ run: @run_id,
126
+ **extra
127
+ })
128
+ end
129
+
130
+ def read_rss_mb
131
+ ((File.read("/proc/#{$$}/status")[/VmRSS:\s+(\d+)\skB/, 1].to_i) / 1024.0).round(2)
132
+ rescue
133
+ nil
134
+ end
135
+
136
+ # Per-op record with both wall time and CPU time (with sampling support)
137
+ def record!(decl:, idx:, tag:, op:, t0:, cpu_t0: nil, rows: nil, note: nil)
138
+ return unless enabled? && ops_enabled?
139
+ @op_seq += 1
140
+ return unless sample_rate <= 1 || (@op_seq % sample_rate).zero?
43
141
 
44
142
  wall_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - t0) * 1000.0)
45
143
  cpu_ms = cpu_t0 ? ((Process.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID) - cpu_t0) * 1000.0) : wall_ms
@@ -47,6 +145,7 @@ module Kumi
47
145
  ev = {
48
146
  ts: Time.now.utc.iso8601(3),
49
147
  run: @run_id,
148
+ schema: @schema_name, # schema identifier for multi-schema differentiation
50
149
  decl: decl, # decl name (string/symbol)
51
150
  i: idx, # op index
52
151
  tag: tag, # op tag (symbol)
@@ -172,6 +271,35 @@ module Kumi
172
271
  stream({ ts: Time.now.utc.iso8601(3), kind: "summary", data: summary })
173
272
  end
174
273
 
274
+ def init_persistent!
275
+ return unless enabled? && persistent?
276
+ @persistent_initialized = false
277
+ reset!
278
+ end
279
+
280
+ def finalize!
281
+ return unless enabled?
282
+
283
+ # Emit final aggregated summary
284
+ if @aggregated_stats&.any?
285
+ stream({
286
+ ts: Time.now.utc.iso8601(3),
287
+ kind: "final_summary",
288
+ data: averaged_analysis
289
+ })
290
+ end
291
+
292
+ # Emit cache analysis if available
293
+ cache_analysis = cache_overhead_analysis
294
+ if cache_analysis[:cache_operations]&.any?
295
+ stream({
296
+ ts: Time.now.utc.iso8601(3),
297
+ kind: "cache_analysis",
298
+ data: cache_analysis
299
+ })
300
+ end
301
+ end
302
+
175
303
  # Stable textual key for "match ops one by one"
176
304
  def op_key(decl, idx, tag, op)
177
305
  attrs = compact_attrs(op.attrs)
@@ -43,15 +43,15 @@ module Kumi
43
43
  module ExecutionEngine
44
44
  def self.run(ir_module, ctx, accessors:, registry:)
45
45
  # Use persistent accessor cache if available, otherwise create temporary one
46
- if ctx[:accessor_cache]
46
+ memoized_accessors = Dev::Profiler.phase("engine.memoization") do
47
47
  # Include input data in cache key to avoid cross-context pollution
48
48
  input_key = ctx[:input]&.hash || ctx["input"]&.hash || 0
49
- memoized_accessors = add_persistent_memoization(accessors, ctx[:accessor_cache], input_key)
50
- else
51
- memoized_accessors = add_temporary_memoization(accessors)
49
+ add_persistent_memoization(accessors, ctx[:accessor_cache], input_key)
50
+ end
51
+
52
+ Dev::Profiler.phase("engine.interpreter") do
53
+ Interpreter.run(ir_module, ctx, accessors: memoized_accessors, registry: registry)
52
54
  end
53
-
54
- Interpreter.run(ir_module, ctx, accessors: memoized_accessors, registry: registry)
55
55
  end
56
56
 
57
57
  private
@@ -64,15 +64,6 @@ module Kumi
64
64
  end]
65
65
  end.to_h
66
66
  end
67
-
68
- def self.add_temporary_memoization(accessors)
69
- cache = {}
70
- accessors.map do |plan_id, accessor_fn|
71
- [plan_id, lambda do |input_data|
72
- cache[plan_id] ||= accessor_fn.call(input_data)
73
- end]
74
- end.to_h
75
- end
76
67
  end
77
68
  end
78
69
  end
@@ -0,0 +1,301 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+
5
+ module Kumi
6
+ module Dev
7
+ class ProfileAggregator
8
+ attr_reader :events, :phases, :operations, :memory_snapshots, :final_summary
9
+
10
+ def initialize(jsonl_file)
11
+ @jsonl_file = jsonl_file
12
+ @events = []
13
+ @phases = []
14
+ @operations = []
15
+ @memory_snapshots = []
16
+ @final_summary = nil
17
+ load_events
18
+ end
19
+
20
+ def self.load(jsonl_file)
21
+ new(jsonl_file)
22
+ end
23
+
24
+ # Core aggregation methods
25
+ def total_execution_time
26
+ script_phase = phases.find { |p| p["name"] == "script_execution" }
27
+ script_phase ? script_phase["wall_ms"] : 0
28
+ end
29
+
30
+ def vm_execution_time
31
+ vm_phases = phases.select { |p| p["name"] == "vm.run" }
32
+ vm_phases.sum { |p| p["wall_ms"] || 0 }
33
+ end
34
+
35
+ def vm_execution_count
36
+ phases.count { |p| p["name"] == "vm.run" }
37
+ end
38
+
39
+ def runs_analyzed
40
+ (operations + phases + memory_snapshots).map { |e| e["run"] }.compact.uniq.sort
41
+ end
42
+
43
+ def schema_breakdown
44
+ @schema_breakdown ||= operations.group_by { |op| op["schema"] || "Unknown" }.transform_values do |ops|
45
+ {
46
+ operations: ops.length,
47
+ time: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
48
+ declarations: ops.map { |op| op["decl"] }.uniq.compact.sort
49
+ }
50
+ end
51
+ end
52
+
53
+ def operations_by_run
54
+ operations.group_by { |op| op["run"] }
55
+ end
56
+
57
+ def operation_stats_by_type
58
+ operations.group_by { |op| op["tag"] }.transform_values do |ops|
59
+ {
60
+ count: ops.length,
61
+ total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
62
+ avg_ms: ops.empty? ? 0 : (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6),
63
+ max_ms: ops.map { |op| op["wall_ms"] || 0 }.max || 0,
64
+ declarations: ops.map { |op| op["decl"] }.uniq.compact
65
+ }
66
+ end.sort_by { |_, stats| -stats[:total_ms] }
67
+ end
68
+
69
+ def operation_stats_by_declaration
70
+ operations.group_by { |op| op["decl"] }.transform_values do |ops|
71
+ {
72
+ count: ops.length,
73
+ total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
74
+ avg_ms: ops.empty? ? 0 : (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6),
75
+ operation_types: ops.map { |op| op["tag"] }.uniq.compact
76
+ }
77
+ end.sort_by { |_, stats| -stats[:total_ms] }
78
+ end
79
+
80
+ def hotspot_analysis(limit: 20)
81
+ operations.map do |op|
82
+ {
83
+ key: "#{op['decl']}@#{op['seq'] || 0}:#{op['tag']}",
84
+ decl: op["decl"],
85
+ tag: op["tag"],
86
+ wall_ms: op["wall_ms"] || 0,
87
+ cpu_ms: op["cpu_ms"] || 0,
88
+ rows: op["rows"] || 0
89
+ }
90
+ end.group_by { |op| op[:key] }.transform_values do |ops|
91
+ {
92
+ count: ops.length,
93
+ total_ms: ops.sum { |op| op[:wall_ms] }.round(4),
94
+ avg_ms: ops.empty? ? 0 : (ops.sum { |op| op[:wall_ms] } / ops.length).round(6),
95
+ decl: ops.first[:decl],
96
+ tag: ops.first[:tag]
97
+ }
98
+ end.sort_by { |_, stats| -stats[:total_ms] }.first(limit)
99
+ end
100
+
101
+ def reference_operation_analysis
102
+ ref_ops = operations.select { |op| op["tag"] == "ref" }
103
+ return { operations: 0, total_time: 0, avg_time: 0, by_declaration: [] } if ref_ops.empty?
104
+
105
+ {
106
+ operations: ref_ops.length,
107
+ total_time: ref_ops.sum { |op| op["wall_ms"] || 0 }.round(4),
108
+ avg_time: (ref_ops.sum { |op| op["wall_ms"] || 0 } / ref_ops.length).round(6),
109
+ by_declaration: ref_ops.group_by { |op| op["decl"] }.transform_values do |ops|
110
+ {
111
+ count: ops.length,
112
+ total_ms: ops.sum { |op| op["wall_ms"] || 0 }.round(4),
113
+ avg_ms: (ops.sum { |op| op["wall_ms"] || 0 } / ops.length).round(6)
114
+ }
115
+ end.sort_by { |_, stats| -stats[:total_ms] }
116
+ }
117
+ end
118
+
119
+ def memory_analysis
120
+ return nil if memory_snapshots.length < 2
121
+
122
+ start_mem = memory_snapshots.first
123
+ end_mem = memory_snapshots.last
124
+
125
+ {
126
+ start: {
127
+ heap_live: start_mem["heap_live"],
128
+ rss_mb: start_mem["rss_mb"],
129
+ minor_gc: start_mem["minor_gc"],
130
+ major_gc: start_mem["major_gc"]
131
+ },
132
+ end: {
133
+ heap_live: end_mem["heap_live"],
134
+ rss_mb: end_mem["rss_mb"],
135
+ minor_gc: end_mem["minor_gc"],
136
+ major_gc: end_mem["major_gc"]
137
+ },
138
+ growth: {
139
+ heap_objects: end_mem["heap_live"] - start_mem["heap_live"],
140
+ heap_growth_pct: ((end_mem["heap_live"] - start_mem["heap_live"]).to_f / start_mem["heap_live"] * 100).round(1),
141
+ rss_mb: (end_mem["rss_mb"] - start_mem["rss_mb"]).round(2),
142
+ rss_growth_pct: ((end_mem["rss_mb"] - start_mem["rss_mb"]) / start_mem["rss_mb"] * 100).round(1),
143
+ minor_gcs: end_mem["minor_gc"] - start_mem["minor_gc"],
144
+ major_gcs: end_mem["major_gc"] - start_mem["major_gc"]
145
+ }
146
+ }
147
+ end
148
+
149
+ def phase_analysis
150
+ phases.group_by { |p| p["name"] }.transform_values do |phase_events|
151
+ {
152
+ count: phase_events.length,
153
+ total_ms: phase_events.sum { |p| p["wall_ms"] || 0 }.round(4),
154
+ avg_ms: phase_events.empty? ? 0 : (phase_events.sum { |p| p["wall_ms"] || 0 } / phase_events.length).round(4),
155
+ max_ms: phase_events.map { |p| p["wall_ms"] || 0 }.max || 0
156
+ }
157
+ end.sort_by { |_, stats| -stats[:total_ms] }
158
+ end
159
+
160
+ # Reporting methods
161
+ def summary_report
162
+ total_ops = operations.length
163
+ total_vm_time = vm_execution_time
164
+ ref_analysis = reference_operation_analysis
165
+
166
+ puts "=== PROFILE AGGREGATION SUMMARY ==="
167
+ puts "Total events: #{events.length}"
168
+ puts "VM operations: #{total_ops}"
169
+ puts "VM executions: #{vm_execution_count}"
170
+
171
+ # Schema differentiation
172
+ schema_stats = schema_breakdown
173
+ if schema_stats.any? && schema_stats.keys.first != "Unknown"
174
+ puts "Schemas analyzed: #{schema_stats.keys.join(", ")}"
175
+ schema_stats.each do |schema, stats|
176
+ puts " #{schema}: #{stats[:operations]} operations, #{stats[:time]}ms"
177
+ end
178
+ else
179
+ puts "Schema runs: #{runs_analyzed.length} (runs: #{runs_analyzed.join(', ')})"
180
+ end
181
+
182
+ puts "Total VM time: #{total_vm_time.round(4)}ms"
183
+ puts "Average per VM execution: #{vm_execution_count > 0 ? (total_vm_time / vm_execution_count).round(4) : 0}ms"
184
+ puts
185
+
186
+ if ref_analysis[:operations] && ref_analysis[:operations] > 0
187
+ puts "Reference Operations:"
188
+ puts " Count: #{ref_analysis[:operations]} (#{(ref_analysis[:operations].to_f / total_ops * 100).round(1)}% of all ops)"
189
+ puts " Time: #{ref_analysis[:total_time]}ms (#{total_vm_time > 0 ? (ref_analysis[:total_time] / total_vm_time * 100).round(1) : 0}% of VM time)"
190
+ puts " Avg: #{ref_analysis[:avg_time]}ms per reference"
191
+ end
192
+
193
+ mem = memory_analysis
194
+ if mem
195
+ puts
196
+ puts "Memory Growth:"
197
+ puts " Heap: +#{mem[:growth][:heap_objects]} objects (#{mem[:growth][:heap_growth_pct]}%)"
198
+ puts " RSS: +#{mem[:growth][:rss_mb]}MB (#{mem[:growth][:rss_growth_pct]}%)"
199
+ puts " GC: #{mem[:growth][:minor_gcs]} minor, #{mem[:growth][:major_gcs]} major"
200
+ end
201
+ end
202
+
203
+ def detailed_report(limit: 15)
204
+ summary_report
205
+ puts
206
+ puts "=== TOP #{limit} HOTSPOTS ==="
207
+ hotspots = hotspot_analysis(limit: limit)
208
+ hotspots.each_with_index do |(key, stats), i|
209
+ puts "#{(i+1).to_s.rjust(2)}. #{key.ljust(40)} #{stats[:total_ms].to_s.rjust(10)}ms (#{stats[:count]} calls, #{stats[:avg_ms]}ms avg)"
210
+ end
211
+
212
+ # Schema breakdown if available
213
+ schema_stats = schema_breakdown
214
+ if schema_stats.keys.length > 1 || (schema_stats.keys.first && schema_stats.keys.first != "Unknown")
215
+ puts
216
+ puts "=== SCHEMA BREAKDOWN ==="
217
+ schema_stats.each do |schema, stats|
218
+ puts "#{schema}:"
219
+ puts " Operations: #{stats[:operations]}"
220
+ puts " Total time: #{stats[:time]}ms"
221
+ puts " Declarations: #{stats[:declarations].join(", ")}"
222
+ puts
223
+ end
224
+ end
225
+
226
+ puts "=== OPERATION TYPE BREAKDOWN ==="
227
+ operation_stats_by_type.each do |op_type, stats|
228
+ puts "#{op_type.ljust(15)} #{stats[:count].to_s.rjust(8)} calls #{stats[:total_ms].to_s.rjust(12)}ms #{stats[:avg_ms].to_s.rjust(10)}ms avg"
229
+ end
230
+
231
+ puts
232
+ puts "=== TOP #{limit} DECLARATIONS BY TIME ==="
233
+ operation_stats_by_declaration.first(limit).each do |decl, stats|
234
+ puts "#{decl.to_s.ljust(35)} #{stats[:count].to_s.rjust(6)} ops #{stats[:total_ms].to_s.rjust(10)}ms"
235
+ end
236
+ end
237
+
238
+ def export_summary(filename)
239
+ summary = {
240
+ metadata: {
241
+ total_events: events.length,
242
+ vm_operations: operations.length,
243
+ vm_executions: vm_execution_count,
244
+ analysis_timestamp: Time.now.strftime("%Y-%m-%dT%H:%M:%SZ")
245
+ },
246
+ timing: {
247
+ total_execution_ms: total_execution_time,
248
+ vm_execution_ms: vm_execution_time,
249
+ avg_vm_execution_ms: vm_execution_count > 0 ? (vm_execution_time / vm_execution_count).round(4) : 0
250
+ },
251
+ operations: {
252
+ by_type: operation_stats_by_type,
253
+ by_declaration: operation_stats_by_declaration,
254
+ hotspots: hotspot_analysis(limit: 20)
255
+ },
256
+ references: reference_operation_analysis,
257
+ memory: memory_analysis,
258
+ phases: phase_analysis
259
+ }
260
+
261
+ File.write(filename, JSON.pretty_generate(summary))
262
+ puts "Summary exported to: #{filename}"
263
+ end
264
+
265
+ private
266
+
267
+ def load_events
268
+ return unless File.exist?(@jsonl_file)
269
+
270
+ File.readlines(@jsonl_file).each do |line|
271
+ begin
272
+ event = JSON.parse(line.strip)
273
+ next unless event && event.is_a?(Hash)
274
+
275
+ @events << event
276
+
277
+ case event["kind"]
278
+ when "phase"
279
+ @phases << event
280
+ when "mem"
281
+ @memory_snapshots << event
282
+ when "final_summary"
283
+ @final_summary = event
284
+ else
285
+ # VM operations don't have a "kind" field - they have ts, run, decl, i, tag, wall_ms, cpu_ms, etc.
286
+ # According to profiler.rb line 118-130, VM operations are identified by having decl + tag but no kind
287
+ if event["decl"] && event["tag"] && !event["kind"]
288
+ @operations << event
289
+ elsif event["kind"] && !["summary", "cache_analysis"].include?(event["kind"])
290
+ # Handle any future event types that have a kind but aren't known
291
+ @operations << event
292
+ end
293
+ end
294
+ rescue JSON::ParserError
295
+ # Skip malformed JSON lines
296
+ end
297
+ end
298
+ end
299
+ end
300
+ end
301
+ end