catpm 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/catpm/collector.rb +160 -4
- data/lib/catpm/segment_subscribers.rb +46 -1
- data/lib/catpm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b2fde17350e1e195b3e7aa1cf8084b435d474c8d73aee1e83b2924cf8809a40
|
|
4
|
+
data.tar.gz: 253e27024b75ef3a5d1cb538b87275afbe373b9645b41d56ff751d86c3c27612
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: dd586cd99584f463ef1c77e25c8c4e2c0920dc66115528c90a53af483e3601f1b1d4a554c9b80e0f403174ceb67fa5e3956d10a60d4c96c206f1669bd74d7c0d
|
|
7
|
+
data.tar.gz: 74fc43ecc58017374a011982100e1780ff44e7c068b36c814626be37e85ac517b658527e7021bd7ea2b500b499935b807745cb52a54989f1e23b413f093cd90a
|
data/README.md
CHANGED
data/lib/catpm/collector.rb
CHANGED
|
@@ -201,6 +201,10 @@ module Catpm
|
|
|
201
201
|
|
|
202
202
|
duration = event.duration
|
|
203
203
|
exception = payload[:exception_object]
|
|
204
|
+
owns_segments = payload[:_catpm_job_owns_segments]
|
|
205
|
+
|
|
206
|
+
req_segments = Thread.current[:catpm_request_segments] if owns_segments
|
|
207
|
+
instrumented = !req_segments.nil?
|
|
204
208
|
|
|
205
209
|
queue_wait = if job.respond_to?(:enqueued_at) && job.enqueued_at
|
|
206
210
|
((Time.current - job.enqueued_at.to_time) * 1000.0) rescue nil
|
|
@@ -208,21 +212,110 @@ module Catpm
|
|
|
208
212
|
|
|
209
213
|
metadata = { queue_wait: queue_wait }.compact
|
|
210
214
|
|
|
215
|
+
if req_segments
|
|
216
|
+
segment_data = req_segments.to_h
|
|
217
|
+
segment_data[:segment_summary].each { |k, v| metadata[k] = v }
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
metadata[:_instrumented] = 1 if instrumented
|
|
221
|
+
|
|
211
222
|
sample_type = early_sample_type(
|
|
212
223
|
error: exception,
|
|
213
224
|
duration: duration,
|
|
214
225
|
kind: :job,
|
|
215
226
|
target: target,
|
|
216
|
-
operation: job.queue_name
|
|
227
|
+
operation: job.queue_name,
|
|
228
|
+
instrumented: instrumented
|
|
217
229
|
)
|
|
218
230
|
|
|
219
|
-
context =
|
|
220
|
-
|
|
231
|
+
context = nil
|
|
232
|
+
if sample_type
|
|
233
|
+
context = {
|
|
221
234
|
job_class: target,
|
|
222
235
|
job_id: job.job_id,
|
|
223
236
|
queue: job.queue_name,
|
|
224
237
|
attempts: job.executions
|
|
225
238
|
}
|
|
239
|
+
|
|
240
|
+
if req_segments
|
|
241
|
+
segments = segment_data[:segments]
|
|
242
|
+
collapse_code_wrappers(segments)
|
|
243
|
+
|
|
244
|
+
# Inject root job segment with full duration
|
|
245
|
+
root_segment = {
|
|
246
|
+
type: 'request',
|
|
247
|
+
detail: "job #{target}",
|
|
248
|
+
duration: duration.round(2),
|
|
249
|
+
offset: 0.0
|
|
250
|
+
}
|
|
251
|
+
segments.each do |seg|
|
|
252
|
+
if seg.key?(:parent_index)
|
|
253
|
+
seg[:parent_index] += 1
|
|
254
|
+
else
|
|
255
|
+
seg[:parent_index] = 0
|
|
256
|
+
end
|
|
257
|
+
end
|
|
258
|
+
segments.unshift(root_segment)
|
|
259
|
+
|
|
260
|
+
# Inject call tree segments from sampler
|
|
261
|
+
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
262
|
+
if Catpm.config.instrument_call_tree && req_segments
|
|
263
|
+
tree_segs = req_segments.call_tree_segments
|
|
264
|
+
if tree_segs.any?
|
|
265
|
+
base_idx = segments.size
|
|
266
|
+
tree_segs.each do |seg|
|
|
267
|
+
tree_parent = seg.delete(:_tree_parent)
|
|
268
|
+
seg[:parent_index] = tree_parent ? (tree_parent + base_idx) : (ctrl_idx || 0)
|
|
269
|
+
segments << seg
|
|
270
|
+
end
|
|
271
|
+
reparent_under_call_tree(segments, ctrl_idx)
|
|
272
|
+
end
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
# Fill untracked controller time with sampler data or synthetic segment
|
|
276
|
+
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
277
|
+
if ctrl_idx
|
|
278
|
+
ctrl_seg = segments[ctrl_idx]
|
|
279
|
+
ctrl_dur = (ctrl_seg[:duration] || 0).to_f
|
|
280
|
+
child_dur = segments.each_with_index.sum do |pair|
|
|
281
|
+
seg, i = pair
|
|
282
|
+
next 0.0 if i == ctrl_idx
|
|
283
|
+
(seg[:parent_index] == ctrl_idx) ? (seg[:duration] || 0).to_f : 0.0
|
|
284
|
+
end
|
|
285
|
+
gap = ctrl_dur - child_dur
|
|
286
|
+
|
|
287
|
+
if gap > MIN_GAP_MS && Catpm.config.show_untracked_segments
|
|
288
|
+
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
289
|
+
end
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
context[:segments] = segments
|
|
293
|
+
context[:segment_summary] = segment_data[:segment_summary]
|
|
294
|
+
context[:segments_capped] = segment_data[:segments_capped]
|
|
295
|
+
context[:segments_filtered] = segment_data[:segments_filtered] if segment_data[:segments_filtered] > 0
|
|
296
|
+
|
|
297
|
+
# Append error marker segment inside the controller
|
|
298
|
+
if exception
|
|
299
|
+
error_parent = ctrl_idx || 0
|
|
300
|
+
error_offset = if ctrl_idx
|
|
301
|
+
ctrl = segments[ctrl_idx]
|
|
302
|
+
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
303
|
+
else
|
|
304
|
+
duration.round(2)
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
context[:segments] << {
|
|
308
|
+
type: 'error',
|
|
309
|
+
detail: "#{exception.class.name}: #{exception.message}".truncate(Catpm.config.max_error_detail_length),
|
|
310
|
+
source: exception.backtrace&.first,
|
|
311
|
+
duration: 0,
|
|
312
|
+
offset: error_offset,
|
|
313
|
+
parent_index: error_parent
|
|
314
|
+
}
|
|
315
|
+
end
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
context = scrub(context)
|
|
226
319
|
end
|
|
227
320
|
|
|
228
321
|
ev = Event.new(
|
|
@@ -240,6 +333,12 @@ module Catpm
|
|
|
240
333
|
)
|
|
241
334
|
|
|
242
335
|
Catpm.buffer&.push(ev)
|
|
336
|
+
ensure
|
|
337
|
+
if owns_segments
|
|
338
|
+
req_segments&.release!
|
|
339
|
+
Collector.end_instrumentation
|
|
340
|
+
Thread.current[:catpm_request_segments] = nil
|
|
341
|
+
end
|
|
243
342
|
end
|
|
244
343
|
|
|
245
344
|
def process_tracked(kind:, target:, operation:, duration:, context:, metadata:, error:, req_segments:)
|
|
@@ -439,6 +538,11 @@ module Catpm
|
|
|
439
538
|
# Re-parent non-code segments (sql, cache, etc.) under call tree code segments
|
|
440
539
|
# when their offset falls within the code segment's time range.
|
|
441
540
|
# This gives proper nesting: code → sql, instead of both being siblings under controller.
|
|
541
|
+
#
|
|
542
|
+
# After reparenting, extends code segment durations up the call tree chain
|
|
543
|
+
# when children (e.g. external HTTP spans) extend beyond the code segment's
|
|
544
|
+
# sampler-derived duration. This happens because the stack sampler may hit its
|
|
545
|
+
# sample cap before the code finishes (e.g. during a long I/O call).
|
|
442
546
|
def reparent_under_call_tree(segments, ctrl_idx)
|
|
443
547
|
# Build index of code segments with their time ranges: [index, offset, end]
|
|
444
548
|
code_nodes = []
|
|
@@ -448,6 +552,12 @@ module Catpm
|
|
|
448
552
|
end
|
|
449
553
|
return if code_nodes.empty?
|
|
450
554
|
|
|
555
|
+
# Tolerance for offset matching: spans created via push_span record exact timing,
|
|
556
|
+
# while call tree code segments start from the first sampler capture (up to one
|
|
557
|
+
# sampling interval later). Without tolerance, the span's offset falls just before
|
|
558
|
+
# the code segment's range and reparenting silently fails.
|
|
559
|
+
sampling_tolerance_ms = Catpm.config.stack_sample_interval * 1000.0
|
|
560
|
+
|
|
451
561
|
segments.each_with_index do |seg, i|
|
|
452
562
|
# Only reparent direct children of controller that aren't code segments
|
|
453
563
|
next if seg[:type] == 'code' || seg[:type] == 'controller' || seg[:type] == 'request'
|
|
@@ -462,7 +572,7 @@ module Catpm
|
|
|
462
572
|
best_dur = Float::INFINITY
|
|
463
573
|
|
|
464
574
|
code_nodes.each do |code_i, code_start, code_end|
|
|
465
|
-
next unless seg_offset >= code_start && seg_offset < code_end
|
|
575
|
+
next unless seg_offset >= (code_start - sampling_tolerance_ms) && seg_offset < code_end
|
|
466
576
|
|
|
467
577
|
dur = code_end - code_start
|
|
468
578
|
if dur < best_dur
|
|
@@ -473,6 +583,52 @@ module Catpm
|
|
|
473
583
|
|
|
474
584
|
seg[:parent_index] = best_idx if best_idx
|
|
475
585
|
end
|
|
586
|
+
|
|
587
|
+
# Extend code segment durations when reparented children extend beyond them.
|
|
588
|
+
# The stack sampler may hit its cap early, producing short code segments that
|
|
589
|
+
# don't cover the full wall-clock time of long I/O calls within them.
|
|
590
|
+
extend_call_tree_durations(segments)
|
|
591
|
+
end
|
|
592
|
+
|
|
593
|
+
# Walk all segments; when a child's end time exceeds its parent code segment's
|
|
594
|
+
# end time, extend the parent (and propagate up the chain).
|
|
595
|
+
def extend_call_tree_durations(segments)
|
|
596
|
+
segments.each do |seg|
|
|
597
|
+
parent_idx = seg[:parent_index]
|
|
598
|
+
next unless parent_idx
|
|
599
|
+
|
|
600
|
+
parent = segments[parent_idx]
|
|
601
|
+
next unless parent && parent[:type] == 'code'
|
|
602
|
+
|
|
603
|
+
seg_end = (seg[:offset] || 0).to_f + (seg[:duration] || 0).to_f
|
|
604
|
+
parent_offset = (parent[:offset] || 0).to_f
|
|
605
|
+
parent_end = parent_offset + (parent[:duration] || 0).to_f
|
|
606
|
+
|
|
607
|
+
next unless seg_end > parent_end
|
|
608
|
+
|
|
609
|
+
parent[:duration] = (seg_end - parent_offset).round(2)
|
|
610
|
+
|
|
611
|
+
# Propagate up the call tree chain
|
|
612
|
+
propagate_duration_up(segments, parent_idx)
|
|
613
|
+
end
|
|
614
|
+
end
|
|
615
|
+
|
|
616
|
+
def propagate_duration_up(segments, idx)
|
|
617
|
+
seg = segments[idx]
|
|
618
|
+
parent_idx = seg[:parent_index]
|
|
619
|
+
return unless parent_idx
|
|
620
|
+
|
|
621
|
+
parent = segments[parent_idx]
|
|
622
|
+
return unless parent && parent[:type] == 'code'
|
|
623
|
+
|
|
624
|
+
seg_end = (seg[:offset] || 0).to_f + (seg[:duration] || 0).to_f
|
|
625
|
+
parent_offset = (parent[:offset] || 0).to_f
|
|
626
|
+
parent_end = parent_offset + (parent[:duration] || 0).to_f
|
|
627
|
+
|
|
628
|
+
return unless seg_end > parent_end
|
|
629
|
+
|
|
630
|
+
parent[:duration] = (seg_end - parent_offset).round(2)
|
|
631
|
+
propagate_duration_up(segments, parent_idx)
|
|
476
632
|
end
|
|
477
633
|
|
|
478
634
|
# Remove near-zero-duration "code" spans that merely wrap a "controller" span.
|
data/lib/catpm/segment_subscribers.rb
CHANGED
|
@@ -51,6 +51,44 @@ module Catpm
|
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
# Subscriber with start/finish callbacks so all segments (SQL, cache, etc.)
|
|
55
|
+
# fired during a job are automatically captured and nested under the job span.
|
|
56
|
+
class JobSpanSubscriber
|
|
57
|
+
def start(_name, _id, payload)
|
|
58
|
+
return unless Catpm.config.instrument_segments
|
|
59
|
+
|
|
60
|
+
job = payload[:job]
|
|
61
|
+
target = job.class.name
|
|
62
|
+
return if Catpm.config.ignored?(target)
|
|
63
|
+
|
|
64
|
+
if Collector.should_instrument?(:job, target, job.queue_name)
|
|
65
|
+
use_sampler = Catpm.config.instrument_stack_sampler || Catpm.config.instrument_call_tree
|
|
66
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
67
|
+
req_segments = RequestSegments.new(
|
|
68
|
+
max_segments: Catpm.config.effective_max_segments_per_request,
|
|
69
|
+
request_start: start_time,
|
|
70
|
+
stack_sample: use_sampler,
|
|
71
|
+
call_tree: Catpm.config.instrument_call_tree
|
|
72
|
+
)
|
|
73
|
+
Thread.current[:catpm_request_segments] = req_segments
|
|
74
|
+
|
|
75
|
+
index = req_segments.push_span(type: :controller, detail: target, started_at: start_time)
|
|
76
|
+
payload[:_catpm_job_span_index] = index
|
|
77
|
+
payload[:_catpm_job_owns_segments] = true
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def finish(_name, _id, payload)
|
|
82
|
+
return unless payload[:_catpm_job_owns_segments]
|
|
83
|
+
|
|
84
|
+
req_segments = Thread.current[:catpm_request_segments]
|
|
85
|
+
return unless req_segments
|
|
86
|
+
|
|
87
|
+
req_segments.pop_span(payload[:_catpm_job_span_index])
|
|
88
|
+
req_segments.stop_sampler
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
|
|
54
92
|
IGNORED_SQL_NAMES = Set.new([
|
|
55
93
|
'SCHEMA', 'EXPLAIN',
|
|
56
94
|
'ActiveRecord::SchemaMigration Load',
|
|
@@ -65,6 +103,12 @@ module Catpm
|
|
|
65
103
|
'process_action.action_controller', ControllerSpanSubscriber.new
|
|
66
104
|
)
|
|
67
105
|
|
|
106
|
+
if Catpm.config.instrument_jobs
|
|
107
|
+
@job_span_subscriber = ActiveSupport::Notifications.subscribe(
|
|
108
|
+
'perform.active_job', JobSpanSubscriber.new
|
|
109
|
+
)
|
|
110
|
+
end
|
|
111
|
+
|
|
68
112
|
@sql_subscriber = ActiveSupport::Notifications.subscribe(
|
|
69
113
|
'sql.active_record'
|
|
70
114
|
) do |event|
|
|
@@ -122,7 +166,7 @@ module Catpm
|
|
|
122
166
|
|
|
123
167
|
def unsubscribe!
|
|
124
168
|
[
|
|
125
|
-
@controller_span_subscriber,
|
|
169
|
+
@controller_span_subscriber, @job_span_subscriber,
|
|
126
170
|
@sql_subscriber, @instantiation_subscriber,
|
|
127
171
|
@render_template_subscriber, @render_partial_subscriber,
|
|
128
172
|
@cache_read_subscriber, @cache_write_subscriber,
|
|
@@ -131,6 +175,7 @@ module Catpm
|
|
|
131
175
|
ActiveSupport::Notifications.unsubscribe(sub) if sub
|
|
132
176
|
end
|
|
133
177
|
@controller_span_subscriber = nil
|
|
178
|
+
@job_span_subscriber = nil
|
|
134
179
|
@sql_subscriber = nil
|
|
135
180
|
@instantiation_subscriber = nil
|
|
136
181
|
@render_template_subscriber = nil
|
data/lib/catpm/version.rb
CHANGED