catpm 0.8.4 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/app/views/catpm/endpoints/show.html.erb +4 -2
- data/app/views/catpm/errors/index.html.erb +1 -1
- data/app/views/catpm/samples/show.html.erb +1 -2
- data/lib/catpm/collector.rb +213 -21
- data/lib/catpm/configuration.rb +2 -0
- data/lib/catpm/middleware.rb +9 -7
- data/lib/catpm/request_segments.rb +92 -2
- data/lib/catpm/trace.rb +29 -9
- data/lib/catpm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1bb890d1a121d0351a7153a0575608c76c9b003b2d5798931fa4eaaca0067b81
|
|
4
|
+
data.tar.gz: bb4b7b4978e199de34306e18aafcdcac0ed7cd1fcc7a784d685ebe1ba6e9793e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1b377c63b639edf420597d451c2ef2349e73598ca7118bdda9a05c5a455ebad517c821f0cd2a8d8e9a6fde4b1dd71f15482442f9192158b08a6a5564996d3a6c
|
|
7
|
+
data.tar.gz: ef93cd60ad7840b0a90fd5389ef3ec8f35016fd01eb5d19032fc7e73822c7218d1259a7c326ccfc80109f944ccddddb2eeb9e8e638dae8fe725d7741974704ac
|
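data/README.md
CHANGED
|
(the README.md hunk, +1 -1, is missing from this capture)
|
data/app/views/catpm/endpoints/show.html.erb
CHANGED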
|
@@ -72,11 +72,13 @@
|
|
|
72
72
|
<% end %>
|
|
73
73
|
|
|
74
74
|
<%
|
|
75
|
+
instrumented_count = (@metadata["_instrumented"] || @metadata[:"_instrumented"] || 0).to_f
|
|
76
|
+
instrumented_count = @count.to_f if instrumented_count == 0 # backward compat with pre-sampling data
|
|
75
77
|
type_data = segment_colors.map { |type, color|
|
|
76
78
|
count = (@metadata["#{type}_count"] || @metadata[:"#{type}_count"] || 0).to_f
|
|
77
79
|
dur = (@metadata["#{type}_duration"] || @metadata[:"#{type}_duration"] || 0).to_f
|
|
78
|
-
avg_dur =
|
|
79
|
-
avg_count =
|
|
80
|
+
avg_dur = instrumented_count > 0 ? dur / instrumented_count : 0
|
|
81
|
+
avg_count = instrumented_count > 0 ? count / instrumented_count : 0
|
|
80
82
|
text_color = segment_text_colors[type] || "#4b5563"
|
|
81
83
|
{ type: type, label: segment_labels[type] || type.capitalize, bg: color, text: text_color, count: count, dur: dur, avg_dur: avg_dur, avg_count: avg_count }
|
|
82
84
|
}.select { |d| d[:count] > 0 }
|
data/app/views/catpm/errors/index.html.erb
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
<% end %>
|
|
16
16
|
<input type="text" class="search-input" id="error-search" placeholder="Search errors... (/)" oninput="filterByText('error-search','errors-table')">
|
|
17
17
|
<% if @tab == "active" && @active_count > 0 %>
|
|
18
|
-
<span style="display:inline; margin-left:auto"><%= button_to "Resolve all", catpm.resolve_all_errors_path, method: :post, class: "btn"
|
|
18
|
+
<span style="display:inline; margin-left:auto"><%= button_to "Resolve all", catpm.resolve_all_errors_path, method: :post, class: "btn" %></span>
|
|
19
19
|
<% end %>
|
|
20
20
|
</div>
|
|
21
21
|
<% end %>
|
data/app/views/catpm/samples/show.html.erb
CHANGED
|
@@ -17,8 +17,7 @@
|
|
|
17
17
|
<span>Sample #<%= @sample.id %></span>
|
|
18
18
|
</div>
|
|
19
19
|
<%= button_to "Delete Sample", catpm.sample_path(@sample),
|
|
20
|
-
method: :delete, class: "btn btn-danger"
|
|
21
|
-
data: { confirm: "Delete this sample? This cannot be undone." } %>
|
|
20
|
+
method: :delete, class: "btn btn-danger" %>
|
|
22
21
|
</div>
|
|
23
22
|
|
|
24
23
|
<%# ─── Request Info Bar ─── %>
|
data/lib/catpm/collector.rb
CHANGED
|
@@ -4,6 +4,10 @@ module Catpm
|
|
|
4
4
|
module Collector
|
|
5
5
|
SYNTHETIC_MIDDLEWARE_OFFSET_MS = 0.5
|
|
6
6
|
MIN_GAP_MS = 1.0
|
|
7
|
+
# Cap global force-instrument counter to avoid cascade when many requests
|
|
8
|
+
# are slow. Without this cap, apps with 30% slow requests would see ~23%
|
|
9
|
+
# instrumentation instead of the configured 1/random_sample_rate.
|
|
10
|
+
MAX_FORCE_INSTRUMENT_COUNT = 3
|
|
7
11
|
|
|
8
12
|
class << self
|
|
9
13
|
def process_action_controller(event)
|
|
@@ -19,6 +23,12 @@ module Catpm
|
|
|
19
23
|
metadata = build_http_metadata(payload)
|
|
20
24
|
|
|
21
25
|
req_segments = Thread.current[:catpm_request_segments]
|
|
26
|
+
# track_request clears req_segments after processing but leaves a marker
|
|
27
|
+
# so we know the request was already fully instrumented.
|
|
28
|
+
tracked_instrumented = Thread.current[:catpm_tracked_instrumented]
|
|
29
|
+
Thread.current[:catpm_tracked_instrumented] = nil
|
|
30
|
+
instrumented = !req_segments.nil? || tracked_instrumented
|
|
31
|
+
|
|
22
32
|
if req_segments
|
|
23
33
|
segment_data = req_segments.to_h
|
|
24
34
|
|
|
@@ -28,8 +38,17 @@ module Catpm
|
|
|
28
38
|
|
|
29
39
|
# Segment summary is always needed for bucket metadata aggregation
|
|
30
40
|
segment_data[:segment_summary].each { |k, v| metadata[k] = v }
|
|
41
|
+
else
|
|
42
|
+
# Non-instrumented request — compute duration from thread-local start time
|
|
43
|
+
request_start = Thread.current[:catpm_request_start]
|
|
44
|
+
if request_start
|
|
45
|
+
duration = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_start) * 1000.0
|
|
46
|
+
end
|
|
31
47
|
end
|
|
32
48
|
|
|
49
|
+
# Track instrumented count for correct dashboard averaging
|
|
50
|
+
metadata[:_instrumented] = 1 if instrumented
|
|
51
|
+
|
|
33
52
|
# Early sampling decision — only build heavy context for sampled events
|
|
34
53
|
operation = payload[:method] || 'GET'
|
|
35
54
|
sample_type = early_sample_type(
|
|
@@ -37,9 +56,17 @@ module Catpm
|
|
|
37
56
|
duration: duration,
|
|
38
57
|
kind: :http,
|
|
39
58
|
target: target,
|
|
40
|
-
operation: operation
|
|
59
|
+
operation: operation,
|
|
60
|
+
instrumented: instrumented
|
|
41
61
|
)
|
|
42
62
|
|
|
63
|
+
# Slow spike detection: force the NEXT HTTP request through middleware
|
|
64
|
+
# to be fully instrumented (uses global counter for should_instrument_request?).
|
|
65
|
+
# Skip if already handled by track_request (tracked_instrumented).
|
|
66
|
+
if !instrumented && (payload[:exception] || duration >= Catpm.config.slow_threshold_for(:http))
|
|
67
|
+
trigger_force_instrument
|
|
68
|
+
end
|
|
69
|
+
|
|
43
70
|
if sample_type
|
|
44
71
|
context = build_http_context(payload)
|
|
45
72
|
|
|
@@ -224,20 +251,30 @@ module Catpm
|
|
|
224
251
|
return if Catpm.config.ignored?(target)
|
|
225
252
|
|
|
226
253
|
metadata = (metadata || {}).dup
|
|
254
|
+
instrumented = !req_segments.nil?
|
|
227
255
|
|
|
228
256
|
if req_segments
|
|
229
257
|
segment_data = req_segments.to_h
|
|
230
258
|
segment_data[:segment_summary]&.each { |k, v| metadata[k] = v }
|
|
231
259
|
end
|
|
232
260
|
|
|
261
|
+
# Track instrumented count for correct dashboard averaging
|
|
262
|
+
metadata[:_instrumented] = 1 if instrumented
|
|
263
|
+
|
|
233
264
|
sample_type = early_sample_type(
|
|
234
265
|
error: error,
|
|
235
266
|
duration: duration,
|
|
236
267
|
kind: kind,
|
|
237
268
|
target: target,
|
|
238
|
-
operation: operation
|
|
269
|
+
operation: operation,
|
|
270
|
+
instrumented: instrumented
|
|
239
271
|
)
|
|
240
272
|
|
|
273
|
+
# Slow spike detection: force instrument next request for this endpoint
|
|
274
|
+
if !instrumented && (error || duration >= Catpm.config.slow_threshold_for(kind.to_sym))
|
|
275
|
+
trigger_force_instrument(kind: kind, target: target, operation: operation)
|
|
276
|
+
end
|
|
277
|
+
|
|
241
278
|
if sample_type
|
|
242
279
|
context = (context || {}).dup
|
|
243
280
|
|
|
@@ -341,6 +378,53 @@ module Catpm
|
|
|
341
378
|
Catpm.buffer&.push(ev)
|
|
342
379
|
end
|
|
343
380
|
|
|
381
|
+
def process_checkpoint(kind:, target:, operation:, context:, metadata:, checkpoint_data:, request_start:)
|
|
382
|
+
return unless Catpm.enabled?
|
|
383
|
+
|
|
384
|
+
segments = checkpoint_data[:segments].dup
|
|
385
|
+
collapse_code_wrappers(segments)
|
|
386
|
+
|
|
387
|
+
duration_so_far = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_start) * 1000.0
|
|
388
|
+
|
|
389
|
+
# Inject root request segment
|
|
390
|
+
root_segment = {
|
|
391
|
+
type: 'request',
|
|
392
|
+
detail: "#{operation.presence || kind} #{target}",
|
|
393
|
+
duration: duration_so_far.round(2),
|
|
394
|
+
offset: 0.0
|
|
395
|
+
}
|
|
396
|
+
segments.each do |seg|
|
|
397
|
+
if seg.key?(:parent_index)
|
|
398
|
+
seg[:parent_index] += 1
|
|
399
|
+
else
|
|
400
|
+
seg[:parent_index] = 0
|
|
401
|
+
end
|
|
402
|
+
end
|
|
403
|
+
segments.unshift(root_segment)
|
|
404
|
+
|
|
405
|
+
checkpoint_context = (context || {}).dup
|
|
406
|
+
checkpoint_context[:segments] = segments
|
|
407
|
+
checkpoint_context[:segment_summary] = checkpoint_data[:summary]
|
|
408
|
+
checkpoint_context[:segments_capped] = checkpoint_data[:overflow]
|
|
409
|
+
checkpoint_context[:partial] = true
|
|
410
|
+
checkpoint_context[:checkpoint_number] = checkpoint_data[:checkpoint_number]
|
|
411
|
+
checkpoint_context = scrub(checkpoint_context)
|
|
412
|
+
|
|
413
|
+
ev = Event.new(
|
|
414
|
+
kind: kind,
|
|
415
|
+
target: target,
|
|
416
|
+
operation: operation.to_s,
|
|
417
|
+
duration: duration_so_far,
|
|
418
|
+
started_at: Time.current,
|
|
419
|
+
status: 200,
|
|
420
|
+
context: checkpoint_context,
|
|
421
|
+
sample_type: 'random',
|
|
422
|
+
metadata: (metadata || {}).dup.merge(checkpoint_data[:summary] || {})
|
|
423
|
+
)
|
|
424
|
+
|
|
425
|
+
Catpm.buffer&.push(ev)
|
|
426
|
+
end
|
|
427
|
+
|
|
344
428
|
def process_custom(name:, duration:, metadata: {}, error: nil, context: {})
|
|
345
429
|
return unless Catpm.enabled?
|
|
346
430
|
return if Catpm.config.ignored?(name)
|
|
@@ -361,8 +445,105 @@ module Catpm
|
|
|
361
445
|
Catpm.buffer&.push(ev)
|
|
362
446
|
end
|
|
363
447
|
|
|
448
|
+
# --- Pre-sampling: decide BEFORE request whether to instrument ---
|
|
449
|
+
|
|
450
|
+
# For HTTP middleware where endpoint is unknown at start.
|
|
451
|
+
# Returns true if this request should get full instrumentation.
|
|
452
|
+
def should_instrument_request?
|
|
453
|
+
# Force after slow spike detection
|
|
454
|
+
if (@force_instrument_count || 0) > 0
|
|
455
|
+
@force_instrument_count -= 1
|
|
456
|
+
return true
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
rand(Catpm.config.random_sample_rate) == 0
|
|
460
|
+
end
|
|
461
|
+
|
|
462
|
+
# For track_request where endpoint is known at start.
|
|
463
|
+
# Filling phase ensures new endpoints get instrumented samples quickly.
|
|
464
|
+
def should_instrument?(kind, target, operation)
|
|
465
|
+
endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
|
|
466
|
+
|
|
467
|
+
# Force after slow spike
|
|
468
|
+
if force_instrument_endpoints.delete(endpoint_key)
|
|
469
|
+
return true
|
|
470
|
+
end
|
|
471
|
+
|
|
472
|
+
# Filling phase — endpoint hasn't collected enough instrumented samples yet
|
|
473
|
+
max = Catpm.config.max_random_samples_per_endpoint
|
|
474
|
+
if max.nil? || instrumented_sample_counts[endpoint_key] < max
|
|
475
|
+
return true
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
rand(Catpm.config.random_sample_rate) == 0
|
|
479
|
+
end
|
|
480
|
+
|
|
481
|
+
# Called when a slow/error request had no instrumentation —
|
|
482
|
+
# forces the NEXT request(s) to be fully instrumented.
|
|
483
|
+
#
|
|
484
|
+
# Two modes (mutually exclusive to avoid double-instrumentation):
|
|
485
|
+
# - With endpoint: sets per-endpoint flag consumed by should_instrument?
|
|
486
|
+
# (for track_request paths where endpoint is known)
|
|
487
|
+
# - Without endpoint: increments global counter consumed by
|
|
488
|
+
# should_instrument_request? (for middleware path where endpoint is unknown)
|
|
489
|
+
def trigger_force_instrument(kind: nil, target: nil, operation: nil)
|
|
490
|
+
if kind && target
|
|
491
|
+
endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
|
|
492
|
+
force_instrument_endpoints[endpoint_key] = true
|
|
493
|
+
else
|
|
494
|
+
@force_instrument_count = [(@force_instrument_count || 0) + 1, MAX_FORCE_INSTRUMENT_COUNT].min
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
|
|
498
|
+
def reset_sample_counts!
|
|
499
|
+
@instrumented_sample_counts = nil
|
|
500
|
+
@instrumented_sample_counts_loaded = false
|
|
501
|
+
@force_instrument_endpoints = nil
|
|
502
|
+
@force_instrument_count = nil
|
|
503
|
+
end
|
|
504
|
+
|
|
364
505
|
private
|
|
365
506
|
|
|
507
|
+
def force_instrument_endpoints
|
|
508
|
+
@force_instrument_endpoints ||= {}
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
def instrumented_sample_counts
|
|
512
|
+
return @instrumented_sample_counts if @instrumented_sample_counts_loaded
|
|
513
|
+
|
|
514
|
+
@instrumented_sample_counts = load_sample_counts_from_db
|
|
515
|
+
@instrumented_sample_counts_loaded = true
|
|
516
|
+
@instrumented_sample_counts
|
|
517
|
+
end
|
|
518
|
+
|
|
519
|
+
# Pre-populate filling counters from DB so old endpoints don't
|
|
520
|
+
# re-enter filling phase on every process restart.
|
|
521
|
+
# Temporarily clears thread-local to prevent our query from being
|
|
522
|
+
# captured as a segment in any active request.
|
|
523
|
+
def load_sample_counts_from_db
|
|
524
|
+
counts = Hash.new(0)
|
|
525
|
+
return counts unless defined?(Catpm::Sample) && Catpm::Bucket.table_exists?
|
|
526
|
+
|
|
527
|
+
saved_rs = Thread.current[:catpm_request_segments]
|
|
528
|
+
Thread.current[:catpm_request_segments] = nil
|
|
529
|
+
begin
|
|
530
|
+
Catpm::Sample.joins(:bucket)
|
|
531
|
+
.where(sample_type: 'random')
|
|
532
|
+
.group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation')
|
|
533
|
+
.count
|
|
534
|
+
.each do |(kind, target, operation), count|
|
|
535
|
+
counts[[kind.to_s, target.to_s, operation.to_s]] = count
|
|
536
|
+
end
|
|
537
|
+
ensure
|
|
538
|
+
Thread.current[:catpm_request_segments] = saved_rs
|
|
539
|
+
end
|
|
540
|
+
|
|
541
|
+
counts
|
|
542
|
+
rescue => e
|
|
543
|
+
Catpm.config.error_handler&.call(e)
|
|
544
|
+
Hash.new(0)
|
|
545
|
+
end
|
|
546
|
+
|
|
366
547
|
# Remove near-zero-duration "code" spans that merely wrap a "controller" span.
|
|
367
548
|
# This happens when CallTracer (TracePoint) captures a thin dispatch method
|
|
368
549
|
# (e.g. Telegram::WebhookController#process) whose :return fires before the
|
|
@@ -417,31 +598,42 @@ module Catpm
|
|
|
417
598
|
end
|
|
418
599
|
|
|
419
600
|
# Determine sample type at event creation time so only sampled events
|
|
420
|
-
# carry full context in the buffer.
|
|
421
|
-
#
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
601
|
+
# carry full context in the buffer.
|
|
602
|
+
#
|
|
603
|
+
# When instrumented: false, only error/slow get a sample_type —
|
|
604
|
+
# non-instrumented normal requests just contribute duration/count.
|
|
605
|
+
# Filling counter only increments for instrumented requests so
|
|
606
|
+
# non-instrumented requests don't waste filling slots.
|
|
607
|
+
def early_sample_type(error:, duration:, kind:, target:, operation:, instrumented: true)
|
|
608
|
+
# Errors: only create sample for instrumented requests (with segments).
|
|
609
|
+
# Non-instrumented errors are still tracked in error_groups via
|
|
610
|
+
# event.error? — occurrence counts, contexts, and backtrace are preserved.
|
|
611
|
+
# trigger_force_instrument ensures the next occurrence gets full segments.
|
|
612
|
+
return 'error' if error && instrumented
|
|
613
|
+
|
|
614
|
+
is_slow = duration >= Catpm.config.slow_threshold_for(kind.to_sym)
|
|
615
|
+
|
|
616
|
+
# Non-instrumented slow requests still get a sample (for dashboard) but
|
|
617
|
+
# don't count towards filling phase (they have no segments).
|
|
618
|
+
return 'slow' if is_slow && !instrumented
|
|
619
|
+
|
|
620
|
+
# Non-instrumented requests have no segments — skip sample creation
|
|
621
|
+
return nil unless instrumented
|
|
622
|
+
|
|
623
|
+
# Count this instrumented request towards filling phase completion.
|
|
624
|
+
# Both slow and random requests count — without this, endpoints where
|
|
625
|
+
# most requests exceed slow_threshold would never exit the filling phase,
|
|
626
|
+
# causing 100% instrumentation regardless of random_sample_rate.
|
|
427
627
|
endpoint_key = [kind.to_s, target, operation.to_s]
|
|
428
|
-
count =
|
|
628
|
+
count = instrumented_sample_counts[endpoint_key]
|
|
429
629
|
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
430
630
|
if max_random.nil? || count < max_random
|
|
431
|
-
|
|
432
|
-
return 'random'
|
|
631
|
+
instrumented_sample_counts[endpoint_key] = count + 1
|
|
433
632
|
end
|
|
434
633
|
|
|
435
|
-
return '
|
|
436
|
-
nil
|
|
437
|
-
end
|
|
438
|
-
|
|
439
|
-
def random_sample_counts
|
|
440
|
-
@random_sample_counts ||= Hash.new(0)
|
|
441
|
-
end
|
|
634
|
+
return 'slow' if is_slow
|
|
442
635
|
|
|
443
|
-
|
|
444
|
-
@random_sample_counts = nil
|
|
636
|
+
'random'
|
|
445
637
|
end
|
|
446
638
|
|
|
447
639
|
def inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -43,6 +43,7 @@ module Catpm
|
|
|
43
43
|
events_max_samples_per_name max_stack_samples_per_request
|
|
44
44
|
max_error_detail_length max_fingerprint_app_frames
|
|
45
45
|
max_fingerprint_gem_frames cleanup_batch_size caller_scan_depth
|
|
46
|
+
max_request_memory
|
|
46
47
|
].freeze
|
|
47
48
|
|
|
48
49
|
(REQUIRED_NUMERIC + OPTIONAL_NUMERIC).each do |attr|
|
|
@@ -116,6 +117,7 @@ module Catpm
|
|
|
116
117
|
@max_fingerprint_gem_frames = 3
|
|
117
118
|
@cleanup_batch_size = 1_000
|
|
118
119
|
@caller_scan_depth = 50
|
|
120
|
+
@max_request_memory = 2.megabytes
|
|
119
121
|
@instrument_call_tree = false
|
|
120
122
|
@show_untracked_segments = false
|
|
121
123
|
end
|
data/lib/catpm/middleware.rb
CHANGED
|
@@ -12,14 +12,16 @@ module Catpm
|
|
|
12
12
|
Catpm.flusher&.ensure_running!
|
|
13
13
|
|
|
14
14
|
env['catpm.request_start'] = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
15
|
+
Thread.current[:catpm_request_start] = env['catpm.request_start']
|
|
15
16
|
|
|
16
|
-
if Catpm.config.instrument_segments
|
|
17
|
+
if Catpm.config.instrument_segments && Collector.should_instrument_request?
|
|
17
18
|
use_sampler = Catpm.config.instrument_stack_sampler || Catpm.config.instrument_call_tree
|
|
18
19
|
req_segments = RequestSegments.new(
|
|
19
20
|
max_segments: Catpm.config.max_segments_per_request,
|
|
20
21
|
request_start: env['catpm.request_start'],
|
|
21
22
|
stack_sample: use_sampler,
|
|
22
|
-
call_tree: Catpm.config.instrument_call_tree
|
|
23
|
+
call_tree: Catpm.config.instrument_call_tree,
|
|
24
|
+
memory_limit: Catpm.config.max_request_memory
|
|
23
25
|
)
|
|
24
26
|
env['catpm.segments'] = req_segments
|
|
25
27
|
Thread.current[:catpm_request_segments] = req_segments
|
|
@@ -30,11 +32,11 @@ module Catpm
|
|
|
30
32
|
record_exception(env, e)
|
|
31
33
|
raise
|
|
32
34
|
ensure
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
req_segments&.stop_sampler
|
|
36
|
+
req_segments&.release!
|
|
37
|
+
Thread.current[:catpm_request_segments] = nil
|
|
38
|
+
Thread.current[:catpm_request_start] = nil
|
|
39
|
+
Thread.current[:catpm_tracked_instrumented] = nil
|
|
38
40
|
end
|
|
39
41
|
|
|
40
42
|
private
|
data/lib/catpm/request_segments.rb
CHANGED
|
@@ -5,9 +5,13 @@ module Catpm
|
|
|
5
5
|
# Pre-computed symbol pairs — each type computed once per process lifetime.
|
|
6
6
|
SUMMARY_KEYS = Hash.new { |h, k| h[k] = [:"#{k}_count", :"#{k}_duration"] }
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
# Per-segment byte estimate: Hash overhead + typical keys (type, duration, detail, offset, source, parent_index)
|
|
9
|
+
SEGMENT_BASE_BYTES = Event::OBJECT_OVERHEAD + (6 * Event::HASH_ENTRY_SIZE)
|
|
10
|
+
SEGMENT_STRING_OVERHEAD = Event::OBJECT_OVERHEAD # per-string overhead in segment values
|
|
9
11
|
|
|
10
|
-
|
|
12
|
+
attr_reader :segments, :summary, :request_start, :estimated_bytes, :checkpoint_count
|
|
13
|
+
|
|
14
|
+
def initialize(max_segments:, request_start: nil, stack_sample: false, call_tree: false, memory_limit: nil)
|
|
11
15
|
@max_segments = max_segments
|
|
12
16
|
@request_start = request_start || Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
13
17
|
@segments = []
|
|
@@ -16,6 +20,10 @@ module Catpm
|
|
|
16
20
|
@span_stack = []
|
|
17
21
|
@tracked_ranges = []
|
|
18
22
|
@call_tree = call_tree
|
|
23
|
+
@memory_limit = memory_limit
|
|
24
|
+
@estimated_bytes = 0
|
|
25
|
+
@checkpoint_callback = nil
|
|
26
|
+
@checkpoint_count = 0
|
|
19
27
|
|
|
20
28
|
if stack_sample
|
|
21
29
|
@sampler = StackSampler.new(target_thread: Thread.current, request_start: @request_start, call_tree: call_tree)
|
|
@@ -23,6 +31,10 @@ module Catpm
|
|
|
23
31
|
end
|
|
24
32
|
end
|
|
25
33
|
|
|
34
|
+
def on_checkpoint(&block)
|
|
35
|
+
@checkpoint_callback = block
|
|
36
|
+
end
|
|
37
|
+
|
|
26
38
|
def add(type:, duration:, detail:, source: nil, started_at: nil)
|
|
27
39
|
type_key = type.to_sym
|
|
28
40
|
count_key, dur_key = SUMMARY_KEYS[type_key]
|
|
@@ -50,6 +62,9 @@ module Catpm
|
|
|
50
62
|
@segments[min_idx] = segment
|
|
51
63
|
end
|
|
52
64
|
end
|
|
65
|
+
|
|
66
|
+
@estimated_bytes += estimate_segment_bytes(segment)
|
|
67
|
+
maybe_checkpoint
|
|
53
68
|
end
|
|
54
69
|
|
|
55
70
|
def push_span(type:, detail:, started_at: nil)
|
|
@@ -64,6 +79,7 @@ module Catpm
|
|
|
64
79
|
index = @segments.size
|
|
65
80
|
@segments << segment
|
|
66
81
|
@span_stack.push(index)
|
|
82
|
+
@estimated_bytes += estimate_segment_bytes(segment)
|
|
67
83
|
index
|
|
68
84
|
end
|
|
69
85
|
|
|
@@ -114,6 +130,7 @@ module Catpm
|
|
|
114
130
|
@summary = {}
|
|
115
131
|
@tracked_ranges = []
|
|
116
132
|
@sampler = nil
|
|
133
|
+
@estimated_bytes = 0
|
|
117
134
|
end
|
|
118
135
|
|
|
119
136
|
def overflowed?
|
|
@@ -127,5 +144,78 @@ module Catpm
|
|
|
127
144
|
segments_capped: @overflow
|
|
128
145
|
}
|
|
129
146
|
end
|
|
147
|
+
|
|
148
|
+
private
|
|
149
|
+
|
|
150
|
+
def estimate_segment_bytes(segment)
|
|
151
|
+
bytes = SEGMENT_BASE_BYTES
|
|
152
|
+
bytes += segment[:detail].bytesize + SEGMENT_STRING_OVERHEAD if segment[:detail]
|
|
153
|
+
bytes += segment[:type].bytesize + SEGMENT_STRING_OVERHEAD if segment[:type]
|
|
154
|
+
bytes += segment[:source].bytesize + SEGMENT_STRING_OVERHEAD if segment[:source]
|
|
155
|
+
bytes
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def maybe_checkpoint
|
|
159
|
+
return unless @memory_limit && @estimated_bytes > @memory_limit && @checkpoint_callback
|
|
160
|
+
|
|
161
|
+
checkpoint_data = {
|
|
162
|
+
segments: @segments,
|
|
163
|
+
summary: @summary,
|
|
164
|
+
overflow: @overflow,
|
|
165
|
+
sampler_segments: @sampler ? sampler_segments_for_checkpoint : [],
|
|
166
|
+
checkpoint_number: @checkpoint_count
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
@checkpoint_count += 1
|
|
170
|
+
rebuild_after_checkpoint
|
|
171
|
+
@checkpoint_callback.call(checkpoint_data)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def sampler_segments_for_checkpoint
|
|
175
|
+
if @call_tree
|
|
176
|
+
result = @sampler&.to_call_tree(tracked_ranges: @tracked_ranges) || []
|
|
177
|
+
else
|
|
178
|
+
result = @sampler&.to_segments(tracked_ranges: @tracked_ranges) || []
|
|
179
|
+
end
|
|
180
|
+
@sampler&.clear_samples!
|
|
181
|
+
result
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# After checkpoint: keep only active spans from @span_stack, reset everything else.
|
|
185
|
+
def rebuild_after_checkpoint
|
|
186
|
+
if @span_stack.any?
|
|
187
|
+
# Clone active spans with corrected indices
|
|
188
|
+
new_segments = []
|
|
189
|
+
old_to_new = {}
|
|
190
|
+
|
|
191
|
+
@span_stack.each do |old_idx|
|
|
192
|
+
seg = @segments[old_idx]
|
|
193
|
+
next unless seg
|
|
194
|
+
|
|
195
|
+
new_idx = new_segments.size
|
|
196
|
+
old_to_new[old_idx] = new_idx
|
|
197
|
+
new_segments << seg.dup
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Fix parent_index references in cloned spans
|
|
201
|
+
new_segments.each do |seg|
|
|
202
|
+
if seg.key?(:parent_index) && old_to_new.key?(seg[:parent_index])
|
|
203
|
+
seg[:parent_index] = old_to_new[seg[:parent_index]]
|
|
204
|
+
else
|
|
205
|
+
seg.delete(:parent_index)
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
@span_stack = @span_stack.filter_map { |old_idx| old_to_new[old_idx] }
|
|
210
|
+
@segments = new_segments
|
|
211
|
+
else
|
|
212
|
+
@segments = []
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
@summary = Hash.new(0)
|
|
216
|
+
@tracked_ranges = []
|
|
217
|
+
@overflow = false
|
|
218
|
+
@estimated_bytes = 0
|
|
219
|
+
end
|
|
130
220
|
end
|
|
131
221
|
end
|
data/lib/catpm/trace.rb
CHANGED
|
@@ -76,15 +76,30 @@ module Catpm
|
|
|
76
76
|
owns_segments = false
|
|
77
77
|
|
|
78
78
|
if req_segments.nil? && config.instrument_segments
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
79
|
+
if Collector.should_instrument?(kind, target, operation)
|
|
80
|
+
use_sampler = config.instrument_stack_sampler || config.instrument_call_tree
|
|
81
|
+
start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
82
|
+
req_segments = RequestSegments.new(
|
|
83
|
+
max_segments: config.max_segments_per_request,
|
|
84
|
+
request_start: start_time,
|
|
85
|
+
stack_sample: use_sampler,
|
|
86
|
+
call_tree: config.instrument_call_tree,
|
|
87
|
+
memory_limit: config.max_request_memory
|
|
88
|
+
)
|
|
89
|
+
Thread.current[:catpm_request_segments] = req_segments
|
|
90
|
+
owns_segments = true
|
|
91
|
+
|
|
92
|
+
if config.max_request_memory
|
|
93
|
+
req_segments.on_checkpoint do |checkpoint_data|
|
|
94
|
+
Collector.process_checkpoint(
|
|
95
|
+
kind: kind, target: target, operation: operation,
|
|
96
|
+
context: context, metadata: metadata,
|
|
97
|
+
checkpoint_data: checkpoint_data,
|
|
98
|
+
request_start: start_time
|
|
99
|
+
)
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
88
103
|
end
|
|
89
104
|
|
|
90
105
|
if req_segments
|
|
@@ -114,6 +129,11 @@ module Catpm
|
|
|
114
129
|
if owns_segments
|
|
115
130
|
req_segments&.release!
|
|
116
131
|
Thread.current[:catpm_request_segments] = nil
|
|
132
|
+
# Mark that this request was already instrumented and processed by
|
|
133
|
+
# track_request. Without this, process_action_controller would see
|
|
134
|
+
# nil req_segments and falsely trigger force_instrument for slow
|
|
135
|
+
# requests — even though they were fully instrumented here.
|
|
136
|
+
Thread.current[:catpm_tracked_instrumented] = true
|
|
117
137
|
end
|
|
118
138
|
end
|
|
119
139
|
end
|
data/lib/catpm/version.rb
CHANGED