catpm 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/views/catpm/shared/_segments_waterfall.html.erb +5 -0
- data/lib/catpm/adapter/base.rb +8 -4
- data/lib/catpm/call_tracer.rb +85 -0
- data/lib/catpm/collector.rb +76 -19
- data/lib/catpm/configuration.rb +22 -1
- data/lib/catpm/fingerprint.rb +2 -2
- data/lib/catpm/flusher.rb +45 -32
- data/lib/catpm/middleware.rb +7 -0
- data/lib/catpm/request_segments.rb +2 -2
- data/lib/catpm/segment_subscribers.rb +4 -2
- data/lib/catpm/stack_sampler.rb +5 -3
- data/lib/catpm/trace.rb +9 -1
- data/lib/catpm/version.rb +1 -1
- data/lib/catpm.rb +1 -0
- data/lib/generators/catpm/templates/initializer.rb.tt +69 -57
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 527b9950df4630b2c22f992c2e3e604eac9365c7faa0ea2a892cac5ed47d69af
|
|
4
|
+
data.tar.gz: d39f675cb7bea8ab51762f78d2319d29e2f88769b37594a45bb0571cdcdd2d32
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d47303a0a85e9f9773e37ef6ca2f0faf7627207ea1050ae456cf0bdff285d60f6ae043285216e5dbdcf61668ba6e4bffb91848bfd884f3e0c7548ea79a97ebf9
|
|
7
|
+
data.tar.gz: d4b68684377d6f350645dbfff66cd7faac777884c5dc0aae3b7ef5c2c612576bfc27c27ee55c7c3f3136e1897b3b2d48bc76e49b8aae5f6f9b0eb8240db7c543
|
|
@@ -17,6 +17,11 @@
|
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
+
# Sort children of each parent by timeline offset so segments appear chronologically
|
|
21
|
+
children.each_value do |kids|
|
|
22
|
+
kids.sort_by! { |i| (segments[i]["offset"] || segments[i][:offset] || 0).to_f }
|
|
23
|
+
end
|
|
24
|
+
|
|
20
25
|
depth_map = {}
|
|
21
26
|
ordered = []
|
|
22
27
|
build_order = ->(indices, depth) {
|
data/lib/catpm/adapter/base.rb
CHANGED
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
module Catpm
|
|
4
4
|
module Adapter
|
|
5
5
|
module Base
|
|
6
|
+
MINUTE_BUCKET_RETENTION = 48 * 3600
|
|
7
|
+
HOUR_BUCKET_RETENTION = 90 * 86400
|
|
8
|
+
DAY_BUCKET_RETENTION = 2 * 365 * 86400
|
|
6
9
|
def persist_buckets(aggregated_buckets)
|
|
7
10
|
raise NotImplementedError
|
|
8
11
|
end
|
|
@@ -66,7 +69,8 @@ module Catpm
|
|
|
66
69
|
|
|
67
70
|
def merge_contexts(existing_contexts, new_contexts)
|
|
68
71
|
combined = (existing_contexts + new_contexts)
|
|
69
|
-
|
|
72
|
+
max = Catpm.config.max_error_contexts
|
|
73
|
+
max ? combined.last(max) : combined
|
|
70
74
|
end
|
|
71
75
|
|
|
72
76
|
# Merge new occurrence timestamps into the multi-resolution bucket structure.
|
|
@@ -90,9 +94,9 @@ module Catpm
|
|
|
90
94
|
|
|
91
95
|
# Compact old entries
|
|
92
96
|
now = Time.current.to_i
|
|
93
|
-
cutoff_m = now -
|
|
94
|
-
cutoff_h = now -
|
|
95
|
-
cutoff_d = now -
|
|
97
|
+
cutoff_m = now - MINUTE_BUCKET_RETENTION
|
|
98
|
+
cutoff_h = now - HOUR_BUCKET_RETENTION
|
|
99
|
+
cutoff_d = now - DAY_BUCKET_RETENTION
|
|
96
100
|
|
|
97
101
|
buckets['m'].reject! { |k, _| k.to_i < cutoff_m }
|
|
98
102
|
buckets['h'].reject! { |k, _| k.to_i < cutoff_h }
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true

module Catpm
  # Records method calls made by application code as :code spans on a
  # request-segments collector.
  #
  # A TracePoint listening for :call and :return events is enabled for the
  # thread that calls #start. Calls whose source path is accepted by
  # Fingerprint.app_frame? open a span through request_segments.push_span and
  # close it through request_segments.pop_span when the method returns. Every
  # other call only leaves a :skip placeholder on an internal stack so that
  # call/return events stay paired.
  class CallTracer
    # Builds the tracer and its TracePoint; tracing does not begin until #start.
    #
    # @param request_segments [#push_span, #pop_span] collector that receives
    #   the spans (push_span is called with type:, detail:, started_at: and its
    #   return value is later handed back to pop_span)
    def initialize(request_segments:)
      @request_segments = request_segments
      @call_stack = []   # one entry per traced call: a span index, :skip, or nil
      @path_cache = {}   # source path => cached Fingerprint.app_frame? result
      @started = false

      @tracepoint = TracePoint.new(:call, :return) do |tp|
        case tp.event
        when :call
          handle_call(tp)
        when :return
          handle_return
        end
      end
    end

    # Enables tracing for the current thread. Does nothing if already started.
    def start
      return if @started

      @started = true
      @tracepoint.enable(target_thread: Thread.current)
    end

    # Disables tracing and closes every span that is still open.
    # Does nothing unless the tracer was started.
    def stop
      return unless @started

      @tracepoint.disable
      @started = false
      flush_remaining_spans
    end

    private

    # Handles a :call event: opens a :code span for app frames, or pushes a
    # :skip placeholder for everything else.
    def handle_call(tp)
      unless app_frame?(tp.path)
        @call_stack.push(:skip)
        return
      end

      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      detail = format_detail(tp.defined_class, tp.method_id)
      index = @request_segments.push_span(type: :code, detail: detail, started_at: started_at)
      @call_stack.push(index)
    end

    # Handles a :return event by closing the span opened by the matching call.
    # A nil entry means the stack was empty (the return belongs to a call that
    # was entered before tracing began) or, presumably, that push_span declined
    # to create a span and returned nil.
    def handle_return
      entry = @call_stack.pop
      return if entry == :skip || entry.nil?

      @request_segments.pop_span(entry)
    end

    # Closes the spans of calls that had not returned when tracing stopped,
    # innermost first, then empties the stack.
    def flush_remaining_spans
      @call_stack.reverse_each do |entry|
        next if entry == :skip || entry.nil?

        @request_segments.pop_span(entry)
      end
      @call_stack.clear
    end

    # Memoized Fingerprint.app_frame? lookup. The nil? check (rather than ||=)
    # keeps cached `false` answers from being recomputed.
    def app_frame?(path)
      cached = @path_cache[path]
      return cached unless cached.nil?

      @path_cache[path] = Fingerprint.app_frame?(path)
    end

    # Builds the span label: "Owner.method" for singleton methods and
    # "Owner#method" for instance methods.
    #
    # Only modules and classes respond to #name. A singleton method defined on
    # a plain object (`def obj.foo`) has that object as its attached_object, so
    # calling #name on it raised NoMethodError from inside the TracePoint hook,
    # and an exception raised in a hook propagates into the traced code. Such
    # owners are labelled by the singleton's superclass (the object's class)
    # instead, which avoids calling any method on the object itself.
    def format_detail(defined_class, method_id)
      return "#{module_label(defined_class)}##{method_id}" unless defined_class.singleton_class?

      owner = defined_class.attached_object
      label =
        if Module === owner
          module_label(owner)
        else
          "#<#{module_label(defined_class.superclass)}>"
        end
      "#{label}.#{method_id}"
    end

    # Name of a module/class, falling back to #inspect for anonymous ones.
    def module_label(mod)
      mod.name || mod.inspect
    end
  end
end
|
data/lib/catpm/collector.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
module Collector
|
|
5
|
+
SYNTHETIC_MIDDLEWARE_OFFSET_MS = 0.5
|
|
6
|
+
MIN_GAP_MS = 1.0
|
|
7
|
+
|
|
5
8
|
class << self
|
|
6
9
|
def process_action_controller(event)
|
|
7
10
|
return unless Catpm.enabled?
|
|
@@ -65,7 +68,7 @@ module Catpm
|
|
|
65
68
|
if ctrl_idx
|
|
66
69
|
has_real_middleware = segments.any? { |s| s[:type] == 'middleware' }
|
|
67
70
|
ctrl_offset = (segments[ctrl_idx][:offset] || 0.0).to_f
|
|
68
|
-
if ctrl_offset >
|
|
71
|
+
if ctrl_offset > SYNTHETIC_MIDDLEWARE_OFFSET_MS && !has_real_middleware
|
|
69
72
|
middleware_seg = {
|
|
70
73
|
type: 'middleware',
|
|
71
74
|
detail: 'Middleware Stack',
|
|
@@ -98,7 +101,7 @@ module Catpm
|
|
|
98
101
|
end
|
|
99
102
|
gap = ctrl_dur - child_dur
|
|
100
103
|
|
|
101
|
-
if gap >
|
|
104
|
+
if gap > MIN_GAP_MS && Catpm.config.show_untracked_segments
|
|
102
105
|
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
103
106
|
end
|
|
104
107
|
end
|
|
@@ -119,7 +122,7 @@ module Catpm
|
|
|
119
122
|
|
|
120
123
|
context[:segments] << {
|
|
121
124
|
type: 'error',
|
|
122
|
-
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(
|
|
125
|
+
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(Catpm.config.max_error_detail_length),
|
|
123
126
|
source: payload[:exception_object]&.backtrace&.first,
|
|
124
127
|
duration: 0,
|
|
125
128
|
offset: error_offset,
|
|
@@ -253,7 +256,7 @@ module Catpm
|
|
|
253
256
|
end
|
|
254
257
|
gap = ctrl_dur - child_dur
|
|
255
258
|
|
|
256
|
-
if gap >
|
|
259
|
+
if gap > MIN_GAP_MS && Catpm.config.show_untracked_segments
|
|
257
260
|
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
258
261
|
end
|
|
259
262
|
end
|
|
@@ -274,7 +277,7 @@ module Catpm
|
|
|
274
277
|
|
|
275
278
|
context[:segments] << {
|
|
276
279
|
type: 'error',
|
|
277
|
-
detail: "#{error.class.name}: #{error.message}".truncate(
|
|
280
|
+
detail: "#{error.class.name}: #{error.message}".truncate(Catpm.config.max_error_detail_length),
|
|
278
281
|
source: error.backtrace&.first,
|
|
279
282
|
duration: 0,
|
|
280
283
|
offset: error_offset,
|
|
@@ -338,7 +341,8 @@ module Catpm
|
|
|
338
341
|
# Filling phase: always sample until endpoint has enough random samples
|
|
339
342
|
endpoint_key = [kind.to_s, target, operation.to_s]
|
|
340
343
|
count = random_sample_counts[endpoint_key]
|
|
341
|
-
|
|
344
|
+
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
345
|
+
if max_random.nil? || count < max_random
|
|
342
346
|
random_sample_counts[endpoint_key] = count + 1
|
|
343
347
|
return 'random'
|
|
344
348
|
end
|
|
@@ -374,22 +378,75 @@ module Catpm
|
|
|
374
378
|
end
|
|
375
379
|
|
|
376
380
|
remaining = gap - sampler_dur
|
|
377
|
-
if remaining >
|
|
378
|
-
segments
|
|
379
|
-
type: 'other',
|
|
380
|
-
detail: 'Untracked',
|
|
381
|
-
duration: remaining.round(2),
|
|
382
|
-
offset: (ctrl_seg[:offset] || 0.0),
|
|
383
|
-
parent_index: ctrl_idx
|
|
384
|
-
}
|
|
381
|
+
if remaining > MIN_GAP_MS
|
|
382
|
+
inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, remaining)
|
|
385
383
|
end
|
|
386
384
|
else
|
|
385
|
+
inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, gap)
|
|
386
|
+
end
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
# Compute actual gap intervals between tracked child segments on the timeline,
|
|
390
|
+
# then create one Untracked entry per gap. This avoids placing a single large
|
|
391
|
+
# Untracked block that overlaps with real segments.
|
|
392
|
+
def inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, total_gap)
|
|
393
|
+
ctrl_offset = (ctrl_seg[:offset] || 0.0).to_f
|
|
394
|
+
ctrl_dur = (ctrl_seg[:duration] || 0.0).to_f
|
|
395
|
+
ctrl_end = ctrl_offset + ctrl_dur
|
|
396
|
+
|
|
397
|
+
# Collect [start, end] intervals of direct children that have offsets
|
|
398
|
+
intervals = []
|
|
399
|
+
segments.each_with_index do |seg, i|
|
|
400
|
+
next if i == ctrl_idx
|
|
401
|
+
next unless seg[:parent_index] == ctrl_idx
|
|
402
|
+
off = seg[:offset]
|
|
403
|
+
dur = (seg[:duration] || 0).to_f
|
|
404
|
+
next unless off
|
|
405
|
+
intervals << [off.to_f, off.to_f + dur]
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
# If no children have offsets, place the gap at the controller start
|
|
409
|
+
if intervals.empty?
|
|
410
|
+
segments << {
|
|
411
|
+
type: 'other', detail: 'Untracked', duration: total_gap.round(2),
|
|
412
|
+
offset: ctrl_offset, parent_index: ctrl_idx
|
|
413
|
+
}
|
|
414
|
+
return
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
# Sort and merge overlapping intervals
|
|
418
|
+
intervals.sort_by!(&:first)
|
|
419
|
+
merged = [intervals.first.dup]
|
|
420
|
+
intervals[1..].each do |s, e|
|
|
421
|
+
if s <= merged.last[1]
|
|
422
|
+
merged.last[1] = e if e > merged.last[1]
|
|
423
|
+
else
|
|
424
|
+
merged << [s, e]
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
# Find gaps between controller start, merged intervals, and controller end
|
|
429
|
+
gaps = []
|
|
430
|
+
cursor = ctrl_offset
|
|
431
|
+
merged.each do |s, e|
|
|
432
|
+
gaps << [cursor, s] if s - cursor > 0
|
|
433
|
+
cursor = [cursor, e].max
|
|
434
|
+
end
|
|
435
|
+
gaps << [cursor, ctrl_end] if ctrl_end - cursor > 0
|
|
436
|
+
|
|
437
|
+
# Distribute total_gap proportionally across timeline gaps
|
|
438
|
+
raw_gap_sum = gaps.sum { |s, e| e - s }
|
|
439
|
+
return if raw_gap_sum <= 0
|
|
440
|
+
|
|
441
|
+
gaps.each do |gs, ge|
|
|
442
|
+
raw_dur = ge - gs
|
|
443
|
+
# Scale so all Untracked segments sum to total_gap
|
|
444
|
+
dur = (raw_dur / raw_gap_sum) * total_gap
|
|
445
|
+
next if dur < MIN_GAP_MS
|
|
446
|
+
|
|
387
447
|
segments << {
|
|
388
|
-
type: 'other',
|
|
389
|
-
|
|
390
|
-
duration: gap.round(2),
|
|
391
|
-
offset: (ctrl_seg[:offset] || 0.0),
|
|
392
|
-
parent_index: ctrl_idx
|
|
448
|
+
type: 'other', detail: 'Untracked', duration: dur.round(2),
|
|
449
|
+
offset: gs.round(2), parent_index: ctrl_idx
|
|
393
450
|
}
|
|
394
451
|
end
|
|
395
452
|
end
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -43,7 +43,15 @@ module Catpm
|
|
|
43
43
|
:events_max_samples_per_name,
|
|
44
44
|
:track_own_requests,
|
|
45
45
|
:stack_sample_interval,
|
|
46
|
-
:max_stack_samples_per_request
|
|
46
|
+
:max_stack_samples_per_request,
|
|
47
|
+
:downsampling_thresholds,
|
|
48
|
+
:max_error_detail_length,
|
|
49
|
+
:max_fingerprint_app_frames,
|
|
50
|
+
:max_fingerprint_gem_frames,
|
|
51
|
+
:cleanup_batch_size,
|
|
52
|
+
:caller_scan_depth,
|
|
53
|
+
:instrument_call_tree,
|
|
54
|
+
:show_untracked_segments
|
|
47
55
|
|
|
48
56
|
def initialize
|
|
49
57
|
@enabled = true
|
|
@@ -88,6 +96,19 @@ module Catpm
|
|
|
88
96
|
@track_own_requests = false
|
|
89
97
|
@stack_sample_interval = 0.005 # seconds (5ms)
|
|
90
98
|
@max_stack_samples_per_request = 200
|
|
99
|
+
@downsampling_thresholds = {
|
|
100
|
+
medium: 1.hour,
|
|
101
|
+
hourly: 24.hours,
|
|
102
|
+
daily: 1.week,
|
|
103
|
+
weekly: 90.days
|
|
104
|
+
}
|
|
105
|
+
@max_error_detail_length = 200
|
|
106
|
+
@max_fingerprint_app_frames = 5
|
|
107
|
+
@max_fingerprint_gem_frames = 3
|
|
108
|
+
@cleanup_batch_size = 1_000
|
|
109
|
+
@caller_scan_depth = 50
|
|
110
|
+
@instrument_call_tree = false
|
|
111
|
+
@show_untracked_segments = false
|
|
91
112
|
end
|
|
92
113
|
|
|
93
114
|
def slow_threshold_for(kind)
|
data/lib/catpm/fingerprint.rb
CHANGED
|
@@ -15,7 +15,7 @@ module Catpm
|
|
|
15
15
|
def self.normalize_backtrace(backtrace)
|
|
16
16
|
app_frames = backtrace
|
|
17
17
|
.select { |line| app_frame?(line) }
|
|
18
|
-
.first(
|
|
18
|
+
.first(Catpm.config.max_fingerprint_app_frames)
|
|
19
19
|
.map { |line| strip_line_number(line) }
|
|
20
20
|
|
|
21
21
|
# If there are app frames, group by app code (like Sentry)
|
|
@@ -25,7 +25,7 @@ module Catpm
|
|
|
25
25
|
# so the same bug is always one issue regardless of the caller.
|
|
26
26
|
backtrace
|
|
27
27
|
.reject { |line| line.include?('<internal:') }
|
|
28
|
-
.first(
|
|
28
|
+
.first(Catpm.config.max_fingerprint_gem_frames)
|
|
29
29
|
.map { |line| strip_line_number(line) }
|
|
30
30
|
.join("\n")
|
|
31
31
|
end
|
data/lib/catpm/flusher.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class Flusher
|
|
5
|
+
ERROR_LOG_BACKTRACE_LINES = 5
|
|
6
|
+
|
|
5
7
|
attr_reader :running
|
|
6
8
|
|
|
7
9
|
def initialize(buffer:, interval: nil, jitter: nil)
|
|
@@ -104,7 +106,7 @@ module Catpm
|
|
|
104
106
|
events&.each { |ev| @buffer.push(ev) }
|
|
105
107
|
@circuit.record_failure
|
|
106
108
|
Catpm.config.error_handler.call(e)
|
|
107
|
-
Rails.logger.error("[catpm] flush error: #{e.class}: #{e.message}\n#{e.backtrace&.first(
|
|
109
|
+
Rails.logger.error("[catpm] flush error: #{e.class}: #{e.message}\n#{e.backtrace&.first(ERROR_LOG_BACKTRACE_LINES)&.join("\n")}")
|
|
108
110
|
end
|
|
109
111
|
|
|
110
112
|
def reset!
|
|
@@ -190,7 +192,8 @@ module Catpm
|
|
|
190
192
|
error[:last_occurred_at] = [ error[:last_occurred_at], event.started_at ].max
|
|
191
193
|
error[:occurrence_times] << event.started_at
|
|
192
194
|
|
|
193
|
-
|
|
195
|
+
max_ctx = Catpm.config.max_error_contexts
|
|
196
|
+
if max_ctx.nil? || error[:new_contexts].size < max_ctx
|
|
194
197
|
error[:new_contexts] << build_error_context(event)
|
|
195
198
|
end
|
|
196
199
|
end
|
|
@@ -254,31 +257,40 @@ module Catpm
|
|
|
254
257
|
|
|
255
258
|
case sample[:sample_type]
|
|
256
259
|
when 'random'
|
|
257
|
-
|
|
258
|
-
if
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
260
|
+
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
261
|
+
if max_random
|
|
262
|
+
cache_key = [kind, target, operation, 'random']
|
|
263
|
+
if (counts_cache[cache_key] || 0) >= max_random
|
|
264
|
+
oldest = Catpm::Sample.joins(:bucket)
|
|
265
|
+
.where(catpm_buckets: { kind: kind, target: target, operation: operation })
|
|
266
|
+
.where(sample_type: 'random').order(recorded_at: :asc).first
|
|
267
|
+
oldest&.destroy
|
|
268
|
+
end
|
|
263
269
|
end
|
|
264
270
|
when 'slow'
|
|
265
|
-
|
|
266
|
-
if
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
271
|
+
max_slow = Catpm.config.max_slow_samples_per_endpoint
|
|
272
|
+
if max_slow
|
|
273
|
+
cache_key = [kind, target, operation, 'slow']
|
|
274
|
+
if (counts_cache[cache_key] || 0) >= max_slow
|
|
275
|
+
weakest = Catpm::Sample.joins(:bucket)
|
|
276
|
+
.where(catpm_buckets: { kind: kind, target: target, operation: operation })
|
|
277
|
+
.where(sample_type: 'slow').order(duration: :asc).first
|
|
278
|
+
if weakest && sample[:duration] > weakest.duration
|
|
279
|
+
weakest.destroy
|
|
280
|
+
else
|
|
281
|
+
sample[:_skip] = true
|
|
282
|
+
end
|
|
274
283
|
end
|
|
275
284
|
end
|
|
276
285
|
when 'error'
|
|
277
|
-
|
|
278
|
-
if
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
286
|
+
max_err = Catpm.config.max_error_samples_per_fingerprint
|
|
287
|
+
if max_err
|
|
288
|
+
fp = sample[:error_fingerprint]
|
|
289
|
+
if fp && (error_counts[fp] || 0) >= max_err
|
|
290
|
+
oldest = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
|
|
291
|
+
.order(recorded_at: :asc).first
|
|
292
|
+
oldest&.destroy
|
|
293
|
+
end
|
|
282
294
|
end
|
|
283
295
|
end
|
|
284
296
|
end
|
|
@@ -335,7 +347,7 @@ module Catpm
|
|
|
335
347
|
|
|
336
348
|
max = Catpm.config.events_max_samples_per_name
|
|
337
349
|
if event.payload.any?
|
|
338
|
-
if sample_counts[event.name] < max
|
|
350
|
+
if max.nil? || sample_counts[event.name] < max
|
|
339
351
|
samples << { name: event.name, payload: event.payload, recorded_at: event.recorded_at }
|
|
340
352
|
sample_counts[event.name] += 1
|
|
341
353
|
elsif rand(Catpm.config.random_sample_rate) == 0
|
|
@@ -357,41 +369,42 @@ module Catpm
|
|
|
357
369
|
|
|
358
370
|
def downsample_buckets
|
|
359
371
|
bucket_sizes = Catpm.config.bucket_sizes
|
|
372
|
+
thresholds = Catpm.config.downsampling_thresholds
|
|
360
373
|
adapter = Catpm::Adapter.current
|
|
361
374
|
|
|
362
375
|
# Phase 1: Merge 1-minute buckets older than 1 hour into 5-minute buckets
|
|
363
376
|
downsample_tier(
|
|
364
377
|
target_interval: bucket_sizes[:medium],
|
|
365
|
-
age_threshold:
|
|
378
|
+
age_threshold: thresholds[:medium],
|
|
366
379
|
adapter: adapter
|
|
367
380
|
)
|
|
368
381
|
|
|
369
382
|
# Phase 2: Merge 5-minute buckets older than 24 hours into 1-hour buckets
|
|
370
383
|
downsample_tier(
|
|
371
384
|
target_interval: bucket_sizes[:hourly],
|
|
372
|
-
age_threshold:
|
|
385
|
+
age_threshold: thresholds[:hourly],
|
|
373
386
|
adapter: adapter
|
|
374
387
|
)
|
|
375
388
|
|
|
376
389
|
# Phase 3: Merge 1-hour buckets older than 1 week into 1-day buckets
|
|
377
390
|
downsample_tier(
|
|
378
391
|
target_interval: bucket_sizes[:daily],
|
|
379
|
-
age_threshold:
|
|
392
|
+
age_threshold: thresholds[:daily],
|
|
380
393
|
adapter: adapter
|
|
381
394
|
)
|
|
382
395
|
|
|
383
396
|
# Phase 4: Merge 1-day buckets older than 3 months into 1-week buckets
|
|
384
397
|
downsample_tier(
|
|
385
398
|
target_interval: bucket_sizes[:weekly],
|
|
386
|
-
age_threshold:
|
|
399
|
+
age_threshold: thresholds[:weekly],
|
|
387
400
|
adapter: adapter
|
|
388
401
|
)
|
|
389
402
|
|
|
390
403
|
# Event buckets: same downsampling tiers
|
|
391
|
-
downsample_event_tier(target_interval: bucket_sizes[:medium], age_threshold:
|
|
392
|
-
downsample_event_tier(target_interval: bucket_sizes[:hourly], age_threshold:
|
|
393
|
-
downsample_event_tier(target_interval: bucket_sizes[:daily], age_threshold:
|
|
394
|
-
downsample_event_tier(target_interval: bucket_sizes[:weekly], age_threshold:
|
|
404
|
+
downsample_event_tier(target_interval: bucket_sizes[:medium], age_threshold: thresholds[:medium], adapter: adapter)
|
|
405
|
+
downsample_event_tier(target_interval: bucket_sizes[:hourly], age_threshold: thresholds[:hourly], adapter: adapter)
|
|
406
|
+
downsample_event_tier(target_interval: bucket_sizes[:daily], age_threshold: thresholds[:daily], adapter: adapter)
|
|
407
|
+
downsample_event_tier(target_interval: bucket_sizes[:weekly], age_threshold: thresholds[:weekly], adapter: adapter)
|
|
395
408
|
end
|
|
396
409
|
|
|
397
410
|
def downsample_tier(target_interval:, age_threshold:, adapter:)
|
|
@@ -496,7 +509,7 @@ module Catpm
|
|
|
496
509
|
|
|
497
510
|
def cleanup_expired_data
|
|
498
511
|
cutoff = Catpm.config.retention_period.ago
|
|
499
|
-
batch_size =
|
|
512
|
+
batch_size = Catpm.config.cleanup_batch_size
|
|
500
513
|
|
|
501
514
|
[ Catpm::Bucket, Catpm::Sample ].each do |model|
|
|
502
515
|
time_column = model == Catpm::Sample ? :recorded_at : :bucket_start
|
data/lib/catpm/middleware.rb
CHANGED
|
@@ -21,6 +21,12 @@ module Catpm
|
|
|
21
21
|
)
|
|
22
22
|
env['catpm.segments'] = req_segments
|
|
23
23
|
Thread.current[:catpm_request_segments] = req_segments
|
|
24
|
+
|
|
25
|
+
if Catpm.config.instrument_call_tree
|
|
26
|
+
call_tracer = CallTracer.new(request_segments: req_segments)
|
|
27
|
+
call_tracer.start
|
|
28
|
+
env['catpm.call_tracer'] = call_tracer
|
|
29
|
+
end
|
|
24
30
|
end
|
|
25
31
|
|
|
26
32
|
@app.call(env)
|
|
@@ -29,6 +35,7 @@ module Catpm
|
|
|
29
35
|
raise
|
|
30
36
|
ensure
|
|
31
37
|
if Catpm.config.instrument_segments
|
|
38
|
+
env['catpm.call_tracer']&.stop
|
|
32
39
|
req_segments&.stop_sampler
|
|
33
40
|
Thread.current[:catpm_request_segments] = nil
|
|
34
41
|
end
|
|
@@ -36,7 +36,7 @@ module Catpm
|
|
|
36
36
|
@tracked_ranges << [started_at, started_at + duration / 1000.0]
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
if @segments.size < @max_segments
|
|
39
|
+
if @max_segments.nil? || @segments.size < @max_segments
|
|
40
40
|
@segments << segment
|
|
41
41
|
else
|
|
42
42
|
@overflow = true
|
|
@@ -54,7 +54,7 @@ module Catpm
|
|
|
54
54
|
segment[:offset] = offset if offset
|
|
55
55
|
segment[:parent_index] = @span_stack.last if @span_stack.any?
|
|
56
56
|
|
|
57
|
-
return nil if @segments.size >= @max_segments
|
|
57
|
+
return nil if @max_segments && @segments.size >= @max_segments
|
|
58
58
|
|
|
59
59
|
index = @segments.size
|
|
60
60
|
@segments << segment
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
module SegmentSubscribers
|
|
5
|
+
MIN_INSTANTIATION_DURATION_MS = 0.1
|
|
6
|
+
CALLER_OFFSET = 4 # frames to skip to reach user code from this call site
|
|
5
7
|
# Subscriber with start/finish callbacks so all segments (SQL, views, etc.)
|
|
6
8
|
# fired during a controller action are automatically nested under the controller span.
|
|
7
9
|
class ControllerSpanSubscriber
|
|
@@ -146,7 +148,7 @@ module Catpm
|
|
|
146
148
|
return unless req_segments
|
|
147
149
|
|
|
148
150
|
duration = event.duration
|
|
149
|
-
return if duration <
|
|
151
|
+
return if duration < MIN_INSTANTIATION_DURATION_MS # skip trivial instantiations
|
|
150
152
|
|
|
151
153
|
payload = event.payload
|
|
152
154
|
record_count = payload[:record_count] || 0
|
|
@@ -230,7 +232,7 @@ module Catpm
|
|
|
230
232
|
end
|
|
231
233
|
|
|
232
234
|
def extract_source_location
|
|
233
|
-
locations = caller_locations(
|
|
235
|
+
locations = caller_locations(CALLER_OFFSET, Catpm.config.caller_scan_depth)
|
|
234
236
|
locations&.each do |loc|
|
|
235
237
|
path = loc.path.to_s
|
|
236
238
|
if Fingerprint.app_frame?(path)
|
data/lib/catpm/stack_sampler.rb
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class StackSampler
|
|
5
5
|
MS_PER_SECOND = 1000.0
|
|
6
|
+
MIN_SEGMENT_DURATION_MS = 1.0
|
|
7
|
+
SAMPLING_THREAD_PRIORITY = -1
|
|
6
8
|
|
|
7
9
|
# Single global thread that samples all active requests.
|
|
8
10
|
# Avoids creating a thread per request.
|
|
@@ -33,7 +35,7 @@ module Catpm
|
|
|
33
35
|
sample_all
|
|
34
36
|
end
|
|
35
37
|
end
|
|
36
|
-
@thread.priority =
|
|
38
|
+
@thread.priority = SAMPLING_THREAD_PRIORITY
|
|
37
39
|
end
|
|
38
40
|
|
|
39
41
|
def sample_all
|
|
@@ -118,7 +120,7 @@ module Catpm
|
|
|
118
120
|
|
|
119
121
|
groups.filter_map do |group|
|
|
120
122
|
duration = estimate_duration(group)
|
|
121
|
-
next if duration <
|
|
123
|
+
next if duration < MIN_SEGMENT_DURATION_MS
|
|
122
124
|
|
|
123
125
|
offset = ((group[:start_time] - @request_start) * MS_PER_SECOND).round(2)
|
|
124
126
|
app_frame = group[:app_frame]
|
|
@@ -205,7 +207,7 @@ module Catpm
|
|
|
205
207
|
(span[:end_time] - span[:start_time]) * MS_PER_SECOND,
|
|
206
208
|
span[:count] * Catpm.config.stack_sample_interval * MS_PER_SECOND
|
|
207
209
|
].max
|
|
208
|
-
next if duration <
|
|
210
|
+
next if duration < MIN_SEGMENT_DURATION_MS
|
|
209
211
|
|
|
210
212
|
frame = span[:frame]
|
|
211
213
|
path = frame.path.to_s
|
data/lib/catpm/trace.rb
CHANGED
|
@@ -83,6 +83,11 @@ module Catpm
|
|
|
83
83
|
)
|
|
84
84
|
Thread.current[:catpm_request_segments] = req_segments
|
|
85
85
|
owns_segments = true
|
|
86
|
+
|
|
87
|
+
if config.instrument_call_tree
|
|
88
|
+
call_tracer = CallTracer.new(request_segments: req_segments)
|
|
89
|
+
call_tracer.start
|
|
90
|
+
end
|
|
86
91
|
end
|
|
87
92
|
|
|
88
93
|
if req_segments
|
|
@@ -100,6 +105,7 @@ module Catpm
|
|
|
100
105
|
raise
|
|
101
106
|
ensure
|
|
102
107
|
duration = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000.0
|
|
108
|
+
call_tracer&.stop
|
|
103
109
|
req_segments&.pop_span(ctrl_idx) if ctrl_idx
|
|
104
110
|
req_segments&.stop_sampler
|
|
105
111
|
|
|
@@ -152,8 +158,10 @@ module Catpm
|
|
|
152
158
|
|
|
153
159
|
private
|
|
154
160
|
|
|
161
|
+
CALLER_OFFSET = 3 # frames to skip to reach user code from this call site
|
|
162
|
+
|
|
155
163
|
def self.extract_trace_source
|
|
156
|
-
locations = caller_locations(
|
|
164
|
+
locations = caller_locations(CALLER_OFFSET, Catpm.config.caller_scan_depth)
|
|
157
165
|
locations&.each do |loc|
|
|
158
166
|
path = loc.path.to_s
|
|
159
167
|
if Fingerprint.app_frame?(path)
|
data/lib/catpm/version.rb
CHANGED
data/lib/catpm.rb
CHANGED
|
@@ -1,78 +1,90 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
Catpm.configure do |config|
|
|
4
|
+
# === Enable/Disable ===
|
|
5
|
+
# config.enabled = Rails.env.production? || Rails.env.staging?
|
|
6
|
+
|
|
4
7
|
# === Security ===
|
|
5
|
-
# Protect the dashboard with HTTP Basic Auth or a custom policy.
|
|
6
|
-
# If neither is configured, the dashboard is accessible to everyone.
|
|
7
|
-
#
|
|
8
8
|
# config.http_basic_auth_user = ENV["CATPM_USER"]
|
|
9
9
|
# config.http_basic_auth_password = ENV["CATPM_PASSWORD"]
|
|
10
10
|
# config.access_policy = ->(request) { request.env["warden"].user&.admin? }
|
|
11
11
|
|
|
12
12
|
# === PII Filtering ===
|
|
13
|
-
# Rails' filter_parameters are inherited automatically.
|
|
14
|
-
# Add extra patterns here:
|
|
15
|
-
#
|
|
16
13
|
# config.additional_filter_parameters = [:card_number, :ssn]
|
|
17
14
|
|
|
18
15
|
# === Instrumentation ===
|
|
19
|
-
|
|
20
|
-
config.
|
|
21
|
-
config.
|
|
22
|
-
config.
|
|
23
|
-
# config.
|
|
24
|
-
# config.
|
|
25
|
-
# config.
|
|
26
|
-
# config.
|
|
27
|
-
# config.
|
|
28
|
-
|
|
29
|
-
#
|
|
30
|
-
# config.
|
|
31
|
-
#
|
|
32
|
-
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
# config.
|
|
36
|
-
#
|
|
37
|
-
#
|
|
38
|
-
#
|
|
16
|
+
config.instrument_http = true
|
|
17
|
+
config.instrument_jobs = false
|
|
18
|
+
config.instrument_segments = true
|
|
19
|
+
# config.instrument_net_http = false
|
|
20
|
+
# config.instrument_middleware_stack = false
|
|
21
|
+
# config.instrument_stack_sampler = false
|
|
22
|
+
# config.instrument_call_tree = false
|
|
23
|
+
# config.show_untracked_segments = false
|
|
24
|
+
# config.track_own_requests = false
|
|
25
|
+
|
|
26
|
+
# === Auto-instrumentation ===
|
|
27
|
+
# config.service_base_classes = nil # nil = auto-detect (ApplicationService, BaseService)
|
|
28
|
+
# config.auto_instrument_methods = [] # e.g. ["Worker#process", "Gateway.charge"]
|
|
29
|
+
|
|
30
|
+
# === Segments ===
|
|
31
|
+
# config.max_segments_per_request = 50 # nil = unlimited
|
|
32
|
+
# config.segment_source_threshold = 0.0 # ms — capture caller_locations above this
|
|
33
|
+
# config.max_sql_length = 200 # nil = no truncation
|
|
34
|
+
# config.slow_threshold = 500 # ms
|
|
35
|
+
# config.slow_threshold_per_kind = {} # { http: 500, job: 5_000, custom: 1_000 }
|
|
36
|
+
# config.ignored_targets = []
|
|
37
|
+
|
|
38
|
+
# === Stack Sampling ===
|
|
39
|
+
# config.stack_sample_interval = 0.005 # seconds (5ms)
|
|
40
|
+
# config.max_stack_samples_per_request = 200 # nil = unlimited
|
|
39
41
|
|
|
40
42
|
# === Sampling ===
|
|
41
|
-
#
|
|
42
|
-
# config.
|
|
43
|
-
# config.
|
|
44
|
-
# config.
|
|
45
|
-
|
|
46
|
-
# ===
|
|
47
|
-
#
|
|
48
|
-
# config.
|
|
49
|
-
|
|
50
|
-
#
|
|
51
|
-
# config.
|
|
52
|
-
# config.
|
|
53
|
-
|
|
54
|
-
#
|
|
43
|
+
# config.random_sample_rate = 20 # 1 in N requests sampled randomly
|
|
44
|
+
# config.max_random_samples_per_endpoint = 5 # nil = unlimited
|
|
45
|
+
# config.max_slow_samples_per_endpoint = 5 # nil = unlimited
|
|
46
|
+
# config.max_error_samples_per_fingerprint = 20 # nil = unlimited
|
|
47
|
+
|
|
48
|
+
# === Errors ===
|
|
49
|
+
# config.max_error_contexts = 5 # nil = unlimited
|
|
50
|
+
# config.backtrace_lines = nil
|
|
51
|
+
|
|
52
|
+
# === Events ===
|
|
53
|
+
# config.events_enabled = false
|
|
54
|
+
# config.events_max_samples_per_name = 20 # nil = unlimited
|
|
55
|
+
|
|
56
|
+
# === Buffering & Flushing ===
|
|
57
|
+
# config.max_buffer_memory = 8.megabytes
|
|
58
|
+
# config.flush_interval = 30 # seconds
|
|
59
|
+
# config.flush_jitter = 5 # ±seconds
|
|
60
|
+
# config.persistence_batch_size = 100
|
|
61
|
+
|
|
62
|
+
# === Data Retention ===
|
|
63
|
+
# config.retention_period = nil # nil = keep forever
|
|
64
|
+
# config.cleanup_interval = 1.hour
|
|
55
65
|
|
|
56
66
|
# === Downsampling ===
|
|
57
|
-
# Data is kept forever with progressively coarser resolution:
|
|
58
|
-
# - Last hour: 1-minute buckets
|
|
59
|
-
# - 1h–24h: 5-minute buckets
|
|
60
|
-
# - 1d–1w: 1-hour buckets
|
|
61
|
-
# - 1w–3mo: 1-day buckets
|
|
62
|
-
# - Older than 3 months: 1-week buckets
|
|
63
|
-
#
|
|
64
67
|
# config.bucket_sizes = { recent: 1.minute, medium: 5.minutes, hourly: 1.hour, daily: 1.day, weekly: 1.week }
|
|
68
|
+
# config.downsampling_thresholds = { # Age before tier is merged into the next coarser tier
|
|
69
|
+
# medium: 1.hour,
|
|
70
|
+
# hourly: 24.hours,
|
|
71
|
+
# daily: 1.week,
|
|
72
|
+
# weekly: 90.days
|
|
73
|
+
# }
|
|
74
|
+
|
|
75
|
+
# === Database ===
|
|
76
|
+
# config.sqlite_busy_timeout = 5_000 # ms
|
|
77
|
+
|
|
78
|
+
# === Circuit Breaker ===
|
|
79
|
+
# config.circuit_breaker_failure_threshold = 5
|
|
80
|
+
# config.circuit_breaker_recovery_timeout = 60 # seconds
|
|
65
81
|
|
|
66
82
|
# === Advanced ===
|
|
67
|
-
#
|
|
68
|
-
# config.
|
|
69
|
-
# config.
|
|
70
|
-
# config.
|
|
71
|
-
# config.
|
|
72
|
-
# config.
|
|
83
|
+
# config.shutdown_timeout = 5 # seconds
|
|
84
|
+
# config.max_error_detail_length = 200 # truncate error detail segments
|
|
85
|
+
# config.max_fingerprint_app_frames = 5 # app frames used for error fingerprint
|
|
86
|
+
# config.max_fingerprint_gem_frames = 3 # gem frames used when no app frames
|
|
87
|
+
# config.cleanup_batch_size = 1_000 # rows per cleanup batch
|
|
88
|
+
# config.caller_scan_depth = 50 # max frames to scan for app code
|
|
73
89
|
# config.error_handler = ->(e) { Rails.logger.error("[catpm] #{e.message}") }
|
|
74
|
-
|
|
75
|
-
# === Enable/Disable ===
|
|
76
|
-
#
|
|
77
|
-
# config.enabled = Rails.env.production? || Rails.env.staging?
|
|
78
90
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: catpm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- ''
|
|
@@ -71,6 +71,7 @@ files:
|
|
|
71
71
|
- lib/catpm/adapter/sqlite.rb
|
|
72
72
|
- lib/catpm/auto_instrument.rb
|
|
73
73
|
- lib/catpm/buffer.rb
|
|
74
|
+
- lib/catpm/call_tracer.rb
|
|
74
75
|
- lib/catpm/circuit_breaker.rb
|
|
75
76
|
- lib/catpm/collector.rb
|
|
76
77
|
- lib/catpm/configuration.rb
|