catpm 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/catpm/endpoints_controller.rb +29 -5
- data/app/controllers/catpm/errors_controller.rb +30 -6
- data/app/controllers/catpm/events_controller.rb +28 -4
- data/app/controllers/catpm/status_controller.rb +14 -2
- data/app/helpers/catpm/application_helper.rb +18 -6
- data/app/views/catpm/endpoints/show.html.erb +29 -12
- data/app/views/catpm/errors/show.html.erb +12 -7
- data/app/views/catpm/events/index.html.erb +2 -2
- data/app/views/catpm/events/show.html.erb +12 -8
- data/app/views/catpm/shared/_segments_waterfall.html.erb +5 -0
- data/app/views/catpm/status/index.html.erb +2 -2
- data/app/views/layouts/catpm/application.html.erb +62 -16
- data/lib/catpm/adapter/base.rb +8 -4
- data/lib/catpm/call_tracer.rb +85 -0
- data/lib/catpm/collector.rb +76 -19
- data/lib/catpm/configuration.rb +27 -2
- data/lib/catpm/event.rb +11 -5
- data/lib/catpm/fingerprint.rb +2 -2
- data/lib/catpm/flusher.rb +124 -89
- data/lib/catpm/middleware.rb +7 -0
- data/lib/catpm/request_segments.rb +2 -2
- data/lib/catpm/segment_subscribers.rb +6 -2
- data/lib/catpm/stack_sampler.rb +16 -11
- data/lib/catpm/tdigest.rb +2 -1
- data/lib/catpm/trace.rb +9 -1
- data/lib/catpm/version.rb +1 -1
- data/lib/catpm.rb +1 -0
- data/lib/generators/catpm/templates/initializer.rb.tt +69 -57
- metadata +2 -1
data/lib/catpm/collector.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
module Collector
|
|
5
|
+
SYNTHETIC_MIDDLEWARE_OFFSET_MS = 0.5
|
|
6
|
+
MIN_GAP_MS = 1.0
|
|
7
|
+
|
|
5
8
|
class << self
|
|
6
9
|
def process_action_controller(event)
|
|
7
10
|
return unless Catpm.enabled?
|
|
@@ -65,7 +68,7 @@ module Catpm
|
|
|
65
68
|
if ctrl_idx
|
|
66
69
|
has_real_middleware = segments.any? { |s| s[:type] == 'middleware' }
|
|
67
70
|
ctrl_offset = (segments[ctrl_idx][:offset] || 0.0).to_f
|
|
68
|
-
if ctrl_offset >
|
|
71
|
+
if ctrl_offset > SYNTHETIC_MIDDLEWARE_OFFSET_MS && !has_real_middleware
|
|
69
72
|
middleware_seg = {
|
|
70
73
|
type: 'middleware',
|
|
71
74
|
detail: 'Middleware Stack',
|
|
@@ -98,7 +101,7 @@ module Catpm
|
|
|
98
101
|
end
|
|
99
102
|
gap = ctrl_dur - child_dur
|
|
100
103
|
|
|
101
|
-
if gap >
|
|
104
|
+
if gap > MIN_GAP_MS && Catpm.config.show_untracked_segments
|
|
102
105
|
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
103
106
|
end
|
|
104
107
|
end
|
|
@@ -119,7 +122,7 @@ module Catpm
|
|
|
119
122
|
|
|
120
123
|
context[:segments] << {
|
|
121
124
|
type: 'error',
|
|
122
|
-
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(
|
|
125
|
+
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(Catpm.config.max_error_detail_length),
|
|
123
126
|
source: payload[:exception_object]&.backtrace&.first,
|
|
124
127
|
duration: 0,
|
|
125
128
|
offset: error_offset,
|
|
@@ -253,7 +256,7 @@ module Catpm
|
|
|
253
256
|
end
|
|
254
257
|
gap = ctrl_dur - child_dur
|
|
255
258
|
|
|
256
|
-
if gap >
|
|
259
|
+
if gap > MIN_GAP_MS && Catpm.config.show_untracked_segments
|
|
257
260
|
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
258
261
|
end
|
|
259
262
|
end
|
|
@@ -274,7 +277,7 @@ module Catpm
|
|
|
274
277
|
|
|
275
278
|
context[:segments] << {
|
|
276
279
|
type: 'error',
|
|
277
|
-
detail: "#{error.class.name}: #{error.message}".truncate(
|
|
280
|
+
detail: "#{error.class.name}: #{error.message}".truncate(Catpm.config.max_error_detail_length),
|
|
278
281
|
source: error.backtrace&.first,
|
|
279
282
|
duration: 0,
|
|
280
283
|
offset: error_offset,
|
|
@@ -338,7 +341,8 @@ module Catpm
|
|
|
338
341
|
# Filling phase: always sample until endpoint has enough random samples
|
|
339
342
|
endpoint_key = [kind.to_s, target, operation.to_s]
|
|
340
343
|
count = random_sample_counts[endpoint_key]
|
|
341
|
-
|
|
344
|
+
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
345
|
+
if max_random.nil? || count < max_random
|
|
342
346
|
random_sample_counts[endpoint_key] = count + 1
|
|
343
347
|
return 'random'
|
|
344
348
|
end
|
|
@@ -374,22 +378,75 @@ module Catpm
|
|
|
374
378
|
end
|
|
375
379
|
|
|
376
380
|
remaining = gap - sampler_dur
|
|
377
|
-
if remaining >
|
|
378
|
-
segments
|
|
379
|
-
type: 'other',
|
|
380
|
-
detail: 'Untracked',
|
|
381
|
-
duration: remaining.round(2),
|
|
382
|
-
offset: (ctrl_seg[:offset] || 0.0),
|
|
383
|
-
parent_index: ctrl_idx
|
|
384
|
-
}
|
|
381
|
+
if remaining > MIN_GAP_MS
|
|
382
|
+
inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, remaining)
|
|
385
383
|
end
|
|
386
384
|
else
|
|
385
|
+
inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, gap)
|
|
386
|
+
end
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
# Compute actual gap intervals between tracked child segments on the timeline,
|
|
390
|
+
# then create one Untracked entry per gap. This avoids placing a single large
|
|
391
|
+
# Untracked block that overlaps with real segments.
|
|
392
|
+
def inject_timeline_gaps(segments, ctrl_idx, ctrl_seg, total_gap)
|
|
393
|
+
ctrl_offset = (ctrl_seg[:offset] || 0.0).to_f
|
|
394
|
+
ctrl_dur = (ctrl_seg[:duration] || 0.0).to_f
|
|
395
|
+
ctrl_end = ctrl_offset + ctrl_dur
|
|
396
|
+
|
|
397
|
+
# Collect [start, end] intervals of direct children that have offsets
|
|
398
|
+
intervals = []
|
|
399
|
+
segments.each_with_index do |seg, i|
|
|
400
|
+
next if i == ctrl_idx
|
|
401
|
+
next unless seg[:parent_index] == ctrl_idx
|
|
402
|
+
off = seg[:offset]
|
|
403
|
+
dur = (seg[:duration] || 0).to_f
|
|
404
|
+
next unless off
|
|
405
|
+
intervals << [off.to_f, off.to_f + dur]
|
|
406
|
+
end
|
|
407
|
+
|
|
408
|
+
# If no children have offsets, place the gap at the controller start
|
|
409
|
+
if intervals.empty?
|
|
410
|
+
segments << {
|
|
411
|
+
type: 'other', detail: 'Untracked', duration: total_gap.round(2),
|
|
412
|
+
offset: ctrl_offset, parent_index: ctrl_idx
|
|
413
|
+
}
|
|
414
|
+
return
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
# Sort and merge overlapping intervals
|
|
418
|
+
intervals.sort_by!(&:first)
|
|
419
|
+
merged = [intervals.first.dup]
|
|
420
|
+
intervals[1..].each do |s, e|
|
|
421
|
+
if s <= merged.last[1]
|
|
422
|
+
merged.last[1] = e if e > merged.last[1]
|
|
423
|
+
else
|
|
424
|
+
merged << [s, e]
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
|
|
428
|
+
# Find gaps between controller start, merged intervals, and controller end
|
|
429
|
+
gaps = []
|
|
430
|
+
cursor = ctrl_offset
|
|
431
|
+
merged.each do |s, e|
|
|
432
|
+
gaps << [cursor, s] if s - cursor > 0
|
|
433
|
+
cursor = [cursor, e].max
|
|
434
|
+
end
|
|
435
|
+
gaps << [cursor, ctrl_end] if ctrl_end - cursor > 0
|
|
436
|
+
|
|
437
|
+
# Distribute total_gap proportionally across timeline gaps
|
|
438
|
+
raw_gap_sum = gaps.sum { |s, e| e - s }
|
|
439
|
+
return if raw_gap_sum <= 0
|
|
440
|
+
|
|
441
|
+
gaps.each do |gs, ge|
|
|
442
|
+
raw_dur = ge - gs
|
|
443
|
+
# Scale so all Untracked segments sum to total_gap
|
|
444
|
+
dur = (raw_dur / raw_gap_sum) * total_gap
|
|
445
|
+
next if dur < MIN_GAP_MS
|
|
446
|
+
|
|
387
447
|
segments << {
|
|
388
|
-
type: 'other',
|
|
389
|
-
|
|
390
|
-
duration: gap.round(2),
|
|
391
|
-
offset: (ctrl_seg[:offset] || 0.0),
|
|
392
|
-
parent_index: ctrl_idx
|
|
448
|
+
type: 'other', detail: 'Untracked', duration: dur.round(2),
|
|
449
|
+
offset: gs.round(2), parent_index: ctrl_idx
|
|
393
450
|
}
|
|
394
451
|
end
|
|
395
452
|
end
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -41,7 +41,17 @@ module Catpm
|
|
|
41
41
|
:shutdown_timeout,
|
|
42
42
|
:events_enabled,
|
|
43
43
|
:events_max_samples_per_name,
|
|
44
|
-
:track_own_requests
|
|
44
|
+
:track_own_requests,
|
|
45
|
+
:stack_sample_interval,
|
|
46
|
+
:max_stack_samples_per_request,
|
|
47
|
+
:downsampling_thresholds,
|
|
48
|
+
:max_error_detail_length,
|
|
49
|
+
:max_fingerprint_app_frames,
|
|
50
|
+
:max_fingerprint_gem_frames,
|
|
51
|
+
:cleanup_batch_size,
|
|
52
|
+
:caller_scan_depth,
|
|
53
|
+
:instrument_call_tree,
|
|
54
|
+
:show_untracked_segments
|
|
45
55
|
|
|
46
56
|
def initialize
|
|
47
57
|
@enabled = true
|
|
@@ -58,7 +68,7 @@ module Catpm
|
|
|
58
68
|
@slow_threshold_per_kind = {}
|
|
59
69
|
@ignored_targets = []
|
|
60
70
|
@retention_period = nil # nil = keep forever (data is downsampled, not deleted)
|
|
61
|
-
@max_buffer_memory =
|
|
71
|
+
@max_buffer_memory = 8.megabytes
|
|
62
72
|
@flush_interval = 30 # seconds
|
|
63
73
|
@flush_jitter = 5 # ±seconds
|
|
64
74
|
@max_error_contexts = 5
|
|
@@ -84,6 +94,21 @@ module Catpm
|
|
|
84
94
|
@events_enabled = false
|
|
85
95
|
@events_max_samples_per_name = 20
|
|
86
96
|
@track_own_requests = false
|
|
97
|
+
@stack_sample_interval = 0.005 # seconds (5ms)
|
|
98
|
+
@max_stack_samples_per_request = 200
|
|
99
|
+
@downsampling_thresholds = {
|
|
100
|
+
medium: 1.hour,
|
|
101
|
+
hourly: 24.hours,
|
|
102
|
+
daily: 1.week,
|
|
103
|
+
weekly: 90.days
|
|
104
|
+
}
|
|
105
|
+
@max_error_detail_length = 200
|
|
106
|
+
@max_fingerprint_app_frames = 5
|
|
107
|
+
@max_fingerprint_gem_frames = 3
|
|
108
|
+
@cleanup_batch_size = 1_000
|
|
109
|
+
@caller_scan_depth = 50
|
|
110
|
+
@instrument_call_tree = false
|
|
111
|
+
@show_untracked_segments = false
|
|
87
112
|
end
|
|
88
113
|
|
|
89
114
|
def slow_threshold_for(kind)
|
data/lib/catpm/event.rb
CHANGED
|
@@ -10,18 +10,24 @@ module Catpm
|
|
|
10
10
|
:metadata, :error_class, :error_message, :backtrace,
|
|
11
11
|
:sample_type, :context, :status
|
|
12
12
|
|
|
13
|
+
EMPTY_HASH = {}.freeze
|
|
14
|
+
private_constant :EMPTY_HASH
|
|
15
|
+
|
|
13
16
|
def initialize(kind:, target:, operation: '', duration: 0.0, started_at: nil,
|
|
14
|
-
metadata:
|
|
15
|
-
sample_type: nil, context:
|
|
17
|
+
metadata: nil, error_class: nil, error_message: nil, backtrace: nil,
|
|
18
|
+
sample_type: nil, context: nil, status: nil)
|
|
16
19
|
@kind = kind.to_s
|
|
17
20
|
@target = target.to_s
|
|
18
21
|
@operation = (operation || '').to_s
|
|
19
22
|
@duration = duration.to_f
|
|
20
23
|
@started_at = started_at || Time.current
|
|
21
|
-
@metadata = metadata ||
|
|
24
|
+
@metadata = metadata || EMPTY_HASH
|
|
22
25
|
@error_class = error_class
|
|
23
26
|
@error_message = error_message
|
|
24
|
-
@backtrace = backtrace
|
|
27
|
+
@backtrace = if backtrace
|
|
28
|
+
limit = Catpm.config.backtrace_lines
|
|
29
|
+
limit ? backtrace.first(limit) : backtrace
|
|
30
|
+
end
|
|
25
31
|
@sample_type = sample_type
|
|
26
32
|
@context = context
|
|
27
33
|
@status = status
|
|
@@ -67,7 +73,7 @@ module Catpm
|
|
|
67
73
|
end
|
|
68
74
|
|
|
69
75
|
def metadata_bytes
|
|
70
|
-
return 0 if metadata.empty?
|
|
76
|
+
return 0 if metadata.nil? || metadata.empty?
|
|
71
77
|
|
|
72
78
|
metadata.to_json.bytesize + REF_SIZE
|
|
73
79
|
end
|
data/lib/catpm/fingerprint.rb
CHANGED
|
@@ -15,7 +15,7 @@ module Catpm
|
|
|
15
15
|
def self.normalize_backtrace(backtrace)
|
|
16
16
|
app_frames = backtrace
|
|
17
17
|
.select { |line| app_frame?(line) }
|
|
18
|
-
.first(
|
|
18
|
+
.first(Catpm.config.max_fingerprint_app_frames)
|
|
19
19
|
.map { |line| strip_line_number(line) }
|
|
20
20
|
|
|
21
21
|
# If there are app frames, group by app code (like Sentry)
|
|
@@ -25,7 +25,7 @@ module Catpm
|
|
|
25
25
|
# so the same bug is always one issue regardless of the caller.
|
|
26
26
|
backtrace
|
|
27
27
|
.reject { |line| line.include?('<internal:') }
|
|
28
|
-
.first(
|
|
28
|
+
.first(Catpm.config.max_fingerprint_gem_frames)
|
|
29
29
|
.map { |line| strip_line_number(line) }
|
|
30
30
|
.join("\n")
|
|
31
31
|
end
|
data/lib/catpm/flusher.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class Flusher
|
|
5
|
+
ERROR_LOG_BACKTRACE_LINES = 5
|
|
6
|
+
|
|
5
7
|
attr_reader :running
|
|
6
8
|
|
|
7
9
|
def initialize(buffer:, interval: nil, jitter: nil)
|
|
@@ -104,7 +106,7 @@ module Catpm
|
|
|
104
106
|
events&.each { |ev| @buffer.push(ev) }
|
|
105
107
|
@circuit.record_failure
|
|
106
108
|
Catpm.config.error_handler.call(e)
|
|
107
|
-
Rails.logger.error("[catpm] flush error: #{e.class}: #{e.message}\n#{e.backtrace&.first(
|
|
109
|
+
Rails.logger.error("[catpm] flush error: #{e.class}: #{e.message}\n#{e.backtrace&.first(ERROR_LOG_BACKTRACE_LINES)&.join("\n")}")
|
|
108
110
|
end
|
|
109
111
|
|
|
110
112
|
def reset!
|
|
@@ -190,7 +192,8 @@ module Catpm
|
|
|
190
192
|
error[:last_occurred_at] = [ error[:last_occurred_at], event.started_at ].max
|
|
191
193
|
error[:occurrence_times] << event.started_at
|
|
192
194
|
|
|
193
|
-
|
|
195
|
+
max_ctx = Catpm.config.max_error_contexts
|
|
196
|
+
if max_ctx.nil? || error[:new_contexts].size < max_ctx
|
|
194
197
|
error[:new_contexts] << build_error_context(event)
|
|
195
198
|
end
|
|
196
199
|
end
|
|
@@ -225,34 +228,68 @@ module Catpm
|
|
|
225
228
|
|
|
226
229
|
|
|
227
230
|
def rotate_samples(samples)
|
|
231
|
+
return samples if samples.empty?
|
|
232
|
+
|
|
233
|
+
# Pre-fetch counts for all endpoints and types in bulk
|
|
234
|
+
endpoint_keys = samples.map { |s| s[:bucket_key][0..2] }.uniq
|
|
235
|
+
error_fps = samples.filter_map { |s| s[:error_fingerprint] }.uniq
|
|
236
|
+
|
|
237
|
+
# Build counts cache: { [kind, target, op, type] => count }
|
|
238
|
+
counts_cache = {}
|
|
239
|
+
if endpoint_keys.any?
|
|
240
|
+
Catpm::Sample.joins(:bucket)
|
|
241
|
+
.where(catpm_buckets: { kind: endpoint_keys.map(&:first), target: endpoint_keys.map { |k| k[1] }, operation: endpoint_keys.map { |k| k[2] } })
|
|
242
|
+
.where(sample_type: %w[random slow])
|
|
243
|
+
.group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation', 'catpm_samples.sample_type')
|
|
244
|
+
.count
|
|
245
|
+
.each { |(kind, target, op, type), cnt| counts_cache[[kind, target, op, type]] = cnt }
|
|
246
|
+
end
|
|
247
|
+
|
|
248
|
+
error_counts = {}
|
|
249
|
+
if error_fps.any?
|
|
250
|
+
Catpm::Sample.where(sample_type: 'error', error_fingerprint: error_fps)
|
|
251
|
+
.group(:error_fingerprint).count
|
|
252
|
+
.each { |fp, cnt| error_counts[fp] = cnt }
|
|
253
|
+
end
|
|
254
|
+
|
|
228
255
|
samples.each do |sample|
|
|
229
|
-
kind, target, operation = sample[:bucket_key][0
|
|
230
|
-
endpoint_samples = Catpm::Sample
|
|
231
|
-
.joins(:bucket)
|
|
232
|
-
.where(catpm_buckets: { kind: kind, target: target, operation: operation })
|
|
256
|
+
kind, target, operation = sample[:bucket_key][0..2]
|
|
233
257
|
|
|
234
258
|
case sample[:sample_type]
|
|
235
259
|
when 'random'
|
|
236
|
-
|
|
237
|
-
if
|
|
238
|
-
|
|
260
|
+
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
261
|
+
if max_random
|
|
262
|
+
cache_key = [kind, target, operation, 'random']
|
|
263
|
+
if (counts_cache[cache_key] || 0) >= max_random
|
|
264
|
+
oldest = Catpm::Sample.joins(:bucket)
|
|
265
|
+
.where(catpm_buckets: { kind: kind, target: target, operation: operation })
|
|
266
|
+
.where(sample_type: 'random').order(recorded_at: :asc).first
|
|
267
|
+
oldest&.destroy
|
|
268
|
+
end
|
|
239
269
|
end
|
|
240
270
|
when 'slow'
|
|
241
|
-
|
|
242
|
-
if
|
|
243
|
-
|
|
244
|
-
if
|
|
245
|
-
weakest.
|
|
246
|
-
|
|
247
|
-
|
|
271
|
+
max_slow = Catpm.config.max_slow_samples_per_endpoint
|
|
272
|
+
if max_slow
|
|
273
|
+
cache_key = [kind, target, operation, 'slow']
|
|
274
|
+
if (counts_cache[cache_key] || 0) >= max_slow
|
|
275
|
+
weakest = Catpm::Sample.joins(:bucket)
|
|
276
|
+
.where(catpm_buckets: { kind: kind, target: target, operation: operation })
|
|
277
|
+
.where(sample_type: 'slow').order(duration: :asc).first
|
|
278
|
+
if weakest && sample[:duration] > weakest.duration
|
|
279
|
+
weakest.destroy
|
|
280
|
+
else
|
|
281
|
+
sample[:_skip] = true
|
|
282
|
+
end
|
|
248
283
|
end
|
|
249
284
|
end
|
|
250
285
|
when 'error'
|
|
251
|
-
|
|
252
|
-
if
|
|
253
|
-
|
|
254
|
-
if
|
|
255
|
-
|
|
286
|
+
max_err = Catpm.config.max_error_samples_per_fingerprint
|
|
287
|
+
if max_err
|
|
288
|
+
fp = sample[:error_fingerprint]
|
|
289
|
+
if fp && (error_counts[fp] || 0) >= max_err
|
|
290
|
+
oldest = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
|
|
291
|
+
.order(recorded_at: :asc).first
|
|
292
|
+
oldest&.destroy
|
|
256
293
|
end
|
|
257
294
|
end
|
|
258
295
|
end
|
|
@@ -267,11 +304,7 @@ module Catpm
|
|
|
267
304
|
occurred_at: event.started_at.iso8601,
|
|
268
305
|
kind: event.kind,
|
|
269
306
|
operation: event_context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
|
|
270
|
-
backtrace:
|
|
271
|
-
bt = event.backtrace || []
|
|
272
|
-
limit = Catpm.config.backtrace_lines
|
|
273
|
-
limit ? bt.first(limit) : bt
|
|
274
|
-
end,
|
|
307
|
+
backtrace: event.backtrace || [],
|
|
275
308
|
duration: event.duration,
|
|
276
309
|
status: event.status
|
|
277
310
|
}
|
|
@@ -314,7 +347,7 @@ module Catpm
|
|
|
314
347
|
|
|
315
348
|
max = Catpm.config.events_max_samples_per_name
|
|
316
349
|
if event.payload.any?
|
|
317
|
-
if sample_counts[event.name] < max
|
|
350
|
+
if max.nil? || sample_counts[event.name] < max
|
|
318
351
|
samples << { name: event.name, payload: event.payload, recorded_at: event.recorded_at }
|
|
319
352
|
sample_counts[event.name] += 1
|
|
320
353
|
elsif rand(Catpm.config.random_sample_rate) == 0
|
|
@@ -336,101 +369,103 @@ module Catpm
|
|
|
336
369
|
|
|
337
370
|
def downsample_buckets
|
|
338
371
|
bucket_sizes = Catpm.config.bucket_sizes
|
|
372
|
+
thresholds = Catpm.config.downsampling_thresholds
|
|
339
373
|
adapter = Catpm::Adapter.current
|
|
340
374
|
|
|
341
375
|
# Phase 1: Merge 1-minute buckets older than 1 hour into 5-minute buckets
|
|
342
376
|
downsample_tier(
|
|
343
377
|
target_interval: bucket_sizes[:medium],
|
|
344
|
-
age_threshold:
|
|
378
|
+
age_threshold: thresholds[:medium],
|
|
345
379
|
adapter: adapter
|
|
346
380
|
)
|
|
347
381
|
|
|
348
382
|
# Phase 2: Merge 5-minute buckets older than 24 hours into 1-hour buckets
|
|
349
383
|
downsample_tier(
|
|
350
384
|
target_interval: bucket_sizes[:hourly],
|
|
351
|
-
age_threshold:
|
|
385
|
+
age_threshold: thresholds[:hourly],
|
|
352
386
|
adapter: adapter
|
|
353
387
|
)
|
|
354
388
|
|
|
355
389
|
# Phase 3: Merge 1-hour buckets older than 1 week into 1-day buckets
|
|
356
390
|
downsample_tier(
|
|
357
391
|
target_interval: bucket_sizes[:daily],
|
|
358
|
-
age_threshold:
|
|
392
|
+
age_threshold: thresholds[:daily],
|
|
359
393
|
adapter: adapter
|
|
360
394
|
)
|
|
361
395
|
|
|
362
396
|
# Phase 4: Merge 1-day buckets older than 3 months into 1-week buckets
|
|
363
397
|
downsample_tier(
|
|
364
398
|
target_interval: bucket_sizes[:weekly],
|
|
365
|
-
age_threshold:
|
|
399
|
+
age_threshold: thresholds[:weekly],
|
|
366
400
|
adapter: adapter
|
|
367
401
|
)
|
|
368
402
|
|
|
369
403
|
# Event buckets: same downsampling tiers
|
|
370
|
-
downsample_event_tier(target_interval: bucket_sizes[:medium], age_threshold:
|
|
371
|
-
downsample_event_tier(target_interval: bucket_sizes[:hourly], age_threshold:
|
|
372
|
-
downsample_event_tier(target_interval: bucket_sizes[:daily], age_threshold:
|
|
373
|
-
downsample_event_tier(target_interval: bucket_sizes[:weekly], age_threshold:
|
|
404
|
+
downsample_event_tier(target_interval: bucket_sizes[:medium], age_threshold: thresholds[:medium], adapter: adapter)
|
|
405
|
+
downsample_event_tier(target_interval: bucket_sizes[:hourly], age_threshold: thresholds[:hourly], adapter: adapter)
|
|
406
|
+
downsample_event_tier(target_interval: bucket_sizes[:daily], age_threshold: thresholds[:daily], adapter: adapter)
|
|
407
|
+
downsample_event_tier(target_interval: bucket_sizes[:weekly], age_threshold: thresholds[:weekly], adapter: adapter)
|
|
374
408
|
end
|
|
375
409
|
|
|
376
410
|
def downsample_tier(target_interval:, age_threshold:, adapter:)
|
|
377
411
|
cutoff = age_threshold.ago
|
|
378
412
|
target_seconds = target_interval.to_i
|
|
379
413
|
|
|
380
|
-
#
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
[bucket.kind, bucket.target, bucket.operation, aligned_start]
|
|
391
|
-
end
|
|
392
|
-
|
|
393
|
-
groups.each do |(kind, target, operation, aligned_start), buckets|
|
|
394
|
-
# Skip if only one bucket already at the target alignment
|
|
395
|
-
next if buckets.size == 1 && buckets.first.bucket_start.to_i % target_seconds == 0
|
|
414
|
+
# Process in batches to avoid loading all old buckets into memory
|
|
415
|
+
Catpm::Bucket.where(bucket_start: ...cutoff)
|
|
416
|
+
.select(:id, :kind, :target, :operation, :bucket_start)
|
|
417
|
+
.group_by { |b| [b.kind, b.target, b.operation] }
|
|
418
|
+
.each do |(_kind, _target, _operation), endpoint_buckets|
|
|
419
|
+
groups = endpoint_buckets.group_by do |bucket|
|
|
420
|
+
epoch = bucket.bucket_start.to_i
|
|
421
|
+
aligned_epoch = epoch - (epoch % target_seconds)
|
|
422
|
+
Time.at(aligned_epoch).utc
|
|
423
|
+
end
|
|
396
424
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
425
|
+
groups.each do |aligned_start, stub_buckets|
|
|
426
|
+
next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
|
|
427
|
+
|
|
428
|
+
# Load full records only for groups that need merging
|
|
429
|
+
bucket_ids = stub_buckets.map(&:id)
|
|
430
|
+
buckets = Catpm::Bucket.where(id: bucket_ids).to_a
|
|
431
|
+
|
|
432
|
+
merged = {
|
|
433
|
+
kind: buckets.first.kind,
|
|
434
|
+
target: buckets.first.target,
|
|
435
|
+
operation: buckets.first.operation,
|
|
436
|
+
bucket_start: aligned_start,
|
|
437
|
+
count: buckets.sum(&:count),
|
|
438
|
+
success_count: buckets.sum(&:success_count),
|
|
439
|
+
failure_count: buckets.sum(&:failure_count),
|
|
440
|
+
duration_sum: buckets.sum(&:duration_sum),
|
|
441
|
+
duration_max: buckets.map(&:duration_max).max,
|
|
442
|
+
duration_min: buckets.map(&:duration_min).min,
|
|
443
|
+
metadata_sum: merge_bucket_metadata(buckets, adapter),
|
|
444
|
+
p95_digest: merge_bucket_digests(buckets)
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
survivor = buckets.first
|
|
448
|
+
|
|
449
|
+
# Reassign all samples to the survivor bucket
|
|
450
|
+
Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
|
|
451
|
+
|
|
452
|
+
# Delete non-survivor source buckets (now sample-free)
|
|
453
|
+
Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
|
|
454
|
+
|
|
455
|
+
# Overwrite survivor with merged data
|
|
456
|
+
survivor.update!(
|
|
457
|
+
bucket_start: aligned_start,
|
|
458
|
+
count: merged[:count],
|
|
459
|
+
success_count: merged[:success_count],
|
|
460
|
+
failure_count: merged[:failure_count],
|
|
461
|
+
duration_sum: merged[:duration_sum],
|
|
462
|
+
duration_max: merged[:duration_max],
|
|
463
|
+
duration_min: merged[:duration_min],
|
|
464
|
+
metadata_sum: merged[:metadata_sum],
|
|
465
|
+
p95_digest: merged[:p95_digest]
|
|
466
|
+
)
|
|
467
|
+
end
|
|
468
|
+
end
|
|
434
469
|
end
|
|
435
470
|
|
|
436
471
|
def downsample_event_tier(target_interval:, age_threshold:, adapter:)
|
|
@@ -474,7 +509,7 @@ module Catpm
|
|
|
474
509
|
|
|
475
510
|
def cleanup_expired_data
|
|
476
511
|
cutoff = Catpm.config.retention_period.ago
|
|
477
|
-
batch_size =
|
|
512
|
+
batch_size = Catpm.config.cleanup_batch_size
|
|
478
513
|
|
|
479
514
|
[ Catpm::Bucket, Catpm::Sample ].each do |model|
|
|
480
515
|
time_column = model == Catpm::Sample ? :recorded_at : :bucket_start
|
data/lib/catpm/middleware.rb
CHANGED
|
@@ -21,6 +21,12 @@ module Catpm
|
|
|
21
21
|
)
|
|
22
22
|
env['catpm.segments'] = req_segments
|
|
23
23
|
Thread.current[:catpm_request_segments] = req_segments
|
|
24
|
+
|
|
25
|
+
if Catpm.config.instrument_call_tree
|
|
26
|
+
call_tracer = CallTracer.new(request_segments: req_segments)
|
|
27
|
+
call_tracer.start
|
|
28
|
+
env['catpm.call_tracer'] = call_tracer
|
|
29
|
+
end
|
|
24
30
|
end
|
|
25
31
|
|
|
26
32
|
@app.call(env)
|
|
@@ -29,6 +35,7 @@ module Catpm
|
|
|
29
35
|
raise
|
|
30
36
|
ensure
|
|
31
37
|
if Catpm.config.instrument_segments
|
|
38
|
+
env['catpm.call_tracer']&.stop
|
|
32
39
|
req_segments&.stop_sampler
|
|
33
40
|
Thread.current[:catpm_request_segments] = nil
|
|
34
41
|
end
|
data/lib/catpm/request_segments.rb
CHANGED
|
@@ -36,7 +36,7 @@ module Catpm
|
|
|
36
36
|
@tracked_ranges << [started_at, started_at + duration / 1000.0]
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
if @segments.size < @max_segments
|
|
39
|
+
if @max_segments.nil? || @segments.size < @max_segments
|
|
40
40
|
@segments << segment
|
|
41
41
|
else
|
|
42
42
|
@overflow = true
|
|
@@ -54,7 +54,7 @@ module Catpm
|
|
|
54
54
|
segment[:offset] = offset if offset
|
|
55
55
|
segment[:parent_index] = @span_stack.last if @span_stack.any?
|
|
56
56
|
|
|
57
|
-
return nil if @segments.size >= @max_segments
|
|
57
|
+
return nil if @max_segments && @segments.size >= @max_segments
|
|
58
58
|
|
|
59
59
|
index = @segments.size
|
|
60
60
|
@segments << segment
|
data/lib/catpm/segment_subscribers.rb
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
module SegmentSubscribers
|
|
5
|
+
MIN_INSTANTIATION_DURATION_MS = 0.1
|
|
6
|
+
CALLER_OFFSET = 4 # frames to skip to reach user code from this call site
|
|
5
7
|
# Subscriber with start/finish callbacks so all segments (SQL, views, etc.)
|
|
6
8
|
# fired during a controller action are automatically nested under the controller span.
|
|
7
9
|
class ControllerSpanSubscriber
|
|
@@ -146,7 +148,7 @@ module Catpm
|
|
|
146
148
|
return unless req_segments
|
|
147
149
|
|
|
148
150
|
duration = event.duration
|
|
149
|
-
return if duration <
|
|
151
|
+
return if duration < MIN_INSTANTIATION_DURATION_MS # skip trivial instantiations
|
|
150
152
|
|
|
151
153
|
payload = event.payload
|
|
152
154
|
record_count = payload[:record_count] || 0
|
|
@@ -171,6 +173,8 @@ module Catpm
|
|
|
171
173
|
|
|
172
174
|
duration = event.duration
|
|
173
175
|
sql = payload[:sql].to_s
|
|
176
|
+
max_len = Catpm.config.max_sql_length
|
|
177
|
+
sql = sql.truncate(max_len) if max_len && sql.length > max_len
|
|
174
178
|
source = duration >= Catpm.config.segment_source_threshold ? extract_source_location : nil
|
|
175
179
|
|
|
176
180
|
req_segments.add(
|
|
@@ -228,7 +232,7 @@ module Catpm
|
|
|
228
232
|
end
|
|
229
233
|
|
|
230
234
|
def extract_source_location
|
|
231
|
-
locations = caller_locations(
|
|
235
|
+
locations = caller_locations(CALLER_OFFSET, Catpm.config.caller_scan_depth)
|
|
232
236
|
locations&.each do |loc|
|
|
233
237
|
path = loc.path.to_s
|
|
234
238
|
if Fingerprint.app_frame?(path)
|