catpm 0.1.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/catpm/application_controller.rb +8 -0
- data/app/controllers/catpm/endpoints_controller.rb +16 -3
- data/app/controllers/catpm/errors_controller.rb +42 -0
- data/app/controllers/catpm/events_controller.rb +3 -3
- data/app/controllers/catpm/samples_controller.rb +3 -0
- data/app/controllers/catpm/status_controller.rb +1 -1
- data/app/controllers/catpm/system_controller.rb +0 -3
- data/app/helpers/catpm/application_helper.rb +4 -4
- data/app/models/catpm/error_record.rb +15 -0
- data/app/models/catpm/sample.rb +1 -0
- data/app/views/catpm/endpoints/show.html.erb +13 -8
- data/app/views/catpm/errors/show.html.erb +58 -18
- data/app/views/catpm/samples/show.html.erb +24 -34
- data/app/views/catpm/shared/_page_nav.html.erb +3 -1
- data/app/views/catpm/shared/_segments_waterfall.html.erb +5 -1
- data/app/views/catpm/system/index.html.erb +2 -2
- data/config/routes.rb +1 -0
- data/db/migrate/20250601000001_create_catpm_tables.rb +3 -0
- data/lib/catpm/adapter/base.rb +43 -1
- data/lib/catpm/adapter/postgresql.rb +9 -2
- data/lib/catpm/adapter/sqlite.rb +9 -2
- data/lib/catpm/collector.rb +217 -112
- data/lib/catpm/configuration.rb +6 -2
- data/lib/catpm/event.rb +3 -3
- data/lib/catpm/flusher.rb +60 -53
- data/lib/catpm/stack_sampler.rb +53 -12
- data/lib/catpm/version.rb +1 -1
- data/lib/generators/catpm/templates/initializer.rb.tt +1 -0
- data/lib/tasks/catpm_tasks.rake +21 -4
- metadata +1 -1
data/lib/catpm/flusher.rb
CHANGED
|
@@ -123,14 +123,6 @@ module Catpm
|
|
|
123
123
|
samples = []
|
|
124
124
|
error_groups = {}
|
|
125
125
|
|
|
126
|
-
# Pre-load existing random sample counts per endpoint for filling phase
|
|
127
|
-
@random_sample_counts = {}
|
|
128
|
-
Catpm::Sample.where(sample_type: 'random')
|
|
129
|
-
.joins(:bucket)
|
|
130
|
-
.group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation')
|
|
131
|
-
.count
|
|
132
|
-
.each { |(kind, target, op), cnt| @random_sample_counts[[ kind, target, op ]] = cnt }
|
|
133
|
-
|
|
134
126
|
events.each do |event|
|
|
135
127
|
# Bucket aggregation
|
|
136
128
|
key = [ event.kind, event.target, event.operation, event.bucket_start ]
|
|
@@ -155,40 +147,48 @@ module Catpm
|
|
|
155
147
|
# TDigest
|
|
156
148
|
bucket[:tdigest].add(event.duration)
|
|
157
149
|
|
|
158
|
-
#
|
|
159
|
-
|
|
150
|
+
# Compute error fingerprint (used for both samples and error grouping)
|
|
151
|
+
error_fp = nil
|
|
152
|
+
if event.error?
|
|
153
|
+
error_fp = Catpm::Fingerprint.generate(
|
|
154
|
+
kind: event.kind,
|
|
155
|
+
error_class: event.error_class,
|
|
156
|
+
backtrace: event.backtrace
|
|
157
|
+
)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Collect samples (pre-determined by collector — only these events carry full context)
|
|
161
|
+
sample_type = event.sample_type
|
|
160
162
|
if sample_type
|
|
161
|
-
|
|
163
|
+
sample_hash = {
|
|
162
164
|
bucket_key: key,
|
|
163
165
|
kind: event.kind,
|
|
164
166
|
sample_type: sample_type,
|
|
165
167
|
recorded_at: event.started_at,
|
|
166
168
|
duration: event.duration,
|
|
167
|
-
context: event.context
|
|
169
|
+
context: event.context || {}
|
|
168
170
|
}
|
|
171
|
+
sample_hash[:error_fingerprint] = error_fp if error_fp
|
|
172
|
+
samples << sample_hash
|
|
169
173
|
end
|
|
170
174
|
|
|
171
175
|
# Error grouping
|
|
172
|
-
if
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
error_class: event.error_class,
|
|
176
|
-
backtrace: event.backtrace
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
error = error_groups[fp] ||= {
|
|
180
|
-
fingerprint: fp,
|
|
176
|
+
if error_fp
|
|
177
|
+
error = error_groups[error_fp] ||= {
|
|
178
|
+
fingerprint: error_fp,
|
|
181
179
|
kind: event.kind,
|
|
182
180
|
error_class: event.error_class,
|
|
183
181
|
message: event.error_message,
|
|
184
182
|
occurrences_count: 0,
|
|
185
183
|
first_occurred_at: event.started_at,
|
|
186
184
|
last_occurred_at: event.started_at,
|
|
187
|
-
new_contexts: []
|
|
185
|
+
new_contexts: [],
|
|
186
|
+
occurrence_times: []
|
|
188
187
|
}
|
|
189
188
|
|
|
190
189
|
error[:occurrences_count] += 1
|
|
191
190
|
error[:last_occurred_at] = [ error[:last_occurred_at], event.started_at ].max
|
|
191
|
+
error[:occurrence_times] << event.started_at
|
|
192
192
|
|
|
193
193
|
if error[:new_contexts].size < Catpm.config.max_error_contexts
|
|
194
194
|
error[:new_contexts] << build_error_context(event)
|
|
@@ -223,24 +223,6 @@ module Catpm
|
|
|
223
223
|
}
|
|
224
224
|
end
|
|
225
225
|
|
|
226
|
-
def determine_sample_type(event)
|
|
227
|
-
return 'error' if event.error?
|
|
228
|
-
|
|
229
|
-
threshold = Catpm.config.slow_threshold_for(event.kind.to_sym)
|
|
230
|
-
return 'slow' if event.duration >= threshold
|
|
231
|
-
|
|
232
|
-
# Always sample if endpoint has few random samples (filling phase)
|
|
233
|
-
endpoint_key = [ event.kind, event.target, event.operation ]
|
|
234
|
-
existing_random = @random_sample_counts[endpoint_key] || 0
|
|
235
|
-
if existing_random < Catpm.config.max_random_samples_per_endpoint
|
|
236
|
-
@random_sample_counts[endpoint_key] = existing_random + 1
|
|
237
|
-
return 'random'
|
|
238
|
-
end
|
|
239
|
-
|
|
240
|
-
return 'random' if rand(Catpm.config.random_sample_rate) == 0
|
|
241
|
-
|
|
242
|
-
nil
|
|
243
|
-
end
|
|
244
226
|
|
|
245
227
|
def rotate_samples(samples)
|
|
246
228
|
samples.each do |sample|
|
|
@@ -265,6 +247,14 @@ module Catpm
|
|
|
265
247
|
sample[:_skip] = true
|
|
266
248
|
end
|
|
267
249
|
end
|
|
250
|
+
when 'error'
|
|
251
|
+
fp = sample[:error_fingerprint]
|
|
252
|
+
if fp
|
|
253
|
+
existing = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
|
|
254
|
+
if existing.count >= Catpm.config.max_error_samples_per_fingerprint
|
|
255
|
+
existing.order(recorded_at: :asc).first.destroy
|
|
256
|
+
end
|
|
257
|
+
end
|
|
268
258
|
end
|
|
269
259
|
end
|
|
270
260
|
|
|
@@ -272,24 +262,29 @@ module Catpm
|
|
|
272
262
|
end
|
|
273
263
|
|
|
274
264
|
def build_error_context(event)
|
|
265
|
+
event_context = event.context || {}
|
|
275
266
|
ctx = {
|
|
276
267
|
occurred_at: event.started_at.iso8601,
|
|
277
268
|
kind: event.kind,
|
|
278
|
-
operation:
|
|
279
|
-
backtrace:
|
|
269
|
+
operation: event_context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
|
|
270
|
+
backtrace: begin
|
|
271
|
+
bt = event.backtrace || []
|
|
272
|
+
limit = Catpm.config.backtrace_lines
|
|
273
|
+
limit ? bt.first(limit) : bt
|
|
274
|
+
end,
|
|
280
275
|
duration: event.duration,
|
|
281
276
|
status: event.status
|
|
282
277
|
}
|
|
283
278
|
|
|
284
279
|
ctx[:target] = event.target if event.target.present?
|
|
285
280
|
|
|
286
|
-
if
|
|
287
|
-
ctx[:segments] =
|
|
288
|
-
ctx[:segments_capped] =
|
|
281
|
+
if event_context[:segments]
|
|
282
|
+
ctx[:segments] = event_context[:segments]
|
|
283
|
+
ctx[:segments_capped] = event_context[:segments_capped]
|
|
289
284
|
end
|
|
290
285
|
|
|
291
|
-
if
|
|
292
|
-
ctx[:segment_summary] =
|
|
286
|
+
if event_context[:segment_summary]
|
|
287
|
+
ctx[:segment_summary] = event_context[:segment_summary]
|
|
293
288
|
end
|
|
294
289
|
|
|
295
290
|
ctx
|
|
@@ -415,14 +410,26 @@ module Catpm
|
|
|
415
410
|
}
|
|
416
411
|
|
|
417
412
|
source_ids = buckets.map(&:id)
|
|
413
|
+
survivor = buckets.first
|
|
418
414
|
|
|
419
|
-
#
|
|
420
|
-
|
|
421
|
-
Catpm::Sample.where(bucket_id: source_ids).delete_all
|
|
422
|
-
Catpm::Bucket.where(id: source_ids).delete_all
|
|
415
|
+
# Reassign all samples to the survivor bucket
|
|
416
|
+
Catpm::Sample.where(bucket_id: source_ids).update_all(bucket_id: survivor.id)
|
|
423
417
|
|
|
424
|
-
#
|
|
425
|
-
|
|
418
|
+
# Delete non-survivor source buckets (now sample-free)
|
|
419
|
+
Catpm::Bucket.where(id: source_ids - [survivor.id]).delete_all
|
|
420
|
+
|
|
421
|
+
# Overwrite survivor with merged data
|
|
422
|
+
survivor.update!(
|
|
423
|
+
bucket_start: aligned_start,
|
|
424
|
+
count: merged[:count],
|
|
425
|
+
success_count: merged[:success_count],
|
|
426
|
+
failure_count: merged[:failure_count],
|
|
427
|
+
duration_sum: merged[:duration_sum],
|
|
428
|
+
duration_max: merged[:duration_max],
|
|
429
|
+
duration_min: merged[:duration_min],
|
|
430
|
+
metadata_sum: merged[:metadata_sum],
|
|
431
|
+
p95_digest: merged[:p95_digest]
|
|
432
|
+
)
|
|
426
433
|
end
|
|
427
434
|
end
|
|
428
435
|
|
data/lib/catpm/stack_sampler.rb
CHANGED
|
@@ -4,28 +4,69 @@ module Catpm
|
|
|
4
4
|
class StackSampler
|
|
5
5
|
SAMPLE_INTERVAL = 0.005 # 5ms
|
|
6
6
|
|
|
7
|
+
# Single global thread that samples all active requests.
|
|
8
|
+
# Avoids creating a thread per request.
|
|
9
|
+
class SamplingLoop
|
|
10
|
+
def initialize
|
|
11
|
+
@mutex = Mutex.new
|
|
12
|
+
@samplers = []
|
|
13
|
+
@thread = nil
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def register(sampler)
|
|
17
|
+
@mutex.synchronize do
|
|
18
|
+
@samplers << sampler
|
|
19
|
+
start_thread unless @thread&.alive?
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def unregister(sampler)
|
|
24
|
+
@mutex.synchronize { @samplers.delete(sampler) }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def start_thread
|
|
30
|
+
@thread = Thread.new do
|
|
31
|
+
loop do
|
|
32
|
+
sleep(SAMPLE_INTERVAL)
|
|
33
|
+
sample_all
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
@thread.priority = -1
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def sample_all
|
|
40
|
+
now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
41
|
+
targets = @mutex.synchronize { @samplers.dup }
|
|
42
|
+
targets.each { |s| s.capture(now) }
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
@loop = SamplingLoop.new
|
|
47
|
+
|
|
48
|
+
class << self
|
|
49
|
+
attr_reader :loop
|
|
50
|
+
end
|
|
51
|
+
|
|
7
52
|
def initialize(target_thread:, request_start:)
|
|
8
53
|
@target = target_thread
|
|
9
54
|
@request_start = request_start
|
|
10
55
|
@samples = []
|
|
11
|
-
@running = false
|
|
12
56
|
end
|
|
13
57
|
|
|
14
58
|
def start
|
|
15
|
-
|
|
16
|
-
@thread = Thread.new do
|
|
17
|
-
while @running
|
|
18
|
-
locs = @target.backtrace_locations
|
|
19
|
-
@samples << [Process.clock_gettime(Process::CLOCK_MONOTONIC), locs] if locs
|
|
20
|
-
sleep(SAMPLE_INTERVAL)
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
@thread.priority = -1
|
|
59
|
+
self.class.loop.register(self)
|
|
24
60
|
end
|
|
25
61
|
|
|
26
62
|
def stop
|
|
27
|
-
|
|
28
|
-
|
|
63
|
+
self.class.loop.unregister(self)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Called by SamplingLoop from the global thread
|
|
67
|
+
def capture(now)
|
|
68
|
+
locs = @target.backtrace_locations
|
|
69
|
+
@samples << [now, locs] if locs
|
|
29
70
|
end
|
|
30
71
|
|
|
31
72
|
# Returns array of { parent: {segment}, children: [{segment}, ...] }
|
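data/lib/catpm/version.rb
CHANGED
[diff hunk for this file is missing from this extract; the summary above lists it as +1 -1]
data/lib/generators/catpm/templates/initializer.rb.tt
CHANGED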
|
@@ -20,6 +20,7 @@ Catpm.configure do |config|
|
|
|
20
20
|
config.instrument_http = true # Track HTTP requests (default: true)
|
|
21
21
|
config.instrument_jobs = false # Track ActiveJob (default: false)
|
|
22
22
|
config.instrument_segments = true # Track SQL/view/cache segments (default: true)
|
|
23
|
+
# config.track_own_requests = false # Track catpm dashboard requests (default: false)
|
|
23
24
|
# config.instrument_net_http = false # Patch Net::HTTP for outbound tracking (default: false)
|
|
24
25
|
# config.instrument_middleware_stack = false # Decompose middleware into per-middleware segments (default: false)
|
|
25
26
|
# config.max_segments_per_request = 50 # Cap segments per request (keeps slowest)
|
data/lib/tasks/catpm_tasks.rake
CHANGED
|
@@ -1,6 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
namespace :catpm do
|
|
4
|
+
desc 'Add missing columns to catpm tables (safe to run multiple times)'
|
|
5
|
+
task upgrade: :environment do
|
|
6
|
+
connection = ActiveRecord::Base.connection
|
|
7
|
+
|
|
8
|
+
unless connection.column_exists?(:catpm_samples, :error_fingerprint)
|
|
9
|
+
connection.add_column :catpm_samples, :error_fingerprint, :string, limit: 64
|
|
10
|
+
connection.add_index :catpm_samples, :error_fingerprint, name: 'idx_catpm_samples_error_fp'
|
|
11
|
+
puts '[catpm] Added error_fingerprint column to catpm_samples'
|
|
12
|
+
else
|
|
13
|
+
puts '[catpm] catpm_samples.error_fingerprint already exists, skipping'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
unless connection.column_exists?(:catpm_errors, :occurrence_buckets)
|
|
17
|
+
connection.add_column :catpm_errors, :occurrence_buckets, :json
|
|
18
|
+
puts '[catpm] Added occurrence_buckets column to catpm_errors'
|
|
19
|
+
else
|
|
20
|
+
puts '[catpm] catpm_errors.occurrence_buckets already exists, skipping'
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|