catpm 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/catpm/flusher.rb CHANGED
@@ -123,14 +123,6 @@ module Catpm
123
123
  samples = []
124
124
  error_groups = {}
125
125
 
126
- # Pre-load existing random sample counts per endpoint for filling phase
127
- @random_sample_counts = {}
128
- Catpm::Sample.where(sample_type: 'random')
129
- .joins(:bucket)
130
- .group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation')
131
- .count
132
- .each { |(kind, target, op), cnt| @random_sample_counts[[ kind, target, op ]] = cnt }
133
-
134
126
  events.each do |event|
135
127
  # Bucket aggregation
136
128
  key = [ event.kind, event.target, event.operation, event.bucket_start ]
@@ -165,8 +157,8 @@ module Catpm
165
157
  )
166
158
  end
167
159
 
168
- # Collect samples
169
- sample_type = determine_sample_type(event)
160
+ # Collect samples (pre-determined by collector — only these events carry full context)
161
+ sample_type = event.sample_type
170
162
  if sample_type
171
163
  sample_hash = {
172
164
  bucket_key: key,
@@ -174,7 +166,7 @@ module Catpm
174
166
  sample_type: sample_type,
175
167
  recorded_at: event.started_at,
176
168
  duration: event.duration,
177
- context: event.context
169
+ context: event.context || {}
178
170
  }
179
171
  sample_hash[:error_fingerprint] = error_fp if error_fp
180
172
  samples << sample_hash
@@ -231,43 +223,51 @@ module Catpm
231
223
  }
232
224
  end
233
225
 
234
- def determine_sample_type(event)
235
- return 'error' if event.error?
236
-
237
- threshold = Catpm.config.slow_threshold_for(event.kind.to_sym)
238
- return 'slow' if event.duration >= threshold
239
226
 
240
- # Always sample if endpoint has few random samples (filling phase)
241
- endpoint_key = [ event.kind, event.target, event.operation ]
242
- existing_random = @random_sample_counts[endpoint_key] || 0
243
- if existing_random < Catpm.config.max_random_samples_per_endpoint
244
- @random_sample_counts[endpoint_key] = existing_random + 1
245
- return 'random'
227
+ def rotate_samples(samples)
228
+ return samples if samples.empty?
229
+
230
+ # Pre-fetch counts for all endpoints and types in bulk
231
+ endpoint_keys = samples.map { |s| s[:bucket_key][0..2] }.uniq
232
+ error_fps = samples.filter_map { |s| s[:error_fingerprint] }.uniq
233
+
234
+ # Build counts cache: { [kind, target, op, type] => count }
235
+ counts_cache = {}
236
+ if endpoint_keys.any?
237
+ Catpm::Sample.joins(:bucket)
238
+ .where(catpm_buckets: { kind: endpoint_keys.map(&:first), target: endpoint_keys.map { |k| k[1] }, operation: endpoint_keys.map { |k| k[2] } })
239
+ .where(sample_type: %w[random slow])
240
+ .group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation', 'catpm_samples.sample_type')
241
+ .count
242
+ .each { |(kind, target, op, type), cnt| counts_cache[[kind, target, op, type]] = cnt }
246
243
  end
247
244
 
248
- return 'random' if rand(Catpm.config.random_sample_rate) == 0
249
-
250
- nil
251
- end
245
+ error_counts = {}
246
+ if error_fps.any?
247
+ Catpm::Sample.where(sample_type: 'error', error_fingerprint: error_fps)
248
+ .group(:error_fingerprint).count
249
+ .each { |fp, cnt| error_counts[fp] = cnt }
250
+ end
252
251
 
253
- def rotate_samples(samples)
254
252
  samples.each do |sample|
255
- kind, target, operation = sample[:bucket_key][0], sample[:bucket_key][1], sample[:bucket_key][2]
256
- endpoint_samples = Catpm::Sample
257
- .joins(:bucket)
258
- .where(catpm_buckets: { kind: kind, target: target, operation: operation })
253
+ kind, target, operation = sample[:bucket_key][0..2]
259
254
 
260
255
  case sample[:sample_type]
261
256
  when 'random'
262
- existing = endpoint_samples.where(sample_type: 'random')
263
- if existing.count >= Catpm.config.max_random_samples_per_endpoint
264
- existing.order(recorded_at: :asc).first.destroy
257
+ cache_key = [kind, target, operation, 'random']
258
+ if (counts_cache[cache_key] || 0) >= Catpm.config.max_random_samples_per_endpoint
259
+ oldest = Catpm::Sample.joins(:bucket)
260
+ .where(catpm_buckets: { kind: kind, target: target, operation: operation })
261
+ .where(sample_type: 'random').order(recorded_at: :asc).first
262
+ oldest&.destroy
265
263
  end
266
264
  when 'slow'
267
- existing = endpoint_samples.where(sample_type: 'slow')
268
- if existing.count >= Catpm.config.max_slow_samples_per_endpoint
269
- weakest = existing.order(duration: :asc).first
270
- if sample[:duration] > weakest.duration
265
+ cache_key = [kind, target, operation, 'slow']
266
+ if (counts_cache[cache_key] || 0) >= Catpm.config.max_slow_samples_per_endpoint
267
+ weakest = Catpm::Sample.joins(:bucket)
268
+ .where(catpm_buckets: { kind: kind, target: target, operation: operation })
269
+ .where(sample_type: 'slow').order(duration: :asc).first
270
+ if weakest && sample[:duration] > weakest.duration
271
271
  weakest.destroy
272
272
  else
273
273
  sample[:_skip] = true
@@ -275,11 +275,10 @@ module Catpm
275
275
  end
276
276
  when 'error'
277
277
  fp = sample[:error_fingerprint]
278
- if fp
279
- existing = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
280
- if existing.count >= Catpm.config.max_error_samples_per_fingerprint
281
- existing.order(recorded_at: :asc).first.destroy
282
- end
278
+ if fp && (error_counts[fp] || 0) >= Catpm.config.max_error_samples_per_fingerprint
279
+ oldest = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
280
+ .order(recorded_at: :asc).first
281
+ oldest&.destroy
283
282
  end
284
283
  end
285
284
  end
@@ -288,28 +287,25 @@ module Catpm
288
287
  end
289
288
 
290
289
  def build_error_context(event)
290
+ event_context = event.context || {}
291
291
  ctx = {
292
292
  occurred_at: event.started_at.iso8601,
293
293
  kind: event.kind,
294
- operation: event.context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
295
- backtrace: begin
296
- bt = event.backtrace || []
297
- limit = Catpm.config.backtrace_lines
298
- limit ? bt.first(limit) : bt
299
- end,
294
+ operation: event_context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
295
+ backtrace: event.backtrace || [],
300
296
  duration: event.duration,
301
297
  status: event.status
302
298
  }
303
299
 
304
300
  ctx[:target] = event.target if event.target.present?
305
301
 
306
- if event.context[:segments]
307
- ctx[:segments] = event.context[:segments]
308
- ctx[:segments_capped] = event.context[:segments_capped]
302
+ if event_context[:segments]
303
+ ctx[:segments] = event_context[:segments]
304
+ ctx[:segments_capped] = event_context[:segments_capped]
309
305
  end
310
306
 
311
- if event.context[:segment_summary]
312
- ctx[:segment_summary] = event.context[:segment_summary]
307
+ if event_context[:segment_summary]
308
+ ctx[:segment_summary] = event_context[:segment_summary]
313
309
  end
314
310
 
315
311
  ctx
@@ -402,48 +398,61 @@ module Catpm
402
398
  cutoff = age_threshold.ago
403
399
  target_seconds = target_interval.to_i
404
400
 
405
- # Find all buckets older than cutoff
406
- source_buckets = Catpm::Bucket.where(bucket_start: ...cutoff).to_a
407
- return if source_buckets.empty?
408
-
409
- # Group by (kind, target, operation) + target-aligned bucket_start
410
- groups = source_buckets.group_by do |bucket|
411
- epoch = bucket.bucket_start.to_i
412
- aligned_epoch = epoch - (epoch % target_seconds)
413
- aligned_start = Time.at(aligned_epoch).utc
414
-
415
- [bucket.kind, bucket.target, bucket.operation, aligned_start]
416
- end
417
-
418
- groups.each do |(kind, target, operation, aligned_start), buckets|
419
- # Skip if only one bucket already at the target alignment
420
- next if buckets.size == 1 && buckets.first.bucket_start.to_i % target_seconds == 0
401
+ # Process in batches to avoid loading all old buckets into memory
402
+ Catpm::Bucket.where(bucket_start: ...cutoff)
403
+ .select(:id, :kind, :target, :operation, :bucket_start)
404
+ .group_by { |b| [b.kind, b.target, b.operation] }
405
+ .each do |(_kind, _target, _operation), endpoint_buckets|
406
+ groups = endpoint_buckets.group_by do |bucket|
407
+ epoch = bucket.bucket_start.to_i
408
+ aligned_epoch = epoch - (epoch % target_seconds)
409
+ Time.at(aligned_epoch).utc
410
+ end
421
411
 
422
- merged = {
423
- kind: kind,
424
- target: target,
425
- operation: operation,
426
- bucket_start: aligned_start,
427
- count: buckets.sum(&:count),
428
- success_count: buckets.sum(&:success_count),
429
- failure_count: buckets.sum(&:failure_count),
430
- duration_sum: buckets.sum(&:duration_sum),
431
- duration_max: buckets.map(&:duration_max).max,
432
- duration_min: buckets.map(&:duration_min).min,
433
- metadata_sum: merge_bucket_metadata(buckets, adapter),
434
- p95_digest: merge_bucket_digests(buckets)
435
- }
436
-
437
- source_ids = buckets.map(&:id)
438
-
439
- # Delete source buckets first (to avoid unique constraint conflict
440
- # if one source bucket has the same bucket_start as the target)
441
- Catpm::Sample.where(bucket_id: source_ids).delete_all
442
- Catpm::Bucket.where(id: source_ids).delete_all
443
-
444
- # Create the merged bucket
445
- adapter.persist_buckets([merged])
446
- end
412
+ groups.each do |aligned_start, stub_buckets|
413
+ next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
414
+
415
+ # Load full records only for groups that need merging
416
+ bucket_ids = stub_buckets.map(&:id)
417
+ buckets = Catpm::Bucket.where(id: bucket_ids).to_a
418
+
419
+ merged = {
420
+ kind: buckets.first.kind,
421
+ target: buckets.first.target,
422
+ operation: buckets.first.operation,
423
+ bucket_start: aligned_start,
424
+ count: buckets.sum(&:count),
425
+ success_count: buckets.sum(&:success_count),
426
+ failure_count: buckets.sum(&:failure_count),
427
+ duration_sum: buckets.sum(&:duration_sum),
428
+ duration_max: buckets.map(&:duration_max).max,
429
+ duration_min: buckets.map(&:duration_min).min,
430
+ metadata_sum: merge_bucket_metadata(buckets, adapter),
431
+ p95_digest: merge_bucket_digests(buckets)
432
+ }
433
+
434
+ survivor = buckets.first
435
+
436
+ # Reassign all samples to the survivor bucket
437
+ Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
438
+
439
+ # Delete non-survivor source buckets (now sample-free)
440
+ Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
441
+
442
+ # Overwrite survivor with merged data
443
+ survivor.update!(
444
+ bucket_start: aligned_start,
445
+ count: merged[:count],
446
+ success_count: merged[:success_count],
447
+ failure_count: merged[:failure_count],
448
+ duration_sum: merged[:duration_sum],
449
+ duration_max: merged[:duration_max],
450
+ duration_min: merged[:duration_min],
451
+ metadata_sum: merged[:metadata_sum],
452
+ p95_digest: merged[:p95_digest]
453
+ )
454
+ end
455
+ end
447
456
  end
448
457
 
449
458
  def downsample_event_tier(target_interval:, age_threshold:, adapter:)
@@ -171,6 +171,8 @@ module Catpm
171
171
 
172
172
  duration = event.duration
173
173
  sql = payload[:sql].to_s
174
+ max_len = Catpm.config.max_sql_length
175
+ sql = sql.truncate(max_len) if max_len && sql.length > max_len
174
176
  source = duration >= Catpm.config.segment_source_threshold ? extract_source_location : nil
175
177
 
176
178
  req_segments.add(
@@ -2,30 +2,74 @@
2
2
 
3
3
  module Catpm
4
4
  class StackSampler
5
- SAMPLE_INTERVAL = 0.005 # 5ms
5
+ MS_PER_SECOND = 1000.0
6
+
7
+ # Single global thread that samples all active requests.
8
+ # Avoids creating a thread per request.
9
+ class SamplingLoop
10
+ def initialize
11
+ @mutex = Mutex.new
12
+ @samplers = []
13
+ @thread = nil
14
+ end
15
+
16
+ def register(sampler)
17
+ @mutex.synchronize do
18
+ @samplers << sampler
19
+ start_thread unless @thread&.alive?
20
+ end
21
+ end
22
+
23
+ def unregister(sampler)
24
+ @mutex.synchronize { @samplers.delete(sampler) }
25
+ end
26
+
27
+ private
28
+
29
+ def start_thread
30
+ @thread = Thread.new do
31
+ loop do
32
+ sleep(Catpm.config.stack_sample_interval)
33
+ sample_all
34
+ end
35
+ end
36
+ @thread.priority = -1
37
+ end
38
+
39
+ def sample_all
40
+ now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
41
+ targets = @mutex.synchronize { @samplers.dup }
42
+ targets.each { |s| s.capture(now) }
43
+ end
44
+ end
45
+
46
+ @loop = SamplingLoop.new
47
+
48
+ class << self
49
+ attr_reader :loop
50
+ end
6
51
 
7
52
  def initialize(target_thread:, request_start:)
8
53
  @target = target_thread
9
54
  @request_start = request_start
10
55
  @samples = []
11
- @running = false
12
56
  end
13
57
 
14
58
  def start
15
- @running = true
16
- @thread = Thread.new do
17
- while @running
18
- locs = @target.backtrace_locations
19
- @samples << [Process.clock_gettime(Process::CLOCK_MONOTONIC), locs] if locs
20
- sleep(SAMPLE_INTERVAL)
21
- end
22
- end
23
- @thread.priority = -1
59
+ self.class.loop.register(self)
24
60
  end
25
61
 
26
62
  def stop
27
- @running = false
28
- @thread&.join(0.1)
63
+ self.class.loop.unregister(self)
64
+ end
65
+
66
+ # Called by SamplingLoop from the global thread
67
+ def capture(now)
68
+ max = Catpm.config.max_stack_samples_per_request
69
+ return if max && @samples.size >= max
70
+
71
+ locs = @target.backtrace_locations
72
+ @samples << [now, locs] if locs
29
73
  end
30
74
 
31
75
  # Returns array of { parent: {segment}, children: [{segment}, ...] }
@@ -76,7 +120,7 @@ module Catpm
76
120
  duration = estimate_duration(group)
77
121
  next if duration < 1.0
78
122
 
79
- offset = ((group[:start_time] - @request_start) * 1000.0).round(2)
123
+ offset = ((group[:start_time] - @request_start) * MS_PER_SECOND).round(2)
80
124
  app_frame = group[:app_frame]
81
125
  leaf = group[:leaves].first&.last
82
126
 
@@ -158,8 +202,8 @@ module Catpm
158
202
 
159
203
  spans.filter_map do |span|
160
204
  duration = [
161
- (span[:end_time] - span[:start_time]) * 1000.0,
162
- span[:count] * SAMPLE_INTERVAL * 1000.0
205
+ (span[:end_time] - span[:start_time]) * MS_PER_SECOND,
206
+ span[:count] * Catpm.config.stack_sample_interval * MS_PER_SECOND
163
207
  ].max
164
208
  next if duration < 1.0
165
209
 
@@ -170,7 +214,7 @@ module Catpm
170
214
  type: classify_path(path),
171
215
  detail: build_gem_detail(frame),
172
216
  duration: duration.round(2),
173
- offset: ((span[:start_time] - @request_start) * 1000.0).round(2),
217
+ offset: ((span[:start_time] - @request_start) * MS_PER_SECOND).round(2),
174
218
  started_at: span[:start_time]
175
219
  }
176
220
  end
@@ -178,8 +222,8 @@ module Catpm
178
222
 
179
223
  def estimate_duration(group)
180
224
  [
181
- (group[:end_time] - group[:start_time]) * 1000.0,
182
- group[:count] * SAMPLE_INTERVAL * 1000.0
225
+ (group[:end_time] - group[:start_time]) * MS_PER_SECOND,
226
+ group[:count] * Catpm.config.stack_sample_interval * MS_PER_SECOND
183
227
  ].max
184
228
  end
185
229
 
data/lib/catpm/tdigest.rb CHANGED
@@ -12,6 +12,7 @@ module Catpm
12
12
  Centroid = Struct.new(:mean, :weight)
13
13
 
14
14
  COMPRESSION = 100 # Controls accuracy vs. memory trade-off
15
+ BUFFER_FLUSH_FACTOR = 2 # Lower = more frequent flushes (better accuracy), higher = fewer flushes (better performance)
15
16
 
16
17
  attr_reader :count
17
18
 
@@ -22,7 +23,7 @@ module Catpm
22
23
  @min = Float::INFINITY
23
24
  @max = -Float::INFINITY
24
25
  @buffer = []
25
- @buffer_limit = @compression * 5
26
+ @buffer_limit = @compression * BUFFER_FLUSH_FACTOR
26
27
  end
27
28
 
28
29
  def add(value, weight = 1)
data/lib/catpm/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Catpm
4
- VERSION = '0.2.0'
4
+ VERSION = '0.4.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: catpm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''