catpm 0.9.5 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 18b84e4c767fa3872bbf196d21c3d197e6d4f6517269357e7b24bf0514da3708
4
- data.tar.gz: 6cc282d9f10f13546939f8e9c39b12b784687ef4c0d25e42a2a7e34a39cd3124
3
+ metadata.gz: 3731353688fdebaef1f9cb164731daba00a0d69df65da03dc309f6f7901e2708
4
+ data.tar.gz: 4ef7e26d0b721c8fea556f797d74c58ef2b0df8f9257edead653393fcb991229
5
5
  SHA512:
6
- metadata.gz: 1968d1e8c7ed1257d2f0bfc3e28f9e34b4e38057ec89e103c10f02bd42d99daa78cf7ba0a9fc31ff5c07a8dc9e0895d9f6c541c557345469a20e2c09da88dd9c
7
- data.tar.gz: d9e5c1b664605c6e66c92e9e6520a1c606f979f10e5e0deb8883307b965f4b4c3d361ef2dcf7ddfe3fb840d99e107c4c666a1ef4487165543f5904285f2a5a07
6
+ metadata.gz: df633940cf6beba3252b6915c45d22688ff23ef3200edf17622e971ce65191ad85af476bfdd853a2bee449da6d28df81431c48e663001c80b14242ba407d3a7b
7
+ data.tar.gz: 473011238fdf84d011bf9d3c0ceed1d47b7f1a3be5e6e9d640f6f66b62273c2a396c425166b09ceb5b0b8bf63a993e51651d99d373c48b1058ead4ede2451266
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  gem build catpm.gemspec
2
- gem push catpm-0.9.4.gem
2
+ gem push catpm-0.9.5.gem
3
3
 
4
4
  # Catpm
5
5
 
@@ -94,7 +94,7 @@ module Catpm
94
94
  events_max_samples_per_name: { group: 'Events', label: 'Max Samples / Name', desc: 'Event samples retained per event name', fmt: :nullable_int },
95
95
 
96
96
  # ── Buffer & Flush ──
97
- max_memory_per_thread: { group: 'Buffer & Flush', label: 'Memory per Thread', desc: 'Memory budget per application thread (split between request segments and event buffer)', fmt: :bytes },
97
+ max_memory: { group: 'Buffer & Flush', label: 'Max Memory (MB)', desc: 'Global memory budget for catpm gem in megabytes', fmt: :int },
98
98
  flush_interval: { group: 'Buffer & Flush', label: 'Flush Interval', desc: 'How often the background thread drains the buffer to the database', fmt: :seconds },
99
99
  flush_jitter: { group: 'Buffer & Flush', label: 'Flush Jitter', desc: 'Random jitter added to flush interval to avoid thundering herd', fmt: :pm_seconds },
100
100
  persistence_batch_size: { group: 'Buffer & Flush', label: 'Batch Size', desc: 'Number of events written per database transaction', fmt: :int },
@@ -14,10 +14,6 @@ module Catpm
14
14
  scope :recent, ->(period = 1.hour) { where(recorded_at: period.ago..) }
15
15
  scope :for_error, ->(fingerprint) { where(error_fingerprint: fingerprint) }
16
16
 
17
- def self.request_id_supported?
18
- column_names.include?('request_id')
19
- end
20
-
21
17
  def parsed_context
22
18
  case context
23
19
  when Hash then context
@@ -38,7 +38,6 @@ module Catpm
38
38
  context: sample_data[:context],
39
39
  error_fingerprint: sample_data[:error_fingerprint]
40
40
  }
41
- record[:request_id] = sample_data[:request_id] if Catpm::Sample.request_id_supported?
42
41
  record
43
42
  end
44
43
 
data/lib/catpm/buffer.rb CHANGED
@@ -22,17 +22,19 @@ module Catpm
22
22
  # Called from request threads. Returns :accepted or :dropped.
23
23
  # Never blocks — monitoring must not slow down the application.
24
24
  #
25
+ OVERFLOW_FACTOR = 1.5 # hard cap multiplier — drops events beyond this to prevent OOM
26
+
25
27
  # When buffer reaches max_bytes, signals the flusher for immediate drain
26
- # and continues accepting events. Only drops as a last resort at 3x capacity
27
- # (flusher stuck or DB down).
28
+ # and continues accepting events. Only drops as a last resort at OVERFLOW_FACTOR ×
29
+ # capacity (flusher stuck or DB down).
28
30
  def push(event)
29
31
  signal_flush = false
30
32
 
31
33
  @monitor.synchronize do
32
34
  bytes = event.estimated_bytes
33
35
 
34
- # Hard safety cap: 3x configured limit prevents OOM if flusher is stuck
35
- if @current_bytes + bytes > @max_bytes * 3
36
+ # Hard safety cap: prevents OOM if flusher is stuck
37
+ if @current_bytes + bytes > @max_bytes * OVERFLOW_FACTOR
36
38
  @dropped_count += 1
37
39
  Catpm.stats[:dropped_events] += 1
38
40
  return :dropped
@@ -10,6 +10,7 @@ module Catpm
10
10
  # are slow. Without this cap, apps with 30% slow requests would see ~23%
11
11
  # instrumentation instead of the configured 1/random_sample_rate.
12
12
  MAX_FORCE_INSTRUMENT_COUNT = 3
13
+ FORCE_INSTRUMENT_MAX_ENDPOINTS = 100 # cap per-endpoint force-instrument hash
13
14
 
14
15
  class << self
15
16
  def process_action_controller(event)
@@ -63,16 +64,20 @@ module Catpm
63
64
  )
64
65
 
65
66
  # Force the NEXT HTTP request to be fully instrumented when this one
66
- # wasn't. Covers slow/error spikes and filling phase (new endpoints that
67
- # haven't collected enough instrumented samples yet).
67
+ # wasn't instrumented and was slow/error.
68
+ # Filling phase is handled by @http_filling_active flag in
69
+ # should_instrument_request? — no need for force_instrument here.
68
70
  if !instrumented
69
71
  if payload[:exception] || duration >= Catpm.config.slow_threshold_for(:http)
70
72
  trigger_force_instrument
71
- else
73
+ elsif !@http_filling_active
74
+ # Detect new/underfilled endpoints that appeared after filling phase ended
72
75
  max = Catpm.config.max_random_samples_per_endpoint
73
76
  if max
74
77
  endpoint_key = ['http', target, operation]
75
- trigger_force_instrument if instrumented_sample_counts[endpoint_key] < max
78
+ if instrumented_sample_counts[endpoint_key] < max
79
+ @http_filling_active = true
80
+ end
76
81
  end
77
82
  end
78
83
  end
@@ -372,8 +377,6 @@ module Catpm
372
377
  context = nil
373
378
  end
374
379
 
375
- request_id = req_segments&.request_id
376
-
377
380
  ev = Event.new(
378
381
  kind: kind,
379
382
  target: target,
@@ -386,56 +389,7 @@ module Catpm
386
389
  metadata: metadata,
387
390
  error_class: error&.class&.name,
388
391
  error_message: error&.message,
389
- backtrace: error&.backtrace,
390
- request_id: request_id
391
- )
392
-
393
- Catpm.buffer&.push(ev)
394
- end
395
-
396
- def process_checkpoint(kind:, target:, operation:, context:, metadata:, checkpoint_data:, request_start:, request_id: nil)
397
- return unless Catpm.enabled?
398
-
399
- segments = checkpoint_data[:segments].dup
400
- collapse_code_wrappers(segments)
401
-
402
- duration_so_far = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_start) * 1000.0
403
-
404
- # Inject root request segment
405
- root_segment = {
406
- type: 'request',
407
- detail: "#{operation.presence || kind} #{target}",
408
- duration: duration_so_far.round(2),
409
- offset: 0.0
410
- }
411
- segments.each do |seg|
412
- if seg.key?(:parent_index)
413
- seg[:parent_index] += 1
414
- else
415
- seg[:parent_index] = 0
416
- end
417
- end
418
- segments.unshift(root_segment)
419
-
420
- checkpoint_context = (context || {}).dup
421
- checkpoint_context[:segments] = segments
422
- checkpoint_context[:segment_summary] = checkpoint_data[:summary]
423
- checkpoint_context[:segments_capped] = checkpoint_data[:overflow]
424
- checkpoint_context[:partial] = true
425
- checkpoint_context[:checkpoint_number] = checkpoint_data[:checkpoint_number]
426
- checkpoint_context = scrub(checkpoint_context)
427
-
428
- ev = Event.new(
429
- kind: kind,
430
- target: target,
431
- operation: operation.to_s,
432
- duration: duration_so_far,
433
- started_at: Time.current,
434
- status: DEFAULT_SUCCESS_STATUS,
435
- context: checkpoint_context,
436
- sample_type: 'random',
437
- metadata: (metadata || {}).dup.merge(checkpoint_data[:summary] || {}),
438
- request_id: request_id
392
+ backtrace: error&.backtrace
439
393
  )
440
394
 
441
395
  Catpm.buffer&.push(ev)
@@ -463,6 +417,15 @@ module Catpm
463
417
 
464
418
  # --- Pre-sampling: decide BEFORE request whether to instrument ---
465
419
 
420
+ # Eagerly load sample counts at startup so old endpoints don't
421
+ # re-enter filling phase on every process restart.
422
+ # Called from Lifecycle.register_hooks after flusher init.
423
+ def load_sample_counts_eagerly!
424
+ @instrumented_sample_counts = load_sample_counts_from_db
425
+ @instrumented_sample_counts_loaded = true
426
+ recompute_http_filling_active
427
+ end
428
+
466
429
  # For HTTP middleware where endpoint is unknown at start.
467
430
  # Returns true if this request should get full instrumentation.
468
431
  def should_instrument_request?
@@ -472,6 +435,12 @@ module Catpm
472
435
  return true
473
436
  end
474
437
 
438
+ # During filling phase, instrument all requests so underfilled
439
+ # endpoints collect their quota (max_random_samples_per_endpoint).
440
+ # The flag is set by load_sample_counts_eagerly! and maintained
441
+ # by early_sample_type as endpoints fill up.
442
+ return true if @http_filling_active
443
+
475
444
  rand(Catpm.config.random_sample_rate) == 0
476
445
  end
477
446
 
@@ -505,7 +474,9 @@ module Catpm
505
474
  def trigger_force_instrument(kind: nil, target: nil, operation: nil)
506
475
  if kind && target
507
476
  endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
508
- force_instrument_endpoints[endpoint_key] = true
477
+ if force_instrument_endpoints.size < FORCE_INSTRUMENT_MAX_ENDPOINTS
478
+ force_instrument_endpoints[endpoint_key] = true
479
+ end
509
480
  else
510
481
  @force_instrument_count = [(@force_instrument_count || 0) + 1, MAX_FORCE_INSTRUMENT_COUNT].min
511
482
  end
@@ -516,10 +487,42 @@ module Catpm
516
487
  @instrumented_sample_counts_loaded = false
517
488
  @force_instrument_endpoints = nil
518
489
  @force_instrument_count = nil
490
+ @http_filling_active = false
519
491
  end
520
492
 
521
493
  private
522
494
 
495
+ # Recompute whether any HTTP endpoint is still below its sample quota.
496
+ # Called after loading counts from DB and when an endpoint exits filling.
497
+ def recompute_http_filling_active
498
+ max = Catpm.config.max_random_samples_per_endpoint
499
+ @http_filling_active = if max
500
+ # True if hash is empty (new app / new endpoints may appear) or any endpoint below quota
501
+ instrumented_sample_counts.empty? || instrumented_sample_counts.any? { |_, c| c < max }
502
+ else
503
+ false # unlimited quota → no filling phase for HTTP middleware
504
+ end
505
+ end
506
+
507
+ # Evict half the entries from instrumented_sample_counts.
508
+ # Prefers evicting filled entries (count >= max) to avoid
509
+ # re-triggering filling phase for those endpoints.
510
+ def evict_sample_counts(max_random)
511
+ evict_count = instrumented_sample_counts.size / 2
512
+ if max_random
513
+ filled_keys = []
514
+ unfilled_keys = []
515
+ instrumented_sample_counts.each do |k, c|
516
+ (c >= max_random ? filled_keys : unfilled_keys) << k
517
+ end
518
+ # Evict filled first (safe), then unfilled if needed
519
+ to_evict = (filled_keys + unfilled_keys).first(evict_count)
520
+ to_evict.each { |k| instrumented_sample_counts.delete(k) }
521
+ else
522
+ evict_count.times { instrumented_sample_counts.shift }
523
+ end
524
+ end
525
+
523
526
  def force_instrument_endpoints
524
527
  @force_instrument_endpoints ||= {}
525
528
  end
@@ -644,7 +647,17 @@ module Catpm
644
647
  count = instrumented_sample_counts[endpoint_key]
645
648
  max_random = Catpm.config.max_random_samples_per_endpoint
646
649
  if max_random.nil? || count < max_random
650
+ # Evict when hash exceeds derived limit — prefer filled entries
651
+ max_entries = Catpm.config.effective_sample_counts_max
652
+ if instrumented_sample_counts.size >= max_entries
653
+ evict_sample_counts(max_random)
654
+ end
647
655
  instrumented_sample_counts[endpoint_key] = count + 1
656
+
657
+ # Endpoint just reached quota — recheck if any filling endpoints remain
658
+ if max_random && count + 1 >= max_random
659
+ recompute_http_filling_active
660
+ end
648
661
  end
649
662
 
650
663
  return 'slow' if is_slow
@@ -2,12 +2,15 @@
2
2
 
3
3
  module Catpm
4
4
  class Configuration
5
- # Memory budget shares — how max_memory_per_thread is split
6
- BUFFER_MEMORY_SHARE = 0.5 # half per-thread budget goes to buffer pool
7
- REQUEST_MEMORY_SHARE = 0.5 # half per-thread budget for request segments
8
- MIN_REQUEST_MEMORY = 1_024 # 1 KB — floor for per-request (checkpoint viability, ~5 minimal segments)
9
5
  MIN_BUFFER_MEMORY = 1_048_576 # 1 MB — floor for buffer (meaningful buffering)
10
- DEFAULT_ASSUMED_THREADS = 5 # fallback when thread detection fails
6
+ DEFAULT_ASSUMED_THREADS = 5 # fallback when thread detection fails
7
+
8
+ # Global memory budget distribution shares
9
+ BUFFER_MEMORY_SHARE = 0.5 # 50% of max_memory for event buffer
10
+ CACHE_ENTRIES_PER_MB = 10_000 # ~100 bytes/entry in path_cache
11
+ PATH_CACHE_BUDGET_SHARE = 0.05 # 5% of max_memory for path_cache
12
+ SAMPLE_COUNTS_PER_MB = 12_500 # ~80 bytes/entry in sample counts hash
13
+ SAMPLE_COUNTS_BUDGET_SHARE = 0.02 # 2% of max_memory for sample counts
11
14
 
12
15
  # Boolean / non-numeric settings — plain attr_accessor
13
16
  attr_accessor :enabled,
@@ -37,7 +40,7 @@ module Catpm
37
40
  random_sample_rate cleanup_interval
38
41
  circuit_breaker_failure_threshold circuit_breaker_recovery_timeout
39
42
  sqlite_busy_timeout persistence_batch_size shutdown_timeout
40
- stack_sample_interval min_segment_duration
43
+ stack_sample_interval min_segment_duration max_memory
41
44
  ].freeze
42
45
 
43
46
  # Numeric settings where nil means "no limit" / "disabled"
@@ -48,7 +51,6 @@ module Catpm
48
51
  events_max_samples_per_name max_stack_samples_per_request
49
52
  max_error_detail_length max_fingerprint_app_frames
50
53
  max_fingerprint_gem_frames cleanup_batch_size caller_scan_depth
51
- max_memory_per_thread
52
54
  ].freeze
53
55
 
54
56
  (REQUIRED_NUMERIC + OPTIONAL_NUMERIC).each do |attr|
@@ -83,7 +85,7 @@ module Catpm
83
85
  @slow_threshold_per_kind = {}
84
86
  @ignored_targets = []
85
87
  @retention_period = nil # nil = keep forever (data is downsampled, not deleted)
86
- @max_memory_per_thread = 2.megabytes
88
+ @max_memory = 20 # MB — global memory budget (2% of 1GB server)
87
89
  @flush_interval = 30 # seconds
88
90
  @flush_jitter = 5 # ±seconds
89
91
  @max_error_contexts = 5
@@ -124,17 +126,20 @@ module Catpm
124
126
  @show_untracked_segments = false
125
127
  end
126
128
 
127
- def derived_request_memory_limit
128
- return nil unless max_memory_per_thread
129
-
130
- [max_memory_per_thread * REQUEST_MEMORY_SHARE, MIN_REQUEST_MEMORY].max
129
+ # Buffer gets BUFFER_MEMORY_SHARE of max_memory (MB, converted to bytes), floored at MIN_BUFFER_MEMORY
130
+ def effective_max_buffer_memory
131
+ bytes = (max_memory * 1_048_576 * BUFFER_MEMORY_SHARE).to_i
132
+ [bytes, MIN_BUFFER_MEMORY].max
131
133
  end
132
134
 
133
- def derived_buffer_memory_limit(detected_threads = nil)
134
- return MIN_BUFFER_MEMORY unless max_memory_per_thread
135
+ # Path cache limit derived from max_memory
136
+ def effective_path_cache_max
137
+ (max_memory * CACHE_ENTRIES_PER_MB * PATH_CACHE_BUDGET_SHARE).to_i
138
+ end
135
139
 
136
- threads = detected_threads || DEFAULT_ASSUMED_THREADS
137
- [max_memory_per_thread * BUFFER_MEMORY_SHARE * threads, MIN_BUFFER_MEMORY].max
140
+ # Sample counts hash limit derived from max_memory
141
+ def effective_sample_counts_max
142
+ (max_memory * SAMPLE_COUNTS_PER_MB * SAMPLE_COUNTS_BUDGET_SHARE).to_i
138
143
  end
139
144
 
140
145
  def slow_threshold_for(kind)
data/lib/catpm/event.rb CHANGED
@@ -9,14 +9,14 @@ module Catpm
9
9
 
10
10
  attr_accessor :kind, :target, :operation, :duration, :started_at,
11
11
  :metadata, :error_class, :error_message, :backtrace,
12
- :sample_type, :context, :status, :request_id
12
+ :sample_type, :context, :status
13
13
 
14
14
  EMPTY_HASH = {}.freeze
15
15
  private_constant :EMPTY_HASH
16
16
 
17
17
  def initialize(kind:, target:, operation: '', duration: 0.0, started_at: nil,
18
18
  metadata: nil, error_class: nil, error_message: nil, backtrace: nil,
19
- sample_type: nil, context: nil, status: nil, request_id: nil)
19
+ sample_type: nil, context: nil, status: nil)
20
20
  @kind = kind.to_s
21
21
  @target = target.to_s
22
22
  @operation = (operation || '').to_s
@@ -32,7 +32,6 @@ module Catpm
32
32
  @sample_type = sample_type
33
33
  @context = context
34
34
  @status = status
35
- @request_id = request_id
36
35
  end
37
36
 
38
37
  def estimated_bytes
@@ -40,7 +40,7 @@ module Catpm
40
40
 
41
41
  result = _app_frame?(line)
42
42
  @path_cache_mutex.synchronize do
43
- @path_cache.clear if @path_cache.size > 4000
43
+ @path_cache.clear if @path_cache.size > Catpm.config.effective_path_cache_max
44
44
  @path_cache[line] = result
45
45
  end
46
46
  result
data/lib/catpm/flusher.rb CHANGED
@@ -3,7 +3,6 @@
3
3
  module Catpm
4
4
  class Flusher
5
5
  ERROR_LOG_BACKTRACE_LINES = 5
6
- PARTIAL_STALE_TIMEOUT = 600 # seconds — orphaned partial samples cleaned after 10 minutes
7
6
 
8
7
  attr_reader :running
9
8
 
@@ -182,8 +181,7 @@ module Catpm
182
181
  sample_type: sample_type,
183
182
  recorded_at: event.started_at,
184
183
  duration: event.duration,
185
- context: event.context || {},
186
- request_id: event.request_id
184
+ context: event.context || {}
187
185
  }
188
186
  sample_hash[:error_fingerprint] = error_fp if error_fp
189
187
  samples << sample_hash
@@ -221,8 +219,6 @@ module Catpm
221
219
  b
222
220
  end
223
221
 
224
- samples = merge_request_samples(samples)
225
-
226
222
  [ buckets, samples, error_groups.values ]
227
223
  end
228
224
 
@@ -350,7 +346,6 @@ module Catpm
350
346
  @last_cleanup_at = Time.now
351
347
  downsample_buckets
352
348
  cleanup_expired_data if Catpm.config.retention_period
353
- cleanup_orphaned_partials
354
349
  Collector.reset_sample_counts!
355
350
  end
356
351
 
@@ -398,83 +393,93 @@ module Catpm
398
393
  cutoff = age_threshold.ago
399
394
  target_seconds = target_interval.to_i
400
395
 
401
- # Process in batches to avoid loading all old buckets into memory
402
- Catpm::Bucket.where(bucket_start: ...cutoff)
403
- .select(:id, :kind, :target, :operation, :bucket_start)
404
- .group_by { |b| [b.kind, b.target, b.operation] }
405
- .each do |(_kind, _target, _operation), endpoint_buckets|
406
- groups = endpoint_buckets.group_by do |bucket|
407
- epoch = bucket.bucket_start.to_i
408
- aligned_epoch = epoch - (epoch % target_seconds)
409
- Time.at(aligned_epoch).utc
410
- end
396
+ # Get unique endpoint keys first (small set), then process per-endpoint
397
+ # to avoid loading all old buckets into memory at once
398
+ endpoint_keys = Catpm::Bucket.where(bucket_start: ...cutoff)
399
+ .distinct.pluck(:kind, :target, :operation)
411
400
 
412
- groups.each do |aligned_start, stub_buckets|
413
- next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
414
-
415
- # Load full records only for groups that need merging
416
- bucket_ids = stub_buckets.map(&:id)
417
- buckets = Catpm::Bucket.where(id: bucket_ids).to_a
418
-
419
- merged = {
420
- kind: buckets.first.kind,
421
- target: buckets.first.target,
422
- operation: buckets.first.operation,
423
- bucket_start: aligned_start,
424
- count: buckets.sum(&:count),
425
- success_count: buckets.sum(&:success_count),
426
- failure_count: buckets.sum(&:failure_count),
427
- duration_sum: buckets.sum(&:duration_sum),
428
- duration_max: buckets.map(&:duration_max).max,
429
- duration_min: buckets.map(&:duration_min).min,
430
- metadata_sum: merge_bucket_metadata(buckets, adapter),
431
- p95_digest: merge_bucket_digests(buckets)
432
- }
433
-
434
- survivor = buckets.first
435
-
436
- # Reassign all samples to the survivor bucket
437
- Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
438
-
439
- # Delete non-survivor source buckets (now sample-free)
440
- Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
441
-
442
- # Overwrite survivor with merged data
443
- survivor.update!(
444
- bucket_start: aligned_start,
445
- count: merged[:count],
446
- success_count: merged[:success_count],
447
- failure_count: merged[:failure_count],
448
- duration_sum: merged[:duration_sum],
449
- duration_max: merged[:duration_max],
450
- duration_min: merged[:duration_min],
451
- metadata_sum: merged[:metadata_sum],
452
- p95_digest: merged[:p95_digest]
453
- )
454
- end
401
+ endpoint_keys.each do |kind, target, operation|
402
+ endpoint_buckets = Catpm::Bucket
403
+ .where(kind: kind, target: target, operation: operation, bucket_start: ...cutoff)
404
+ .select(:id, :bucket_start).to_a
405
+
406
+ groups = endpoint_buckets.group_by do |bucket|
407
+ epoch = bucket.bucket_start.to_i
408
+ aligned_epoch = epoch - (epoch % target_seconds)
409
+ Time.at(aligned_epoch).utc
410
+ end
411
+
412
+ groups.each do |aligned_start, stub_buckets|
413
+ next if stub_buckets.size == 1 && stub_buckets.first.bucket_start.to_i % target_seconds == 0
414
+
415
+ # Load full records only for groups that need merging
416
+ bucket_ids = stub_buckets.map(&:id)
417
+ buckets = Catpm::Bucket.where(id: bucket_ids).to_a
418
+
419
+ merged = {
420
+ kind: buckets.first.kind,
421
+ target: buckets.first.target,
422
+ operation: buckets.first.operation,
423
+ bucket_start: aligned_start,
424
+ count: buckets.sum(&:count),
425
+ success_count: buckets.sum(&:success_count),
426
+ failure_count: buckets.sum(&:failure_count),
427
+ duration_sum: buckets.sum(&:duration_sum),
428
+ duration_max: buckets.map(&:duration_max).max,
429
+ duration_min: buckets.map(&:duration_min).min,
430
+ metadata_sum: merge_bucket_metadata(buckets, adapter),
431
+ p95_digest: merge_bucket_digests(buckets)
432
+ }
433
+
434
+ survivor = buckets.first
435
+
436
+ # Reassign all samples to the survivor bucket
437
+ Catpm::Sample.where(bucket_id: bucket_ids).update_all(bucket_id: survivor.id)
438
+
439
+ # Delete non-survivor source buckets (now sample-free)
440
+ Catpm::Bucket.where(id: bucket_ids - [survivor.id]).delete_all
441
+
442
+ # Overwrite survivor with merged data
443
+ survivor.update!(
444
+ bucket_start: aligned_start,
445
+ count: merged[:count],
446
+ success_count: merged[:success_count],
447
+ failure_count: merged[:failure_count],
448
+ duration_sum: merged[:duration_sum],
449
+ duration_max: merged[:duration_max],
450
+ duration_min: merged[:duration_min],
451
+ metadata_sum: merged[:metadata_sum],
452
+ p95_digest: merged[:p95_digest]
453
+ )
455
454
  end
455
+ end
456
456
  end
457
457
 
458
458
  def downsample_event_tier(target_interval:, age_threshold:, adapter:)
459
459
  cutoff = age_threshold.ago
460
460
  target_seconds = target_interval.to_i
461
461
 
462
- source_buckets = Catpm::EventBucket.where(bucket_start: ...cutoff).to_a
463
- return if source_buckets.empty?
462
+ # Process per-name to avoid loading all event buckets into memory
463
+ names = Catpm::EventBucket.where(bucket_start: ...cutoff).distinct.pluck(:name)
464
+ return if names.empty?
464
465
 
465
- groups = source_buckets.group_by do |bucket|
466
- epoch = bucket.bucket_start.to_i
467
- aligned_epoch = epoch - (epoch % target_seconds)
468
- aligned_start = Time.at(aligned_epoch).utc
469
- [bucket.name, aligned_start]
470
- end
466
+ names.each do |name|
467
+ buckets = Catpm::EventBucket.where(name: name, bucket_start: ...cutoff).to_a
468
+ next if buckets.empty?
471
469
 
472
- groups.each do |(name, aligned_start), buckets|
473
- next if buckets.size == 1 && buckets.first.bucket_start.to_i % target_seconds == 0
470
+ groups = buckets.group_by do |bucket|
471
+ epoch = bucket.bucket_start.to_i
472
+ aligned_epoch = epoch - (epoch % target_seconds)
473
+ Time.at(aligned_epoch).utc
474
+ end
475
+
476
+ groups.each do |aligned_start, group_buckets|
477
+ next if group_buckets.size == 1 && group_buckets.first.bucket_start.to_i % target_seconds == 0
474
478
 
475
- merged = { name: name, bucket_start: aligned_start, count: buckets.sum(&:count) }
476
- Catpm::EventBucket.where(id: buckets.map(&:id)).delete_all
477
- adapter.persist_event_buckets([merged])
479
+ merged = { name: name, bucket_start: aligned_start, count: group_buckets.sum(&:count) }
480
+ Catpm::EventBucket.where(id: group_buckets.map(&:id)).delete_all
481
+ adapter.persist_event_buckets([merged])
482
+ end
478
483
  end
479
484
  end
480
485
 
@@ -494,136 +499,6 @@ module Catpm
494
499
  combined.empty? ? nil : combined.serialize
495
500
  end
496
501
 
497
- def merge_request_samples(samples)
498
- return samples unless Catpm::Sample.request_id_supported?
499
-
500
- by_request = {} # request_id => { partials: [], final: nil }
501
- regular = []
502
-
503
- samples.each do |s|
504
- rid = s[:request_id]
505
- if rid
506
- entry = (by_request[rid] ||= { partials: [], final: nil })
507
- if s[:context].is_a?(Hash) && s[:context][:partial]
508
- entry[:partials] << s
509
- else
510
- entry[:final] = s
511
- end
512
- else
513
- regular << s
514
- end
515
- end
516
-
517
- merged = []
518
- by_request.each do |rid, entry|
519
- if entry[:final]
520
- # Merge in-batch partials
521
- if entry[:partials].any?
522
- merge_checkpoint_contexts(
523
- entry[:final][:context],
524
- entry[:partials].map { |p| p[:context] }
525
- )
526
- end
527
-
528
- # Merge cross-batch partials from DB
529
- db_partials = Catpm::Sample.where(request_id: rid)
530
- if db_partials.exists?
531
- merge_checkpoint_contexts(
532
- entry[:final][:context],
533
- db_partials.map(&:parsed_context)
534
- )
535
- db_partials.delete_all
536
- end
537
-
538
- # Clear request_id so persisted final sample won't be treated as orphan
539
- entry[:final][:request_id] = nil
540
- merged << entry[:final]
541
- else
542
- # Only partials, no final yet — persist as-is
543
- merged.concat(entry[:partials])
544
- end
545
- end
546
-
547
- regular + merged
548
- end
549
-
550
- def merge_checkpoint_contexts(final_ctx, checkpoint_ctxs)
551
- final_segments = final_ctx[:segments] || final_ctx['segments']
552
- return unless final_segments
553
-
554
- final_ctrl_idx = final_segments.index { |s|
555
- (s[:type] || s['type']) == 'controller'
556
- }
557
-
558
- sorted = checkpoint_ctxs.sort_by { |c|
559
- c[:checkpoint_number] || c['checkpoint_number'] || 0
560
- }
561
-
562
- sorted.each do |cp_ctx|
563
- cp_segments = cp_ctx[:segments] || cp_ctx['segments'] || []
564
-
565
- old_to_new = {}
566
- kept = []
567
-
568
- cp_segments.each_with_index do |seg, i|
569
- seg_type = seg[:type] || seg['type']
570
- next if seg_type == 'request'
571
- next if seg_type == 'controller'
572
- old_to_new[i] = final_segments.size + kept.size
573
- kept << seg.dup
574
- end
575
-
576
- kept.each do |seg|
577
- pi_key = seg.key?(:parent_index) ? :parent_index : 'parent_index'
578
- pi = seg[pi_key]
579
- next unless pi
580
-
581
- if old_to_new.key?(pi)
582
- seg[pi_key] = old_to_new[pi]
583
- else
584
- seg[pi_key] = final_ctrl_idx || 0
585
- end
586
- end
587
-
588
- final_segments.concat(kept)
589
-
590
- # Merge summary
591
- cp_summary = cp_ctx[:segment_summary] || cp_ctx['segment_summary']
592
- if cp_summary
593
- use_symbols = final_ctx.key?(:segment_summary)
594
- summary_key = use_symbols ? :segment_summary : 'segment_summary'
595
- final_summary = final_ctx[summary_key] ||= {}
596
- cp_summary.each do |k, v|
597
- nk = use_symbols ? k.to_sym : k.to_s
598
- final_summary[nk] = (final_summary[nk] || 0) + v.to_f
599
- end
600
- end
601
-
602
- # Merge capped flag
603
- capped_key = final_ctx.key?(:segments_capped) ? :segments_capped : 'segments_capped'
604
- cp_capped = cp_ctx[:segments_capped] || cp_ctx['segments_capped']
605
- final_ctx[capped_key] = true if cp_capped
606
- end
607
-
608
- # Clean up checkpoint markers
609
- final_ctx.delete(:partial)
610
- final_ctx.delete('partial')
611
- final_ctx.delete(:request_id)
612
- final_ctx.delete('request_id')
613
- final_ctx.delete(:checkpoint_number)
614
- final_ctx.delete('checkpoint_number')
615
- end
616
-
617
- def cleanup_orphaned_partials
618
- return unless Catpm::Sample.request_id_supported?
619
-
620
- Catpm::Sample.where.not(request_id: nil)
621
- .where(recorded_at: ..PARTIAL_STALE_TIMEOUT.seconds.ago)
622
- .delete_all
623
- rescue => e
624
- Catpm.config.error_handler&.call(e)
625
- end
626
-
627
502
  def cleanup_expired_data
628
503
  cutoff = Catpm.config.retention_period.ago
629
504
  batch_size = Catpm.config.cleanup_batch_size
@@ -8,6 +8,7 @@ module Catpm
8
8
 
9
9
  initialize_buffer
10
10
  initialize_flusher
11
+ load_sample_counts
11
12
  apply_patches
12
13
 
13
14
  # Start the flusher in the current process.
@@ -24,6 +25,12 @@ module Catpm
24
25
 
25
26
  private
26
27
 
28
+ def load_sample_counts
29
+ Collector.load_sample_counts_eagerly!
30
+ rescue => e
31
+ Catpm.config.error_handler&.call(e)
32
+ end
33
+
27
34
  def apply_patches
28
35
  if Catpm.config.instrument_net_http
29
36
  if defined?(::Net::HTTP)
@@ -39,15 +46,7 @@ module Catpm
39
46
  end
40
47
 
41
48
  def initialize_buffer
42
- max_bytes = Catpm.config.derived_buffer_memory_limit(detect_threads)
43
- Catpm.buffer ||= Buffer.new(max_bytes: max_bytes)
44
- end
45
-
46
- def detect_threads
47
- return Puma.cli_config.options[:max_threads] if defined?(Puma::Server) && Puma.respond_to?(:cli_config)
48
- return ENV['RAILS_MAX_THREADS'].to_i if ENV['RAILS_MAX_THREADS'].present?
49
- return Sidekiq[:concurrency] if defined?(Sidekiq) && Sidekiq.respond_to?(:[])
50
- nil
49
+ Catpm.buffer ||= Buffer.new(max_bytes: Catpm.config.effective_max_buffer_memory)
51
50
  end
52
51
 
53
52
  def initialize_flusher
@@ -20,8 +20,7 @@ module Catpm
20
20
  max_segments: Catpm.config.max_segments_per_request,
21
21
  request_start: env['catpm.request_start'],
22
22
  stack_sample: use_sampler,
23
- call_tree: Catpm.config.instrument_call_tree,
24
- memory_limit: Catpm.config.derived_request_memory_limit
23
+ call_tree: Catpm.config.instrument_call_tree
25
24
  )
26
25
  env['catpm.segments'] = req_segments
27
26
  Thread.current[:catpm_request_segments] = req_segments
@@ -9,9 +9,9 @@ module Catpm
9
9
  SEGMENT_BASE_BYTES = Event::OBJECT_OVERHEAD + (6 * Event::HASH_ENTRY_SIZE)
10
10
  SEGMENT_STRING_OVERHEAD = Event::OBJECT_OVERHEAD # per-string overhead in segment values
11
11
 
12
- attr_reader :segments, :summary, :request_start, :estimated_bytes, :checkpoint_count, :request_id, :segments_filtered
12
+ attr_reader :segments, :summary, :request_start, :estimated_bytes, :segments_filtered
13
13
 
14
- def initialize(max_segments:, request_start: nil, stack_sample: false, call_tree: false, memory_limit: nil)
14
+ def initialize(max_segments:, request_start: nil, stack_sample: false, call_tree: false)
15
15
  @max_segments = max_segments
16
16
  @request_start = request_start || Process.clock_gettime(Process::CLOCK_MONOTONIC)
17
17
  @segments = []
@@ -20,11 +20,7 @@ module Catpm
20
20
  @span_stack = []
21
21
  @tracked_ranges = []
22
22
  @call_tree = call_tree
23
- @memory_limit = memory_limit
24
23
  @estimated_bytes = 0
25
- @checkpoint_callback = nil
26
- @checkpoint_count = 0
27
- @request_id = memory_limit ? SecureRandom.hex(8) : nil
28
24
  @segments_filtered = 0
29
25
 
30
26
  if stack_sample
@@ -33,10 +29,6 @@ module Catpm
33
29
  end
34
30
  end
35
31
 
36
- def on_checkpoint(&block)
37
- @checkpoint_callback = block
38
- end
39
-
40
32
  def add(type:, duration:, detail:, source: nil, started_at: nil)
41
33
  type_key = type.to_sym
42
34
  count_key, dur_key = SUMMARY_KEYS[type_key]
@@ -73,7 +65,6 @@ module Catpm
73
65
  end
74
66
 
75
67
  @estimated_bytes += estimate_segment_bytes(segment)
76
- maybe_checkpoint
77
68
  end
78
69
 
79
70
  def push_span(type:, detail:, started_at: nil)
@@ -166,68 +157,5 @@ module Catpm
166
157
  bytes
167
158
  end
168
159
 
169
- def maybe_checkpoint
170
- return unless @memory_limit && @estimated_bytes > @memory_limit && @checkpoint_callback
171
-
172
- checkpoint_data = {
173
- segments: @segments,
174
- summary: @summary,
175
- overflow: @overflow,
176
- sampler_segments: @sampler ? sampler_segments_for_checkpoint : [],
177
- checkpoint_number: @checkpoint_count
178
- }
179
-
180
- @checkpoint_count += 1
181
- rebuild_after_checkpoint
182
- @checkpoint_callback.call(checkpoint_data)
183
- end
184
-
185
- def sampler_segments_for_checkpoint
186
- if @call_tree
187
- result = @sampler&.to_call_tree(tracked_ranges: @tracked_ranges) || []
188
- else
189
- result = @sampler&.to_segments(tracked_ranges: @tracked_ranges) || []
190
- end
191
- @sampler&.clear_samples!
192
- result
193
- end
194
-
195
- # After checkpoint: keep only active spans from @span_stack, reset everything else.
196
- def rebuild_after_checkpoint
197
- if @span_stack.any?
198
- # Clone active spans with corrected indices
199
- new_segments = []
200
- old_to_new = {}
201
-
202
- @span_stack.each do |old_idx|
203
- seg = @segments[old_idx]
204
- next unless seg
205
-
206
- new_idx = new_segments.size
207
- old_to_new[old_idx] = new_idx
208
- new_segments << seg.dup
209
- end
210
-
211
- # Fix parent_index references in cloned spans
212
- new_segments.each do |seg|
213
- if seg.key?(:parent_index) && old_to_new.key?(seg[:parent_index])
214
- seg[:parent_index] = old_to_new[seg[:parent_index]]
215
- else
216
- seg.delete(:parent_index)
217
- end
218
- end
219
-
220
- @span_stack = @span_stack.filter_map { |old_idx| old_to_new[old_idx] }
221
- @segments = new_segments
222
- else
223
- @segments = []
224
- end
225
-
226
- @summary = Hash.new(0)
227
- @tracked_ranges = []
228
- @overflow = false
229
- @estimated_bytes = 0
230
- @segments_filtered = 0
231
- end
232
160
  end
233
161
  end
data/lib/catpm/trace.rb CHANGED
@@ -89,23 +89,10 @@ module Catpm
89
89
  max_segments: config.max_segments_per_request,
90
90
  request_start: start_time,
91
91
  stack_sample: use_sampler,
92
- call_tree: config.instrument_call_tree,
93
- memory_limit: config.derived_request_memory_limit
92
+ call_tree: config.instrument_call_tree
94
93
  )
95
94
  Thread.current[:catpm_request_segments] = req_segments
96
95
  owns_segments = true
97
-
98
- if config.derived_request_memory_limit
99
- req_segments.on_checkpoint do |checkpoint_data|
100
- Collector.process_checkpoint(
101
- kind: kind, target: target, operation: operation,
102
- context: context, metadata: metadata,
103
- checkpoint_data: checkpoint_data,
104
- request_start: start_time,
105
- request_id: req_segments.request_id
106
- )
107
- end
108
- end
109
96
  end
110
97
  end
111
98
 
data/lib/catpm/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Catpm
4
- VERSION = '0.9.5'
4
+ VERSION = '0.9.6'
5
5
  end
data/lib/catpm.rb CHANGED
@@ -39,6 +39,7 @@ module Catpm
39
39
  @buffer = nil
40
40
  @flusher = nil
41
41
  Fingerprint.reset_caches!
42
+ Collector.reset_sample_counts!
42
43
  end
43
44
 
44
45
  def enabled?
@@ -50,7 +50,7 @@ Catpm.configure do |config|
50
50
  # config.events_max_samples_per_name = 20 # nil = unlimited
51
51
 
52
52
  # === Memory ===
53
- # config.max_memory_per_thread = 2.megabytes # memory budget per thread (buffer + request segments)
53
+ # config.max_memory = 20 # MB — global memory budget (2% of 1GB server)
54
54
 
55
55
  # === Buffering & Flushing ===
56
56
  # config.flush_interval = 30 # seconds
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: catpm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.5
4
+ version: 0.9.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ''