catpm 0.9.6 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/catpm/collector.rb +3 -205
- data/lib/catpm/configuration.rb +0 -7
- data/lib/catpm/flusher.rb +0 -1
- data/lib/catpm/lifecycle.rb +0 -7
- data/lib/catpm/trace.rb +1 -2
- data/lib/catpm/version.rb +1 -1
- data/lib/catpm.rb +0 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 54937b58ef7d18fa437e232b7a660ac014737a6e716daed6e57ab7463dc38e27
|
|
4
|
+
data.tar.gz: 76cfd9389ecb1f37794806353c2c56f1d7f799a9bf6f9e8c0c975c93b8423c53
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a948c19294ca90dc60215f58e3d8f6fbdd377f4b62f468eba76678b223af37610d549d4a52ea7f42d7c6fec4ab93952bceeb6e19857ec9c67ecf601a4a1a9b51
|
|
7
|
+
data.tar.gz: 298c9964d29d3fc9b2570720a7813b30adc5f21c9b7f75e49c288c9fc4f4cd65c4196c798dc05caeb7824ebefb2553cd82b6778a3f1e1a8eeaac4f3d893a703b
|
data/README.md
CHANGED
data/lib/catpm/collector.rb
CHANGED
|
@@ -6,12 +6,6 @@ module Catpm
|
|
|
6
6
|
MIN_GAP_MS = 1.0
|
|
7
7
|
DEFAULT_ERROR_STATUS = 500
|
|
8
8
|
DEFAULT_SUCCESS_STATUS = 200
|
|
9
|
-
# Cap global force-instrument counter to avoid cascade when many requests
|
|
10
|
-
# are slow. Without this cap, apps with 30% slow requests would see ~23%
|
|
11
|
-
# instrumentation instead of the configured 1/random_sample_rate.
|
|
12
|
-
MAX_FORCE_INSTRUMENT_COUNT = 3
|
|
13
|
-
FORCE_INSTRUMENT_MAX_ENDPOINTS = 100 # cap per-endpoint force-instrument hash
|
|
14
|
-
|
|
15
9
|
class << self
|
|
16
10
|
def process_action_controller(event)
|
|
17
11
|
return unless Catpm.enabled?
|
|
@@ -63,25 +57,6 @@ module Catpm
|
|
|
63
57
|
instrumented: instrumented
|
|
64
58
|
)
|
|
65
59
|
|
|
66
|
-
# Force the NEXT HTTP request to be fully instrumented when this one
|
|
67
|
-
# wasn't instrumented and was slow/error.
|
|
68
|
-
# Filling phase is handled by @http_filling_active flag in
|
|
69
|
-
# should_instrument_request? — no need for force_instrument here.
|
|
70
|
-
if !instrumented
|
|
71
|
-
if payload[:exception] || duration >= Catpm.config.slow_threshold_for(:http)
|
|
72
|
-
trigger_force_instrument
|
|
73
|
-
elsif !@http_filling_active
|
|
74
|
-
# Detect new/underfilled endpoints that appeared after filling phase ended
|
|
75
|
-
max = Catpm.config.max_random_samples_per_endpoint
|
|
76
|
-
if max
|
|
77
|
-
endpoint_key = ['http', target, operation]
|
|
78
|
-
if instrumented_sample_counts[endpoint_key] < max
|
|
79
|
-
@http_filling_active = true
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
84
|
-
|
|
85
60
|
if sample_type
|
|
86
61
|
context = build_http_context(payload)
|
|
87
62
|
|
|
@@ -286,11 +261,6 @@ module Catpm
|
|
|
286
261
|
instrumented: instrumented
|
|
287
262
|
)
|
|
288
263
|
|
|
289
|
-
# Slow spike detection: force instrument next request for this endpoint
|
|
290
|
-
if !instrumented && (error || duration >= Catpm.config.slow_threshold_for(kind.to_sym))
|
|
291
|
-
trigger_force_instrument(kind: kind, target: target, operation: operation)
|
|
292
|
-
end
|
|
293
|
-
|
|
294
264
|
if sample_type
|
|
295
265
|
context = (context || {}).dup
|
|
296
266
|
|
|
@@ -417,152 +387,18 @@ module Catpm
|
|
|
417
387
|
|
|
418
388
|
# --- Pre-sampling: decide BEFORE request whether to instrument ---
|
|
419
389
|
|
|
420
|
-
# Eagerly load sample counts at startup so old endpoints don't
|
|
421
|
-
# re-enter filling phase on every process restart.
|
|
422
|
-
# Called from Lifecycle.register_hooks after flusher init.
|
|
423
|
-
def load_sample_counts_eagerly!
|
|
424
|
-
@instrumented_sample_counts = load_sample_counts_from_db
|
|
425
|
-
@instrumented_sample_counts_loaded = true
|
|
426
|
-
recompute_http_filling_active
|
|
427
|
-
end
|
|
428
|
-
|
|
429
390
|
# For HTTP middleware where endpoint is unknown at start.
|
|
430
|
-
# Returns true if this request should get full instrumentation.
|
|
431
391
|
def should_instrument_request?
|
|
432
|
-
# Force after slow spike detection
|
|
433
|
-
if (@force_instrument_count || 0) > 0
|
|
434
|
-
@force_instrument_count -= 1
|
|
435
|
-
return true
|
|
436
|
-
end
|
|
437
|
-
|
|
438
|
-
# During filling phase, instrument all requests so underfilled
|
|
439
|
-
# endpoints collect their quota (max_random_samples_per_endpoint).
|
|
440
|
-
# The flag is set by load_sample_counts_eagerly! and maintained
|
|
441
|
-
# by early_sample_type as endpoints fill up.
|
|
442
|
-
return true if @http_filling_active
|
|
443
|
-
|
|
444
392
|
rand(Catpm.config.random_sample_rate) == 0
|
|
445
393
|
end
|
|
446
394
|
|
|
447
395
|
# For track_request where endpoint is known at start.
|
|
448
|
-
|
|
449
|
-
def should_instrument?(kind, target, operation)
|
|
450
|
-
endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
|
|
451
|
-
|
|
452
|
-
# Force after slow spike
|
|
453
|
-
if force_instrument_endpoints.delete(endpoint_key)
|
|
454
|
-
return true
|
|
455
|
-
end
|
|
456
|
-
|
|
457
|
-
# Filling phase — endpoint hasn't collected enough instrumented samples yet
|
|
458
|
-
max = Catpm.config.max_random_samples_per_endpoint
|
|
459
|
-
if max.nil? || instrumented_sample_counts[endpoint_key] < max
|
|
460
|
-
return true
|
|
461
|
-
end
|
|
462
|
-
|
|
396
|
+
def should_instrument?(_kind, _target, _operation)
|
|
463
397
|
rand(Catpm.config.random_sample_rate) == 0
|
|
464
398
|
end
|
|
465
399
|
|
|
466
|
-
# Called when a slow/error request had no instrumentation —
|
|
467
|
-
# forces the NEXT request(s) to be fully instrumented.
|
|
468
|
-
#
|
|
469
|
-
# Two modes (mutually exclusive to avoid double-instrumentation):
|
|
470
|
-
# - With endpoint: sets per-endpoint flag consumed by should_instrument?
|
|
471
|
-
# (for track_request paths where endpoint is known)
|
|
472
|
-
# - Without endpoint: increments global counter consumed by
|
|
473
|
-
# should_instrument_request? (for middleware path where endpoint is unknown)
|
|
474
|
-
def trigger_force_instrument(kind: nil, target: nil, operation: nil)
|
|
475
|
-
if kind && target
|
|
476
|
-
endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
|
|
477
|
-
if force_instrument_endpoints.size < FORCE_INSTRUMENT_MAX_ENDPOINTS
|
|
478
|
-
force_instrument_endpoints[endpoint_key] = true
|
|
479
|
-
end
|
|
480
|
-
else
|
|
481
|
-
@force_instrument_count = [(@force_instrument_count || 0) + 1, MAX_FORCE_INSTRUMENT_COUNT].min
|
|
482
|
-
end
|
|
483
|
-
end
|
|
484
|
-
|
|
485
|
-
def reset_sample_counts!
|
|
486
|
-
@instrumented_sample_counts = nil
|
|
487
|
-
@instrumented_sample_counts_loaded = false
|
|
488
|
-
@force_instrument_endpoints = nil
|
|
489
|
-
@force_instrument_count = nil
|
|
490
|
-
@http_filling_active = false
|
|
491
|
-
end
|
|
492
|
-
|
|
493
400
|
private
|
|
494
401
|
|
|
495
|
-
# Recompute whether any HTTP endpoint is still below its sample quota.
|
|
496
|
-
# Called after loading counts from DB and when an endpoint exits filling.
|
|
497
|
-
def recompute_http_filling_active
|
|
498
|
-
max = Catpm.config.max_random_samples_per_endpoint
|
|
499
|
-
@http_filling_active = if max
|
|
500
|
-
# True if hash is empty (new app / new endpoints may appear) or any endpoint below quota
|
|
501
|
-
instrumented_sample_counts.empty? || instrumented_sample_counts.any? { |_, c| c < max }
|
|
502
|
-
else
|
|
503
|
-
false # unlimited quota → no filling phase for HTTP middleware
|
|
504
|
-
end
|
|
505
|
-
end
|
|
506
|
-
|
|
507
|
-
# Evict half the entries from instrumented_sample_counts.
|
|
508
|
-
# Prefers evicting filled entries (count >= max) to avoid
|
|
509
|
-
# re-triggering filling phase for those endpoints.
|
|
510
|
-
def evict_sample_counts(max_random)
|
|
511
|
-
evict_count = instrumented_sample_counts.size / 2
|
|
512
|
-
if max_random
|
|
513
|
-
filled_keys = []
|
|
514
|
-
unfilled_keys = []
|
|
515
|
-
instrumented_sample_counts.each do |k, c|
|
|
516
|
-
(c >= max_random ? filled_keys : unfilled_keys) << k
|
|
517
|
-
end
|
|
518
|
-
# Evict filled first (safe), then unfilled if needed
|
|
519
|
-
to_evict = (filled_keys + unfilled_keys).first(evict_count)
|
|
520
|
-
to_evict.each { |k| instrumented_sample_counts.delete(k) }
|
|
521
|
-
else
|
|
522
|
-
evict_count.times { instrumented_sample_counts.shift }
|
|
523
|
-
end
|
|
524
|
-
end
|
|
525
|
-
|
|
526
|
-
def force_instrument_endpoints
|
|
527
|
-
@force_instrument_endpoints ||= {}
|
|
528
|
-
end
|
|
529
|
-
|
|
530
|
-
def instrumented_sample_counts
|
|
531
|
-
return @instrumented_sample_counts if @instrumented_sample_counts_loaded
|
|
532
|
-
|
|
533
|
-
@instrumented_sample_counts = load_sample_counts_from_db
|
|
534
|
-
@instrumented_sample_counts_loaded = true
|
|
535
|
-
@instrumented_sample_counts
|
|
536
|
-
end
|
|
537
|
-
|
|
538
|
-
# Pre-populate filling counters from DB so old endpoints don't
|
|
539
|
-
# re-enter filling phase on every process restart.
|
|
540
|
-
# Temporarily clears thread-local to prevent our query from being
|
|
541
|
-
# captured as a segment in any active request.
|
|
542
|
-
def load_sample_counts_from_db
|
|
543
|
-
counts = Hash.new(0)
|
|
544
|
-
return counts unless defined?(Catpm::Sample) && Catpm::Bucket.table_exists?
|
|
545
|
-
|
|
546
|
-
saved_rs = Thread.current[:catpm_request_segments]
|
|
547
|
-
Thread.current[:catpm_request_segments] = nil
|
|
548
|
-
begin
|
|
549
|
-
Catpm::Sample.joins(:bucket)
|
|
550
|
-
.where(sample_type: 'random')
|
|
551
|
-
.group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation')
|
|
552
|
-
.count
|
|
553
|
-
.each do |(kind, target, operation), count|
|
|
554
|
-
counts[[kind.to_s, target.to_s, operation.to_s]] = count
|
|
555
|
-
end
|
|
556
|
-
ensure
|
|
557
|
-
Thread.current[:catpm_request_segments] = saved_rs
|
|
558
|
-
end
|
|
559
|
-
|
|
560
|
-
counts
|
|
561
|
-
rescue => e
|
|
562
|
-
Catpm.config.error_handler&.call(e)
|
|
563
|
-
Hash.new(0)
|
|
564
|
-
end
|
|
565
|
-
|
|
566
402
|
# Remove near-zero-duration "code" spans that merely wrap a "controller" span.
|
|
567
403
|
# This happens when CallTracer (TracePoint) captures a thin dispatch method
|
|
568
404
|
# (e.g. Telegram::WebhookController#process) whose :return fires before the
|
|
@@ -618,49 +454,11 @@ module Catpm
|
|
|
618
454
|
|
|
619
455
|
# Determine sample type at event creation time so only sampled events
|
|
620
456
|
# carry full context in the buffer.
|
|
621
|
-
#
|
|
622
|
-
# Non-instrumented requests never get a sample (they have no segments).
|
|
623
|
-
# Filling phase is handled by the caller via trigger_force_instrument,
|
|
624
|
-
# so the NEXT request gets full instrumentation with segments.
|
|
625
|
-
#
|
|
626
|
-
# Post-filling: non-instrumented requests just contribute duration/count
|
|
627
|
-
# to the bucket, no sample created.
|
|
457
|
+
# Non-instrumented requests have no segments — skip sample creation.
|
|
628
458
|
def early_sample_type(error:, duration:, kind:, target:, operation:, instrumented: true)
|
|
629
|
-
# Errors: only create sample for instrumented requests (with segments).
|
|
630
|
-
# Non-instrumented errors are still tracked in error_groups via
|
|
631
|
-
# event.error? — occurrence counts, contexts, and backtrace are preserved.
|
|
632
|
-
# trigger_force_instrument ensures the next occurrence gets full segments.
|
|
633
459
|
return 'error' if error && instrumented
|
|
634
|
-
|
|
635
|
-
is_slow = duration >= Catpm.config.slow_threshold_for(kind.to_sym)
|
|
636
|
-
|
|
637
|
-
# Non-instrumented requests have no segments — skip sample creation.
|
|
638
|
-
# Slow/error spikes are handled by the caller via trigger_force_instrument
|
|
639
|
-
# so the NEXT request gets full instrumentation with useful segments.
|
|
640
460
|
return nil unless instrumented
|
|
641
|
-
|
|
642
|
-
# Count this instrumented request towards filling phase completion.
|
|
643
|
-
# Both slow and random requests count — without this, endpoints where
|
|
644
|
-
# most requests exceed slow_threshold would never exit the filling phase,
|
|
645
|
-
# causing 100% instrumentation regardless of random_sample_rate.
|
|
646
|
-
endpoint_key = [kind.to_s, target, operation.to_s]
|
|
647
|
-
count = instrumented_sample_counts[endpoint_key]
|
|
648
|
-
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
649
|
-
if max_random.nil? || count < max_random
|
|
650
|
-
# Evict when hash exceeds derived limit — prefer filled entries
|
|
651
|
-
max_entries = Catpm.config.effective_sample_counts_max
|
|
652
|
-
if instrumented_sample_counts.size >= max_entries
|
|
653
|
-
evict_sample_counts(max_random)
|
|
654
|
-
end
|
|
655
|
-
instrumented_sample_counts[endpoint_key] = count + 1
|
|
656
|
-
|
|
657
|
-
# Endpoint just reached quota — recheck if any filling endpoints remain
|
|
658
|
-
if max_random && count + 1 >= max_random
|
|
659
|
-
recompute_http_filling_active
|
|
660
|
-
end
|
|
661
|
-
end
|
|
662
|
-
|
|
663
|
-
return 'slow' if is_slow
|
|
461
|
+
return 'slow' if duration >= Catpm.config.slow_threshold_for(kind.to_sym)
|
|
664
462
|
|
|
665
463
|
'random'
|
|
666
464
|
end
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -9,8 +9,6 @@ module Catpm
|
|
|
9
9
|
BUFFER_MEMORY_SHARE = 0.5 # 50% of max_memory for event buffer
|
|
10
10
|
CACHE_ENTRIES_PER_MB = 10_000 # ~100 bytes/entry in path_cache
|
|
11
11
|
PATH_CACHE_BUDGET_SHARE = 0.05 # 5% of max_memory for path_cache
|
|
12
|
-
SAMPLE_COUNTS_PER_MB = 12_500 # ~80 bytes/entry in sample counts hash
|
|
13
|
-
SAMPLE_COUNTS_BUDGET_SHARE = 0.02 # 2% of max_memory for sample counts
|
|
14
12
|
|
|
15
13
|
# Boolean / non-numeric settings — plain attr_accessor
|
|
16
14
|
attr_accessor :enabled,
|
|
@@ -137,11 +135,6 @@ module Catpm
|
|
|
137
135
|
(max_memory * CACHE_ENTRIES_PER_MB * PATH_CACHE_BUDGET_SHARE).to_i
|
|
138
136
|
end
|
|
139
137
|
|
|
140
|
-
# Sample counts hash limit derived from max_memory
|
|
141
|
-
def effective_sample_counts_max
|
|
142
|
-
(max_memory * SAMPLE_COUNTS_PER_MB * SAMPLE_COUNTS_BUDGET_SHARE).to_i
|
|
143
|
-
end
|
|
144
|
-
|
|
145
138
|
def slow_threshold_for(kind)
|
|
146
139
|
slow_threshold_per_kind.fetch(kind.to_sym, slow_threshold)
|
|
147
140
|
end
|
data/lib/catpm/flusher.rb
CHANGED
data/lib/catpm/lifecycle.rb
CHANGED
|
@@ -8,7 +8,6 @@ module Catpm
|
|
|
8
8
|
|
|
9
9
|
initialize_buffer
|
|
10
10
|
initialize_flusher
|
|
11
|
-
load_sample_counts
|
|
12
11
|
apply_patches
|
|
13
12
|
|
|
14
13
|
# Start the flusher in the current process.
|
|
@@ -25,12 +24,6 @@ module Catpm
|
|
|
25
24
|
|
|
26
25
|
private
|
|
27
26
|
|
|
28
|
-
def load_sample_counts
|
|
29
|
-
Collector.load_sample_counts_eagerly!
|
|
30
|
-
rescue => e
|
|
31
|
-
Catpm.config.error_handler&.call(e)
|
|
32
|
-
end
|
|
33
|
-
|
|
34
27
|
def apply_patches
|
|
35
28
|
if Catpm.config.instrument_net_http
|
|
36
29
|
if defined?(::Net::HTTP)
|
data/lib/catpm/trace.rb
CHANGED
|
@@ -125,8 +125,7 @@ module Catpm
|
|
|
125
125
|
Thread.current[:catpm_request_segments] = nil
|
|
126
126
|
# Mark that this request was already instrumented and processed by
|
|
127
127
|
# track_request. Without this, process_action_controller would see
|
|
128
|
-
# nil req_segments and
|
|
129
|
-
# requests — even though they were fully instrumented here.
|
|
128
|
+
# nil req_segments and think the request was not instrumented.
|
|
130
129
|
Thread.current[:catpm_tracked_instrumented] = true
|
|
131
130
|
end
|
|
132
131
|
end
|
data/lib/catpm/version.rb
CHANGED
data/lib/catpm.rb
CHANGED