catpm 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -21
- data/app/controllers/catpm/system_controller.rb +2 -1
- data/app/helpers/catpm/application_helper.rb +2 -4
- data/app/models/catpm/sample.rb +0 -4
- data/app/views/catpm/samples/show.html.erb +2 -2
- data/app/views/catpm/shared/_segments_waterfall.html.erb +7 -1
- data/app/views/catpm/system/index.html.erb +1 -1
- data/lib/catpm/adapter/base.rb +0 -1
- data/lib/catpm/buffer.rb +7 -5
- data/lib/catpm/collector.rb +83 -70
- data/lib/catpm/configuration.rb +30 -10
- data/lib/catpm/engine.rb +0 -4
- data/lib/catpm/event.rb +2 -3
- data/lib/catpm/fingerprint.rb +1 -1
- data/lib/catpm/flusher.rb +77 -202
- data/lib/catpm/lifecycle.rb +8 -1
- data/lib/catpm/middleware.rb +3 -4
- data/lib/catpm/patches/httpclient.rb +1 -1
- data/lib/catpm/patches/net_http.rb +1 -1
- data/lib/catpm/request_segments.rb +18 -79
- data/lib/catpm/segment_subscribers.rb +5 -5
- data/lib/catpm/trace.rb +3 -16
- data/lib/catpm/version.rb +1 -1
- data/lib/catpm.rb +7 -1
- data/lib/generators/catpm/templates/initializer.rb.tt +4 -6
- metadata +1 -2
- data/lib/catpm/auto_instrument.rb +0 -145
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3731353688fdebaef1f9cb164731daba00a0d69df65da03dc309f6f7901e2708
|
|
4
|
+
data.tar.gz: 4ef7e26d0b721c8fea556f797d74c58ef2b0df8f9257edead653393fcb991229
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: df633940cf6beba3252b6915c45d22688ff23ef3200edf17622e971ce65191ad85af476bfdd853a2bee449da6d28df81431c48e663001c80b14242ba407d3a7b
|
|
7
|
+
data.tar.gz: 473011238fdf84d011bf9d3c0ceed1d47b7f1a3be5e6e9d640f6f66b62273c2a396c425166b09ceb5b0b8bf63a993e51651d99d373c48b1058ead4ede2451266
|
data/README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
gem build catpm.gemspec
|
|
2
|
-
gem push catpm-0.9.
|
|
2
|
+
gem push catpm-0.9.5.gem
|
|
3
3
|
|
|
4
4
|
# Catpm
|
|
5
5
|
|
|
@@ -16,7 +16,6 @@ Catpm is designed for small-to-medium Rails applications where a full APM (Datad
|
|
|
16
16
|
- **Error tracking** — fingerprinting, occurrence counting, context circular buffers
|
|
17
17
|
- **Built-in dashboard** — filterable by kind, endpoint drill-down, waterfall visualization
|
|
18
18
|
- **Custom events** — track business events (signups, payments, etc.) with `Catpm.event`
|
|
19
|
-
- **Auto-instrumentation** — service objects (`ApplicationService`, `BaseService`) traced automatically
|
|
20
19
|
- **Multi-database** — PostgreSQL (primary), SQLite (first-class)
|
|
21
20
|
- **Zero dependencies** — only Rails >= 7.1, no Redis or background queues required
|
|
22
21
|
- **Memory-safe** — configurable buffer limits, automatic downsampling with infinite retention
|
|
@@ -112,24 +111,6 @@ class PaymentService
|
|
|
112
111
|
end
|
|
113
112
|
```
|
|
114
113
|
|
|
115
|
-
### Auto-instrumentation
|
|
116
|
-
|
|
117
|
-
Service objects following the `ApplicationService.call` pattern are instrumented automatically — no configuration needed. If your base class has a different name:
|
|
118
|
-
|
|
119
|
-
```ruby
|
|
120
|
-
Catpm.configure do |config|
|
|
121
|
-
config.service_base_classes = ['MyServiceBase']
|
|
122
|
-
end
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
You can also instrument specific methods explicitly:
|
|
126
|
-
|
|
127
|
-
```ruby
|
|
128
|
-
Catpm.configure do |config|
|
|
129
|
-
config.auto_instrument_methods = ['Worker#process', 'Gateway.charge']
|
|
130
|
-
end
|
|
131
|
-
```
|
|
132
|
-
|
|
133
114
|
### Custom events
|
|
134
115
|
|
|
135
116
|
Track business-level events that aren't tied to performance:
|
|
@@ -182,7 +163,7 @@ Catpm.configure do |config|
|
|
|
182
163
|
]
|
|
183
164
|
|
|
184
165
|
# Tuning
|
|
185
|
-
config.
|
|
166
|
+
config.max_memory_per_thread = 2.megabytes # Memory budget per thread (buffer + request segments)
|
|
186
167
|
config.flush_interval = 30 # Seconds between DB flushes
|
|
187
168
|
end
|
|
188
169
|
```
|
|
@@ -6,6 +6,7 @@ module Catpm
|
|
|
6
6
|
@stats = Catpm.stats
|
|
7
7
|
@buffer_size = Catpm.buffer&.size || 0
|
|
8
8
|
@buffer_bytes = Catpm.buffer&.current_bytes || 0
|
|
9
|
+
@buffer_max_bytes = Catpm.buffer&.max_bytes || 0
|
|
9
10
|
@config = Catpm.config
|
|
10
11
|
@oldest_bucket = Catpm::Bucket.minimum(:bucket_start)
|
|
11
12
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
@@ -13,7 +14,7 @@ module Catpm
|
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def pipeline
|
|
16
|
-
render layout:
|
|
17
|
+
render layout: 'catpm/pipeline'
|
|
17
18
|
end
|
|
18
19
|
end
|
|
19
20
|
end
|
|
@@ -68,7 +68,7 @@ module Catpm
|
|
|
68
68
|
slow_threshold: { group: 'Segments', label: 'Slow Threshold', desc: 'Requests slower than this are flagged as slow', fmt: :ms },
|
|
69
69
|
slow_threshold_per_kind: { group: 'Segments', label: 'Slow Threshold (per kind)', desc: 'Override slow threshold for specific request kinds (http, job, custom)', fmt: :hash_ms },
|
|
70
70
|
max_segments_per_request: { group: 'Segments', label: 'Max Segments / Request', desc: 'Cap on segments captured per request', fmt: :nullable_int },
|
|
71
|
-
|
|
71
|
+
min_segment_duration: { group: 'Segments', label: 'Min Segment Duration', desc: 'Segments shorter than this are counted in summary but not stored; 0 = store all', fmt: :ms_zero },
|
|
72
72
|
max_sql_length: { group: 'Segments', label: 'Max SQL Length', desc: 'Truncate SQL queries beyond this many characters', fmt: :nullable_chars },
|
|
73
73
|
ignored_targets: { group: 'Segments', label: 'Ignored Targets', desc: 'Endpoint patterns excluded from tracking (strings or regexps)', fmt: :list },
|
|
74
74
|
|
|
@@ -94,7 +94,7 @@ module Catpm
|
|
|
94
94
|
events_max_samples_per_name: { group: 'Events', label: 'Max Samples / Name', desc: 'Event samples retained per event name', fmt: :nullable_int },
|
|
95
95
|
|
|
96
96
|
# ── Buffer & Flush ──
|
|
97
|
-
|
|
97
|
+
max_memory: { group: 'Buffer & Flush', label: 'Max Memory (MB)', desc: 'Global memory budget for catpm gem in megabytes', fmt: :int },
|
|
98
98
|
flush_interval: { group: 'Buffer & Flush', label: 'Flush Interval', desc: 'How often the background thread drains the buffer to the database', fmt: :seconds },
|
|
99
99
|
flush_jitter: { group: 'Buffer & Flush', label: 'Flush Jitter', desc: 'Random jitter added to flush interval to avoid thundering herd', fmt: :pm_seconds },
|
|
100
100
|
persistence_batch_size: { group: 'Buffer & Flush', label: 'Batch Size', desc: 'Number of events written per database transaction', fmt: :int },
|
|
@@ -121,8 +121,6 @@ module Catpm
|
|
|
121
121
|
# ── Advanced ──
|
|
122
122
|
shutdown_timeout: { group: 'Advanced', label: 'Shutdown Timeout', desc: 'Seconds to wait for buffer flush on application shutdown', fmt: :seconds },
|
|
123
123
|
caller_scan_depth: { group: 'Advanced', label: 'Caller Scan Depth', desc: 'Max stack frames scanned to find app code for source attribution', fmt: :int },
|
|
124
|
-
auto_instrument_methods: { group: 'Advanced', label: 'Auto-Instrument Methods', desc: 'Method signatures to automatically instrument (e.g. Worker#process)', fmt: :list },
|
|
125
|
-
service_base_classes: { group: 'Advanced', label: 'Service Base Classes', desc: 'Base classes for auto-detection of service objects; nil = auto-detect', fmt: :nullable_list },
|
|
126
124
|
}.freeze
|
|
127
125
|
|
|
128
126
|
def format_config_value(config, attr, meta)
|
data/app/models/catpm/sample.rb
CHANGED
|
@@ -14,10 +14,6 @@ module Catpm
|
|
|
14
14
|
scope :recent, ->(period = 1.hour) { where(recorded_at: period.ago..) }
|
|
15
15
|
scope :for_error, ->(fingerprint) { where(error_fingerprint: fingerprint) }
|
|
16
16
|
|
|
17
|
-
def self.request_id_supported?
|
|
18
|
-
column_names.include?('request_id')
|
|
19
|
-
end
|
|
20
|
-
|
|
21
17
|
def parsed_context
|
|
22
18
|
case context
|
|
23
19
|
when Hash then context
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
|
|
41
41
|
<%# ─── Request Context ─── %>
|
|
42
42
|
<%
|
|
43
|
-
ctx_display = @context.except("segments", :segments, "segment_summary", :segment_summary, "segments_capped", :segments_capped, "backtrace", :backtrace, "method", :method, "path", :path, "status", :status)
|
|
43
|
+
ctx_display = @context.except("segments", :segments, "segment_summary", :segment_summary, "segments_capped", :segments_capped, "segments_filtered", :segments_filtered, "backtrace", :backtrace, "method", :method, "path", :path, "status", :status)
|
|
44
44
|
ctx_flat = ctx_display.select { |_, v| !v.is_a?(Hash) && !v.is_a?(Array) }
|
|
45
45
|
ctx_nested = ctx_display.select { |_, v| v.is_a?(Hash) || v.is_a?(Array) }
|
|
46
46
|
%>
|
|
@@ -103,5 +103,5 @@
|
|
|
103
103
|
|
|
104
104
|
<%# ─── Segments Waterfall (full width, no title) ─── %>
|
|
105
105
|
<% if @segments.any? %>
|
|
106
|
-
<%= render "catpm/shared/segments_waterfall", segments: @segments, total_duration: @sample.duration, segments_capped: @context["segments_capped"] || @context[:segments_capped], table_id: "segments-table" %>
|
|
106
|
+
<%= render "catpm/shared/segments_waterfall", segments: @segments, total_duration: @sample.duration, segments_capped: @context["segments_capped"] || @context[:segments_capped], segments_filtered: @context["segments_filtered"] || @context[:segments_filtered] || 0, table_id: "segments-table" %>
|
|
107
107
|
<% end %>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
<%# Locals: segments, total_duration, segments_capped, table_id %>
|
|
1
|
+
<%# Locals: segments, total_duration, segments_capped, segments_filtered, table_id %>
|
|
2
2
|
|
|
3
3
|
<% if segments.any? %>
|
|
4
4
|
<% total_dur = total_duration.to_f %>
|
|
@@ -147,6 +147,12 @@
|
|
|
147
147
|
<% end %>
|
|
148
148
|
</tbody>
|
|
149
149
|
</table>
|
|
150
|
+
<% filtered = segments_filtered.to_i %>
|
|
151
|
+
<% if filtered > 0 %>
|
|
152
|
+
<div style="padding:6px 12px; font-size:12px; color:var(--text-2); border-top:1px solid var(--border)">
|
|
153
|
+
<%= filtered %> segment<%= filtered == 1 ? '' : 's' %> below <%= Catpm.config.min_segment_duration %>ms not shown (counted in Time Breakdown)
|
|
154
|
+
</div>
|
|
155
|
+
<% end %>
|
|
150
156
|
</div>
|
|
151
157
|
<% else %>
|
|
152
158
|
<div class="empty-state">
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
<div class="diag-card">
|
|
11
11
|
<div class="diag-label">Buffer</div>
|
|
12
12
|
<div class="diag-value"><%= @buffer_size %> <span class="diag-unit">events</span></div>
|
|
13
|
-
<div class="diag-detail"><%= number_to_human_size(@buffer_bytes) %> / <%= number_to_human_size(@
|
|
13
|
+
<div class="diag-detail"><%= number_to_human_size(@buffer_bytes) %> / <%= number_to_human_size(@buffer_max_bytes) %></div>
|
|
14
14
|
</div>
|
|
15
15
|
<div class="diag-card">
|
|
16
16
|
<div class="diag-label">Flushes</div>
|
data/lib/catpm/adapter/base.rb
CHANGED
data/lib/catpm/buffer.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class Buffer
|
|
5
|
-
attr_reader :current_bytes, :dropped_count
|
|
5
|
+
attr_reader :current_bytes, :max_bytes, :dropped_count
|
|
6
6
|
|
|
7
7
|
def initialize(max_bytes:)
|
|
8
8
|
@monitor = Monitor.new
|
|
@@ -22,17 +22,19 @@ module Catpm
|
|
|
22
22
|
# Called from request threads. Returns :accepted or :dropped.
|
|
23
23
|
# Never blocks — monitoring must not slow down the application.
|
|
24
24
|
#
|
|
25
|
+
OVERFLOW_FACTOR = 1.5 # hard cap multiplier — drops events beyond this to prevent OOM
|
|
26
|
+
|
|
25
27
|
# When buffer reaches max_bytes, signals the flusher for immediate drain
|
|
26
|
-
# and continues accepting events. Only drops as a last resort at
|
|
27
|
-
# (flusher stuck or DB down).
|
|
28
|
+
# and continues accepting events. Only drops as a last resort at OVERFLOW_FACTOR
|
|
29
|
+
# capacity (flusher stuck or DB down).
|
|
28
30
|
def push(event)
|
|
29
31
|
signal_flush = false
|
|
30
32
|
|
|
31
33
|
@monitor.synchronize do
|
|
32
34
|
bytes = event.estimated_bytes
|
|
33
35
|
|
|
34
|
-
# Hard safety cap:
|
|
35
|
-
if @current_bytes + bytes > @max_bytes *
|
|
36
|
+
# Hard safety cap: prevents OOM if flusher is stuck
|
|
37
|
+
if @current_bytes + bytes > @max_bytes * OVERFLOW_FACTOR
|
|
36
38
|
@dropped_count += 1
|
|
37
39
|
Catpm.stats[:dropped_events] += 1
|
|
38
40
|
return :dropped
|
data/lib/catpm/collector.rb
CHANGED
|
@@ -4,10 +4,13 @@ module Catpm
|
|
|
4
4
|
module Collector
|
|
5
5
|
SYNTHETIC_MIDDLEWARE_OFFSET_MS = 0.5
|
|
6
6
|
MIN_GAP_MS = 1.0
|
|
7
|
+
DEFAULT_ERROR_STATUS = 500
|
|
8
|
+
DEFAULT_SUCCESS_STATUS = 200
|
|
7
9
|
# Cap global force-instrument counter to avoid cascade when many requests
|
|
8
10
|
# are slow. Without this cap, apps with 30% slow requests would see ~23%
|
|
9
11
|
# instrumentation instead of the configured 1/random_sample_rate.
|
|
10
12
|
MAX_FORCE_INSTRUMENT_COUNT = 3
|
|
13
|
+
FORCE_INSTRUMENT_MAX_ENDPOINTS = 100 # cap per-endpoint force-instrument hash
|
|
11
14
|
|
|
12
15
|
class << self
|
|
13
16
|
def process_action_controller(event)
|
|
@@ -19,7 +22,7 @@ module Catpm
|
|
|
19
22
|
return if Catpm.config.ignored?(target)
|
|
20
23
|
|
|
21
24
|
duration = event.duration # milliseconds
|
|
22
|
-
status = payload[:status] || (payload[:exception] ?
|
|
25
|
+
status = payload[:status] || (payload[:exception] ? DEFAULT_ERROR_STATUS : nil)
|
|
23
26
|
metadata = build_http_metadata(payload)
|
|
24
27
|
|
|
25
28
|
req_segments = Thread.current[:catpm_request_segments]
|
|
@@ -50,7 +53,7 @@ module Catpm
|
|
|
50
53
|
metadata[:_instrumented] = 1 if instrumented
|
|
51
54
|
|
|
52
55
|
# Early sampling decision — only build heavy context for sampled events
|
|
53
|
-
operation = payload[:method] || '
|
|
56
|
+
operation = payload[:method] || ''
|
|
54
57
|
sample_type = early_sample_type(
|
|
55
58
|
error: payload[:exception],
|
|
56
59
|
duration: duration,
|
|
@@ -61,16 +64,20 @@ module Catpm
|
|
|
61
64
|
)
|
|
62
65
|
|
|
63
66
|
# Force the NEXT HTTP request to be fully instrumented when this one
|
|
64
|
-
# wasn't
|
|
65
|
-
#
|
|
67
|
+
# wasn't instrumented and was slow/error.
|
|
68
|
+
# Filling phase is handled by @http_filling_active flag in
|
|
69
|
+
# should_instrument_request? — no need for force_instrument here.
|
|
66
70
|
if !instrumented
|
|
67
71
|
if payload[:exception] || duration >= Catpm.config.slow_threshold_for(:http)
|
|
68
72
|
trigger_force_instrument
|
|
69
|
-
|
|
73
|
+
elsif !@http_filling_active
|
|
74
|
+
# Detect new/underfilled endpoints that appeared after filling phase ended
|
|
70
75
|
max = Catpm.config.max_random_samples_per_endpoint
|
|
71
76
|
if max
|
|
72
77
|
endpoint_key = ['http', target, operation]
|
|
73
|
-
|
|
78
|
+
if instrumented_sample_counts[endpoint_key] < max
|
|
79
|
+
@http_filling_active = true
|
|
80
|
+
end
|
|
74
81
|
end
|
|
75
82
|
end
|
|
76
83
|
end
|
|
@@ -159,6 +166,7 @@ module Catpm
|
|
|
159
166
|
context[:segments] = segments
|
|
160
167
|
context[:segment_summary] = segment_data[:segment_summary]
|
|
161
168
|
context[:segments_capped] = segment_data[:segments_capped]
|
|
169
|
+
context[:segments_filtered] = segment_data[:segments_filtered] if segment_data[:segments_filtered] > 0
|
|
162
170
|
|
|
163
171
|
# Append error marker segment inside the controller
|
|
164
172
|
if payload[:exception]
|
|
@@ -340,6 +348,7 @@ module Catpm
|
|
|
340
348
|
context[:segments] = segments
|
|
341
349
|
context[:segment_summary] = segment_data[:segment_summary]
|
|
342
350
|
context[:segments_capped] = segment_data[:segments_capped]
|
|
351
|
+
context[:segments_filtered] = segment_data[:segments_filtered] if segment_data[:segments_filtered] > 0
|
|
343
352
|
|
|
344
353
|
# Append error marker segment inside the controller
|
|
345
354
|
if error
|
|
@@ -368,70 +377,19 @@ module Catpm
|
|
|
368
377
|
context = nil
|
|
369
378
|
end
|
|
370
379
|
|
|
371
|
-
request_id = req_segments&.request_id
|
|
372
|
-
|
|
373
380
|
ev = Event.new(
|
|
374
381
|
kind: kind,
|
|
375
382
|
target: target,
|
|
376
383
|
operation: operation.to_s,
|
|
377
384
|
duration: duration,
|
|
378
385
|
started_at: Time.current,
|
|
379
|
-
status: error ? 500 : 200,
|
|
386
|
+
status: error ? DEFAULT_ERROR_STATUS : DEFAULT_SUCCESS_STATUS,
|
|
380
387
|
context: context,
|
|
381
388
|
sample_type: sample_type,
|
|
382
389
|
metadata: metadata,
|
|
383
390
|
error_class: error&.class&.name,
|
|
384
391
|
error_message: error&.message,
|
|
385
|
-
backtrace: error&.backtrace,
|
|
386
|
-
request_id: request_id
|
|
387
|
-
)
|
|
388
|
-
|
|
389
|
-
Catpm.buffer&.push(ev)
|
|
390
|
-
end
|
|
391
|
-
|
|
392
|
-
def process_checkpoint(kind:, target:, operation:, context:, metadata:, checkpoint_data:, request_start:, request_id: nil)
|
|
393
|
-
return unless Catpm.enabled?
|
|
394
|
-
|
|
395
|
-
segments = checkpoint_data[:segments].dup
|
|
396
|
-
collapse_code_wrappers(segments)
|
|
397
|
-
|
|
398
|
-
duration_so_far = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - request_start) * 1000.0
|
|
399
|
-
|
|
400
|
-
# Inject root request segment
|
|
401
|
-
root_segment = {
|
|
402
|
-
type: 'request',
|
|
403
|
-
detail: "#{operation.presence || kind} #{target}",
|
|
404
|
-
duration: duration_so_far.round(2),
|
|
405
|
-
offset: 0.0
|
|
406
|
-
}
|
|
407
|
-
segments.each do |seg|
|
|
408
|
-
if seg.key?(:parent_index)
|
|
409
|
-
seg[:parent_index] += 1
|
|
410
|
-
else
|
|
411
|
-
seg[:parent_index] = 0
|
|
412
|
-
end
|
|
413
|
-
end
|
|
414
|
-
segments.unshift(root_segment)
|
|
415
|
-
|
|
416
|
-
checkpoint_context = (context || {}).dup
|
|
417
|
-
checkpoint_context[:segments] = segments
|
|
418
|
-
checkpoint_context[:segment_summary] = checkpoint_data[:summary]
|
|
419
|
-
checkpoint_context[:segments_capped] = checkpoint_data[:overflow]
|
|
420
|
-
checkpoint_context[:partial] = true
|
|
421
|
-
checkpoint_context[:checkpoint_number] = checkpoint_data[:checkpoint_number]
|
|
422
|
-
checkpoint_context = scrub(checkpoint_context)
|
|
423
|
-
|
|
424
|
-
ev = Event.new(
|
|
425
|
-
kind: kind,
|
|
426
|
-
target: target,
|
|
427
|
-
operation: operation.to_s,
|
|
428
|
-
duration: duration_so_far,
|
|
429
|
-
started_at: Time.current,
|
|
430
|
-
status: 200,
|
|
431
|
-
context: checkpoint_context,
|
|
432
|
-
sample_type: 'random',
|
|
433
|
-
metadata: (metadata || {}).dup.merge(checkpoint_data[:summary] || {}),
|
|
434
|
-
request_id: request_id
|
|
392
|
+
backtrace: error&.backtrace
|
|
435
393
|
)
|
|
436
394
|
|
|
437
395
|
Catpm.buffer&.push(ev)
|
|
@@ -459,6 +417,15 @@ module Catpm
|
|
|
459
417
|
|
|
460
418
|
# --- Pre-sampling: decide BEFORE request whether to instrument ---
|
|
461
419
|
|
|
420
|
+
# Eagerly load sample counts at startup so old endpoints don't
|
|
421
|
+
# re-enter filling phase on every process restart.
|
|
422
|
+
# Called from Lifecycle.register_hooks after flusher init.
|
|
423
|
+
def load_sample_counts_eagerly!
|
|
424
|
+
@instrumented_sample_counts = load_sample_counts_from_db
|
|
425
|
+
@instrumented_sample_counts_loaded = true
|
|
426
|
+
recompute_http_filling_active
|
|
427
|
+
end
|
|
428
|
+
|
|
462
429
|
# For HTTP middleware where endpoint is unknown at start.
|
|
463
430
|
# Returns true if this request should get full instrumentation.
|
|
464
431
|
def should_instrument_request?
|
|
@@ -468,6 +435,12 @@ module Catpm
|
|
|
468
435
|
return true
|
|
469
436
|
end
|
|
470
437
|
|
|
438
|
+
# During filling phase, instrument all requests so underfilled
|
|
439
|
+
# endpoints collect their quota (max_random_samples_per_endpoint).
|
|
440
|
+
# The flag is set by load_sample_counts_eagerly! and maintained
|
|
441
|
+
# by early_sample_type as endpoints fill up.
|
|
442
|
+
return true if @http_filling_active
|
|
443
|
+
|
|
471
444
|
rand(Catpm.config.random_sample_rate) == 0
|
|
472
445
|
end
|
|
473
446
|
|
|
@@ -501,7 +474,9 @@ module Catpm
|
|
|
501
474
|
def trigger_force_instrument(kind: nil, target: nil, operation: nil)
|
|
502
475
|
if kind && target
|
|
503
476
|
endpoint_key = [kind.to_s, target.to_s, (operation || '').to_s]
|
|
504
|
-
force_instrument_endpoints
|
|
477
|
+
if force_instrument_endpoints.size < FORCE_INSTRUMENT_MAX_ENDPOINTS
|
|
478
|
+
force_instrument_endpoints[endpoint_key] = true
|
|
479
|
+
end
|
|
505
480
|
else
|
|
506
481
|
@force_instrument_count = [(@force_instrument_count || 0) + 1, MAX_FORCE_INSTRUMENT_COUNT].min
|
|
507
482
|
end
|
|
@@ -512,10 +487,42 @@ module Catpm
|
|
|
512
487
|
@instrumented_sample_counts_loaded = false
|
|
513
488
|
@force_instrument_endpoints = nil
|
|
514
489
|
@force_instrument_count = nil
|
|
490
|
+
@http_filling_active = false
|
|
515
491
|
end
|
|
516
492
|
|
|
517
493
|
private
|
|
518
494
|
|
|
495
|
+
# Recompute whether any HTTP endpoint is still below its sample quota.
|
|
496
|
+
# Called after loading counts from DB and when an endpoint exits filling.
|
|
497
|
+
def recompute_http_filling_active
|
|
498
|
+
max = Catpm.config.max_random_samples_per_endpoint
|
|
499
|
+
@http_filling_active = if max
|
|
500
|
+
# True if hash is empty (new app / new endpoints may appear) or any endpoint below quota
|
|
501
|
+
instrumented_sample_counts.empty? || instrumented_sample_counts.any? { |_, c| c < max }
|
|
502
|
+
else
|
|
503
|
+
false # unlimited quota → no filling phase for HTTP middleware
|
|
504
|
+
end
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
# Evict half the entries from instrumented_sample_counts.
|
|
508
|
+
# Prefers evicting filled entries (count >= max) to avoid
|
|
509
|
+
# re-triggering filling phase for those endpoints.
|
|
510
|
+
def evict_sample_counts(max_random)
|
|
511
|
+
evict_count = instrumented_sample_counts.size / 2
|
|
512
|
+
if max_random
|
|
513
|
+
filled_keys = []
|
|
514
|
+
unfilled_keys = []
|
|
515
|
+
instrumented_sample_counts.each do |k, c|
|
|
516
|
+
(c >= max_random ? filled_keys : unfilled_keys) << k
|
|
517
|
+
end
|
|
518
|
+
# Evict filled first (safe), then unfilled if needed
|
|
519
|
+
to_evict = (filled_keys + unfilled_keys).first(evict_count)
|
|
520
|
+
to_evict.each { |k| instrumented_sample_counts.delete(k) }
|
|
521
|
+
else
|
|
522
|
+
evict_count.times { instrumented_sample_counts.shift }
|
|
523
|
+
end
|
|
524
|
+
end
|
|
525
|
+
|
|
519
526
|
def force_instrument_endpoints
|
|
520
527
|
@force_instrument_endpoints ||= {}
|
|
521
528
|
end
|
|
@@ -627,13 +634,9 @@ module Catpm
|
|
|
627
634
|
|
|
628
635
|
is_slow = duration >= Catpm.config.slow_threshold_for(kind.to_sym)
|
|
629
636
|
|
|
630
|
-
# Non-instrumented slow requests still get a sample (for dashboard) but
|
|
631
|
-
# don't count towards filling phase (they have no segments).
|
|
632
|
-
return 'slow' if is_slow && !instrumented
|
|
633
|
-
|
|
634
637
|
# Non-instrumented requests have no segments — skip sample creation.
|
|
635
|
-
#
|
|
636
|
-
# so the NEXT request gets full instrumentation with segments.
|
|
638
|
+
# Slow/error spikes are handled by the caller via trigger_force_instrument
|
|
639
|
+
# so the NEXT request gets full instrumentation with useful segments.
|
|
637
640
|
return nil unless instrumented
|
|
638
641
|
|
|
639
642
|
# Count this instrumented request towards filling phase completion.
|
|
@@ -644,7 +647,17 @@ module Catpm
|
|
|
644
647
|
count = instrumented_sample_counts[endpoint_key]
|
|
645
648
|
max_random = Catpm.config.max_random_samples_per_endpoint
|
|
646
649
|
if max_random.nil? || count < max_random
|
|
650
|
+
# Evict when hash exceeds derived limit — prefer filled entries
|
|
651
|
+
max_entries = Catpm.config.effective_sample_counts_max
|
|
652
|
+
if instrumented_sample_counts.size >= max_entries
|
|
653
|
+
evict_sample_counts(max_random)
|
|
654
|
+
end
|
|
647
655
|
instrumented_sample_counts[endpoint_key] = count + 1
|
|
656
|
+
|
|
657
|
+
# Endpoint just reached quota — recheck if any filling endpoints remain
|
|
658
|
+
if max_random && count + 1 >= max_random
|
|
659
|
+
recompute_http_filling_active
|
|
660
|
+
end
|
|
648
661
|
end
|
|
649
662
|
|
|
650
663
|
return 'slow' if is_slow
|
|
@@ -758,10 +771,10 @@ module Catpm
|
|
|
758
771
|
end
|
|
759
772
|
|
|
760
773
|
def build_http_metadata(payload)
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
774
|
+
metadata = {}
|
|
775
|
+
metadata[:db_runtime] = payload[:db_runtime] if payload[:db_runtime]
|
|
776
|
+
metadata[:view_runtime] = payload[:view_runtime] if payload[:view_runtime]
|
|
777
|
+
metadata
|
|
765
778
|
end
|
|
766
779
|
|
|
767
780
|
def scrub(hash)
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class Configuration
|
|
5
|
+
MIN_BUFFER_MEMORY = 1_048_576 # 1 MB — floor for buffer (meaningful buffering)
|
|
6
|
+
DEFAULT_ASSUMED_THREADS = 5 # fallback when thread detection fails
|
|
7
|
+
|
|
8
|
+
# Global memory budget distribution shares
|
|
9
|
+
BUFFER_MEMORY_SHARE = 0.5 # 50% of max_memory for event buffer
|
|
10
|
+
CACHE_ENTRIES_PER_MB = 10_000 # ~100 bytes/entry in path_cache
|
|
11
|
+
PATH_CACHE_BUDGET_SHARE = 0.05 # 5% of max_memory for path_cache
|
|
12
|
+
SAMPLE_COUNTS_PER_MB = 12_500 # ~80 bytes/entry in sample counts hash
|
|
13
|
+
SAMPLE_COUNTS_BUDGET_SHARE = 0.02 # 2% of max_memory for sample counts
|
|
14
|
+
|
|
5
15
|
# Boolean / non-numeric settings — plain attr_accessor
|
|
6
16
|
attr_accessor :enabled,
|
|
7
17
|
:instrument_http,
|
|
@@ -19,8 +29,6 @@ module Catpm
|
|
|
19
29
|
:http_basic_auth_password,
|
|
20
30
|
:access_policy,
|
|
21
31
|
:additional_filter_parameters,
|
|
22
|
-
:auto_instrument_methods,
|
|
23
|
-
:service_base_classes,
|
|
24
32
|
:events_enabled,
|
|
25
33
|
:track_own_requests,
|
|
26
34
|
:downsampling_thresholds,
|
|
@@ -28,11 +36,11 @@ module Catpm
|
|
|
28
36
|
|
|
29
37
|
# Numeric settings that must be positive numbers (nil not allowed)
|
|
30
38
|
REQUIRED_NUMERIC = %i[
|
|
31
|
-
slow_threshold
|
|
39
|
+
slow_threshold flush_interval flush_jitter
|
|
32
40
|
random_sample_rate cleanup_interval
|
|
33
41
|
circuit_breaker_failure_threshold circuit_breaker_recovery_timeout
|
|
34
42
|
sqlite_busy_timeout persistence_batch_size shutdown_timeout
|
|
35
|
-
stack_sample_interval
|
|
43
|
+
stack_sample_interval min_segment_duration max_memory
|
|
36
44
|
].freeze
|
|
37
45
|
|
|
38
46
|
# Numeric settings where nil means "no limit" / "disabled"
|
|
@@ -43,7 +51,6 @@ module Catpm
|
|
|
43
51
|
events_max_samples_per_name max_stack_samples_per_request
|
|
44
52
|
max_error_detail_length max_fingerprint_app_frames
|
|
45
53
|
max_fingerprint_gem_frames cleanup_batch_size caller_scan_depth
|
|
46
|
-
max_request_memory
|
|
47
54
|
].freeze
|
|
48
55
|
|
|
49
56
|
(REQUIRED_NUMERIC + OPTIONAL_NUMERIC).each do |attr|
|
|
@@ -72,13 +79,13 @@ module Catpm
|
|
|
72
79
|
@instrument_stack_sampler = false
|
|
73
80
|
@instrument_middleware_stack = false
|
|
74
81
|
@max_segments_per_request = 50
|
|
75
|
-
@
|
|
82
|
+
@min_segment_duration = 5.0 # ms — segments shorter than this are counted in summary but not stored (0.0 = store all)
|
|
76
83
|
@max_sql_length = 200
|
|
77
84
|
@slow_threshold = 500 # milliseconds
|
|
78
85
|
@slow_threshold_per_kind = {}
|
|
79
86
|
@ignored_targets = []
|
|
80
87
|
@retention_period = nil # nil = keep forever (data is downsampled, not deleted)
|
|
81
|
-
@
|
|
88
|
+
@max_memory = 20 # MB — global memory budget (2% of 1GB server)
|
|
82
89
|
@flush_interval = 30 # seconds
|
|
83
90
|
@flush_jitter = 5 # ±seconds
|
|
84
91
|
@max_error_contexts = 5
|
|
@@ -88,8 +95,6 @@ module Catpm
|
|
|
88
95
|
@http_basic_auth_password = nil
|
|
89
96
|
@access_policy = nil
|
|
90
97
|
@additional_filter_parameters = []
|
|
91
|
-
@auto_instrument_methods = []
|
|
92
|
-
@service_base_classes = nil # nil = auto-detect (ApplicationService, BaseService)
|
|
93
98
|
@random_sample_rate = 20
|
|
94
99
|
@max_random_samples_per_endpoint = 5
|
|
95
100
|
@max_slow_samples_per_endpoint = 5
|
|
@@ -117,11 +122,26 @@ module Catpm
|
|
|
117
122
|
@max_fingerprint_gem_frames = 3
|
|
118
123
|
@cleanup_batch_size = 1_000
|
|
119
124
|
@caller_scan_depth = 50
|
|
120
|
-
@max_request_memory = 2.megabytes
|
|
121
125
|
@instrument_call_tree = false
|
|
122
126
|
@show_untracked_segments = false
|
|
123
127
|
end
|
|
124
128
|
|
|
129
|
+
# Buffer gets BUFFER_MEMORY_SHARE of max_memory, scaled by thread count
|
|
130
|
+
def effective_max_buffer_memory
|
|
131
|
+
bytes = (max_memory * 1_048_576 * BUFFER_MEMORY_SHARE).to_i
|
|
132
|
+
[bytes, MIN_BUFFER_MEMORY].max
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
# Path cache limit derived from max_memory
|
|
136
|
+
def effective_path_cache_max
|
|
137
|
+
(max_memory * CACHE_ENTRIES_PER_MB * PATH_CACHE_BUDGET_SHARE).to_i
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
# Sample counts hash limit derived from max_memory
|
|
141
|
+
def effective_sample_counts_max
|
|
142
|
+
(max_memory * SAMPLE_COUNTS_PER_MB * SAMPLE_COUNTS_BUDGET_SHARE).to_i
|
|
143
|
+
end
|
|
144
|
+
|
|
125
145
|
def slow_threshold_for(kind)
|
|
126
146
|
slow_threshold_per_kind.fetch(kind.to_sym, slow_threshold)
|
|
127
147
|
end
|
data/lib/catpm/engine.rb
CHANGED
|
@@ -12,7 +12,6 @@ module Catpm
|
|
|
12
12
|
if Catpm.enabled?
|
|
13
13
|
Catpm::Subscribers.subscribe!
|
|
14
14
|
Catpm::Lifecycle.register_hooks
|
|
15
|
-
Catpm::AutoInstrument.apply!
|
|
16
15
|
|
|
17
16
|
if Catpm.config.instrument_middleware_stack
|
|
18
17
|
app = Rails.application
|
|
@@ -26,8 +25,5 @@ module Catpm
|
|
|
26
25
|
end
|
|
27
26
|
end
|
|
28
27
|
|
|
29
|
-
config.to_prepare do
|
|
30
|
-
Catpm::AutoInstrument.apply! if Catpm.enabled?
|
|
31
|
-
end
|
|
32
28
|
end
|
|
33
29
|
end
|
data/lib/catpm/event.rb
CHANGED
|
@@ -9,14 +9,14 @@ module Catpm
|
|
|
9
9
|
|
|
10
10
|
attr_accessor :kind, :target, :operation, :duration, :started_at,
|
|
11
11
|
:metadata, :error_class, :error_message, :backtrace,
|
|
12
|
-
:sample_type, :context, :status, :request_id
|
|
12
|
+
:sample_type, :context, :status
|
|
13
13
|
|
|
14
14
|
EMPTY_HASH = {}.freeze
|
|
15
15
|
private_constant :EMPTY_HASH
|
|
16
16
|
|
|
17
17
|
def initialize(kind:, target:, operation: '', duration: 0.0, started_at: nil,
|
|
18
18
|
metadata: nil, error_class: nil, error_message: nil, backtrace: nil,
|
|
19
|
-
sample_type: nil, context: nil, status: nil, request_id: nil)
|
|
19
|
+
sample_type: nil, context: nil, status: nil)
|
|
20
20
|
@kind = kind.to_s
|
|
21
21
|
@target = target.to_s
|
|
22
22
|
@operation = (operation || '').to_s
|
|
@@ -32,7 +32,6 @@ module Catpm
|
|
|
32
32
|
@sample_type = sample_type
|
|
33
33
|
@context = context
|
|
34
34
|
@status = status
|
|
35
|
-
@request_id = request_id
|
|
36
35
|
end
|
|
37
36
|
|
|
38
37
|
def estimated_bytes
|
data/lib/catpm/fingerprint.rb
CHANGED