catpm 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/catpm/application_controller.rb +8 -0
- data/app/controllers/catpm/endpoints_controller.rb +16 -3
- data/app/controllers/catpm/errors_controller.rb +1 -1
- data/app/controllers/catpm/events_controller.rb +2 -2
- data/app/controllers/catpm/status_controller.rb +1 -1
- data/app/controllers/catpm/system_controller.rb +0 -3
- data/app/helpers/catpm/application_helper.rb +1 -1
- data/app/views/catpm/endpoints/show.html.erb +13 -8
- data/app/views/catpm/samples/show.html.erb +20 -34
- data/app/views/catpm/shared/_page_nav.html.erb +3 -1
- data/app/views/catpm/system/index.html.erb +2 -2
- data/config/routes.rb +1 -0
- data/lib/catpm/collector.rb +215 -150
- data/lib/catpm/event.rb +2 -2
- data/lib/catpm/flusher.rb +28 -41
- data/lib/catpm/stack_sampler.rb +53 -12
- data/lib/catpm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ac5b510824ed9364db9d541a92eb95b1b0339c1972e48c2cfc634817d36d2600
|
|
4
|
+
data.tar.gz: 6f7f990fd824795ea8b9ef66e51f28e120d45aa960a2d1fb1e42c85f31458021
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ca29621896898fcf69b876260bb27f68df164931986695905737fee23080e55183a6440292d3a8f60f2668c417d20cfb39d9219b2882adbaa764a94e1a3345ea
|
|
7
|
+
data.tar.gz: e8bc348a4c5fe4028403512a8c96f5b60dbc62676167fc41c56710440688aac206ec4b423c2ccd904a5bebef6428a19835af9e10dcc5961141459a9a0dce1073
|
|
@@ -2,5 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
module Catpm
|
|
4
4
|
class ApplicationController < ActionController::Base
|
|
5
|
+
private
|
|
6
|
+
|
|
7
|
+
def remembered_range
|
|
8
|
+
if params[:range].present?
|
|
9
|
+
cookies[:catpm_range] = { value: params[:range], expires: 1.year.from_now }
|
|
10
|
+
end
|
|
11
|
+
params[:range] || cookies[:catpm_range]
|
|
12
|
+
end
|
|
5
13
|
end
|
|
6
14
|
end
|
|
@@ -8,7 +8,7 @@ module Catpm
|
|
|
8
8
|
@operation = params[:operation].presence || ''
|
|
9
9
|
|
|
10
10
|
# Time range filter
|
|
11
|
-
@range, period, _bucket_seconds = helpers.parse_range(
|
|
11
|
+
@range, period, _bucket_seconds = helpers.parse_range(remembered_range, extra_valid: ['all'])
|
|
12
12
|
|
|
13
13
|
scope = Catpm::Bucket
|
|
14
14
|
.where(kind: @kind, target: @target, operation: @operation)
|
|
@@ -23,11 +23,15 @@ module Catpm
|
|
|
23
23
|
'MAX(duration_max)',
|
|
24
24
|
'MIN(duration_min)',
|
|
25
25
|
'SUM(failure_count)',
|
|
26
|
-
'SUM(success_count)'
|
|
26
|
+
'SUM(success_count)',
|
|
27
|
+
'MIN(bucket_start)',
|
|
28
|
+
'MAX(bucket_start)'
|
|
27
29
|
)
|
|
28
30
|
|
|
29
31
|
@count, @duration_sum, @duration_max, @duration_min, @failure_count, @success_count =
|
|
30
|
-
@aggregate.map { |v| v || 0 }
|
|
32
|
+
@aggregate[0..5].map { |v| v || 0 }
|
|
33
|
+
@first_event_at = @aggregate[6]
|
|
34
|
+
@last_event_at = @aggregate[7]
|
|
31
35
|
|
|
32
36
|
@avg_duration = @count > 0 ? @duration_sum / @count : 0.0
|
|
33
37
|
@failure_rate = @count > 0 ? @failure_count.to_f / @count : 0.0
|
|
@@ -59,5 +63,14 @@ module Catpm
|
|
|
59
63
|
|
|
60
64
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
61
65
|
end
|
|
66
|
+
|
|
67
|
+
def destroy
|
|
68
|
+
kind = params[:kind]
|
|
69
|
+
target = params[:target]
|
|
70
|
+
operation = params[:operation].presence || ''
|
|
71
|
+
|
|
72
|
+
Catpm::Bucket.where(kind: kind, target: target, operation: operation).destroy_all
|
|
73
|
+
redirect_to catpm.status_index_path, notice: 'Endpoint deleted'
|
|
74
|
+
end
|
|
62
75
|
end
|
|
63
76
|
end
|
|
@@ -36,7 +36,7 @@ module Catpm
|
|
|
36
36
|
@contexts = @error.parsed_contexts
|
|
37
37
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
38
38
|
|
|
39
|
-
@range, period, bucket_seconds = helpers.parse_range(
|
|
39
|
+
@range, period, bucket_seconds = helpers.parse_range(remembered_range)
|
|
40
40
|
|
|
41
41
|
# Samples table: 20 most recent linked by fingerprint
|
|
42
42
|
@samples = Catpm::Sample.where(error_fingerprint: @error.fingerprint)
|
|
@@ -5,7 +5,7 @@ module Catpm
|
|
|
5
5
|
PER_PAGE = 25
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
|
-
@range, period, bucket_seconds = helpers.parse_range(
|
|
8
|
+
@range, period, bucket_seconds = helpers.parse_range(remembered_range)
|
|
9
9
|
|
|
10
10
|
recent_buckets = Catpm::EventBucket.recent(period).to_a
|
|
11
11
|
|
|
@@ -59,7 +59,7 @@ module Catpm
|
|
|
59
59
|
|
|
60
60
|
def show
|
|
61
61
|
@name = params[:name]
|
|
62
|
-
@range, period, bucket_seconds = helpers.parse_range(
|
|
62
|
+
@range, period, bucket_seconds = helpers.parse_range(remembered_range)
|
|
63
63
|
|
|
64
64
|
recent_buckets = Catpm::EventBucket.by_name(@name).recent(period).to_a
|
|
65
65
|
|
|
@@ -6,7 +6,7 @@ module Catpm
|
|
|
6
6
|
|
|
7
7
|
def index
|
|
8
8
|
# Time range (parsed first — everything below uses this)
|
|
9
|
-
@range, period, bucket_seconds = helpers.parse_range(
|
|
9
|
+
@range, period, bucket_seconds = helpers.parse_range(remembered_range)
|
|
10
10
|
|
|
11
11
|
recent_buckets = Catpm::Bucket.recent(period).to_a
|
|
12
12
|
|
|
@@ -7,9 +7,6 @@ module Catpm
|
|
|
7
7
|
@buffer_size = Catpm.buffer&.size || 0
|
|
8
8
|
@buffer_bytes = Catpm.buffer&.current_bytes || 0
|
|
9
9
|
@config = Catpm.config
|
|
10
|
-
@bucket_count = Catpm::Bucket.count
|
|
11
|
-
@sample_count = Catpm::Sample.count
|
|
12
|
-
@error_count = Catpm::ErrorRecord.count
|
|
13
10
|
@oldest_bucket = Catpm::Bucket.minimum(:bucket_start)
|
|
14
11
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
15
12
|
end
|
|
@@ -236,7 +236,7 @@ module Catpm
|
|
|
236
236
|
prev_url = '?' + prev_params.compact.map { |k, v| "#{k}=#{v}" }.join('&')
|
|
237
237
|
next_url = '?' + next_params.compact.map { |k, v| "#{k}=#{v}" }.join('&')
|
|
238
238
|
|
|
239
|
-
html = '<div class="pagination">'
|
|
239
|
+
html = +'<div class="pagination">'
|
|
240
240
|
if current_page > 1
|
|
241
241
|
html << %(<a href="#{prev_url}" class="btn">← Previous</a>)
|
|
242
242
|
else
|
|
@@ -8,10 +8,15 @@
|
|
|
8
8
|
|
|
9
9
|
<%= render "catpm/shared/page_nav", active: "performance" %>
|
|
10
10
|
|
|
11
|
-
<div class="breadcrumbs">
|
|
12
|
-
<
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
<div class="breadcrumbs" style="display:flex; align-items:center; justify-content:space-between">
|
|
12
|
+
<div>
|
|
13
|
+
<a href="<%= catpm.status_index_path %>">Performance</a>
|
|
14
|
+
<span class="sep">/</span>
|
|
15
|
+
<span><%= @target %></span>
|
|
16
|
+
</div>
|
|
17
|
+
<%= button_to "Delete Endpoint", catpm.endpoint_path(kind: @kind, target: @target, operation: @operation),
|
|
18
|
+
method: :delete, class: "btn btn-danger",
|
|
19
|
+
data: { confirm: "Delete this endpoint and all its data? This cannot be undone." } %>
|
|
15
20
|
</div>
|
|
16
21
|
|
|
17
22
|
<% ep_params = { kind: @kind, target: @target, operation: @operation } %>
|
|
@@ -27,12 +32,12 @@
|
|
|
27
32
|
<div class="value"><%= @count %></div>
|
|
28
33
|
</div>
|
|
29
34
|
<div class="card">
|
|
30
|
-
<div class="label">
|
|
31
|
-
<div class="value"><%=
|
|
35
|
+
<div class="label">First Event</div>
|
|
36
|
+
<div class="value"><%= @first_event_at ? time_with_tooltip(@first_event_at) : "—" %></div>
|
|
32
37
|
</div>
|
|
33
38
|
<div class="card">
|
|
34
|
-
<div class="label">
|
|
35
|
-
<div class="value"><%= @
|
|
39
|
+
<div class="label">Last Event</div>
|
|
40
|
+
<div class="value"><%= @last_event_at ? time_with_tooltip(@last_event_at) : "—" %></div>
|
|
36
41
|
</div>
|
|
37
42
|
<div class="card">
|
|
38
43
|
<div class="label">Max</div>
|
|
@@ -34,48 +34,34 @@
|
|
|
34
34
|
<% end %>
|
|
35
35
|
</div>
|
|
36
36
|
|
|
37
|
-
<%# ─── Request Context
|
|
37
|
+
<%# ─── Request Context ─── %>
|
|
38
38
|
<%
|
|
39
|
-
ctx_display = @context.except("segments", :segments, "segment_summary", :segment_summary, "segments_capped", :segments_capped, "backtrace", :backtrace)
|
|
39
|
+
ctx_display = @context.except("segments", :segments, "segment_summary", :segment_summary, "segments_capped", :segments_capped, "backtrace", :backtrace, "method", :method, "path", :path, "status", :status)
|
|
40
40
|
ctx_flat = ctx_display.select { |_, v| !v.is_a?(Hash) && !v.is_a?(Array) }
|
|
41
41
|
ctx_nested = ctx_display.select { |_, v| v.is_a?(Hash) || v.is_a?(Array) }
|
|
42
42
|
%>
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
<%
|
|
49
|
-
<div class="
|
|
50
|
-
|
|
51
|
-
<div class="ctx-key"><%= k %></div>
|
|
52
|
-
<div class="ctx-val"><%= v.to_s.truncate(200) %></div>
|
|
53
|
-
<% end %>
|
|
54
|
-
</div>
|
|
55
|
-
<% end %>
|
|
56
|
-
<% if ctx_nested.any? %>
|
|
57
|
-
<% ctx_nested.each do |k, v| %>
|
|
58
|
-
<details class="collapsible" open>
|
|
59
|
-
<summary><%= k %></summary>
|
|
60
|
-
<div class="details-body">
|
|
61
|
-
<pre class="mono" style="color:var(--text-1); white-space:pre-wrap; font-size:12px"><%= JSON.pretty_generate(v) rescue v.inspect %></pre>
|
|
62
|
-
</div>
|
|
63
|
-
</details>
|
|
64
|
-
<% end %>
|
|
44
|
+
<% if ctx_display.any? %>
|
|
45
|
+
<h2>Request Context</h2>
|
|
46
|
+
<% if ctx_flat.any? %>
|
|
47
|
+
<div class="context-grid" style="margin-bottom:12px">
|
|
48
|
+
<% ctx_flat.each do |k, v| %>
|
|
49
|
+
<div class="ctx-key"><%= k %></div>
|
|
50
|
+
<div class="ctx-val"><%= v.to_s.truncate(200) %></div>
|
|
65
51
|
<% end %>
|
|
66
52
|
</div>
|
|
67
53
|
<% end %>
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
</
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
54
|
+
<% if ctx_nested.any? %>
|
|
55
|
+
<% ctx_nested.each do |k, v| %>
|
|
56
|
+
<details class="collapsible" open>
|
|
57
|
+
<summary><%= k %></summary>
|
|
58
|
+
<div class="details-body">
|
|
59
|
+
<pre class="mono" style="color:var(--text-1); white-space:pre-wrap; font-size:12px"><%= JSON.pretty_generate(v) rescue v.inspect %></pre>
|
|
60
|
+
</div>
|
|
61
|
+
</details>
|
|
62
|
+
<% end %>
|
|
63
|
+
<% end %>
|
|
64
|
+
<% end %>
|
|
79
65
|
|
|
80
66
|
<%# ─── Time Breakdown (full width, above waterfall) ─── %>
|
|
81
67
|
<% if @summary.any? %>
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
<div class="page-nav">
|
|
2
2
|
<a href="<%= catpm.status_index_path %>"<%= ' class="active"'.html_safe if active == "performance" %>>Performance</a>
|
|
3
|
-
|
|
3
|
+
<% if Catpm.config.events_enabled || Catpm::EventBucket.exists? %>
|
|
4
|
+
<a href="<%= catpm.events_path %>"<%= ' class="active"'.html_safe if active == "events" %>>Events</a>
|
|
5
|
+
<% end %>
|
|
4
6
|
<a href="<%= catpm.errors_path %>"<%= ' class="active"'.html_safe if active == "errors" %>>Errors<% if @active_error_count.to_i > 0 %><span class="nav-count alert"><%= @active_error_count %></span><% end %></a>
|
|
5
7
|
<a href="<%= catpm.system_index_path %>"<%= ' class="active"'.html_safe if active == "system" %>>System</a>
|
|
6
8
|
</div>
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
<div class="pipeline-node">
|
|
39
39
|
<div class="node-icon"><svg width="28" height="28" viewBox="0 0 28 28" fill="none" stroke="var(--text-2)" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><ellipse cx="14" cy="8" rx="8" ry="4"/><path d="M6 8v12c0 2.2 3.58 4 8 4s8-1.8 8-4V8"/><path d="M6 14c0 2.2 3.58 4 8 4s8-1.8 8-4"/></svg></div>
|
|
40
40
|
<div class="node-label">Database</div>
|
|
41
|
-
<div class="node-value"
|
|
42
|
-
<div class="node-detail">Aggregated stats are stored as time buckets,
|
|
41
|
+
<div class="node-value" style="font-size:14px">Storage</div>
|
|
42
|
+
<div class="node-detail">Aggregated stats are stored as time buckets, with detailed samples and error fingerprints.<br><%= @oldest_bucket ? "Data since #{@oldest_bucket.strftime('%b %-d')}, retained #{@config.retention_period ? "#{(@config.retention_period / 1.day).to_i} days" : "forever"}." : "No data yet." %></div>
|
|
43
43
|
</div>
|
|
44
44
|
</div>
|
|
45
45
|
|
data/config/routes.rb
CHANGED
|
@@ -5,6 +5,7 @@ Catpm::Engine.routes.draw do
|
|
|
5
5
|
resources :status, only: [:index]
|
|
6
6
|
resources :system, only: [:index]
|
|
7
7
|
get 'endpoint', to: 'endpoints#show', as: :endpoint
|
|
8
|
+
delete 'endpoint', to: 'endpoints#destroy'
|
|
8
9
|
resources :samples, only: [:show]
|
|
9
10
|
resources :events, only: [:index, :show], param: :name
|
|
10
11
|
resources :errors, only: [:index, :show, :destroy] do
|
data/lib/catpm/collector.rb
CHANGED
|
@@ -13,118 +13,133 @@ module Catpm
|
|
|
13
13
|
|
|
14
14
|
duration = event.duration # milliseconds
|
|
15
15
|
status = payload[:status] || (payload[:exception] ? 500 : nil)
|
|
16
|
-
context = build_http_context(payload)
|
|
17
16
|
metadata = build_http_metadata(payload)
|
|
18
17
|
|
|
19
18
|
req_segments = Thread.current[:catpm_request_segments]
|
|
20
19
|
if req_segments
|
|
21
20
|
segment_data = req_segments.to_h
|
|
22
|
-
segments = segment_data[:segments]
|
|
23
21
|
|
|
24
|
-
#
|
|
25
|
-
# (event.duration only covers the controller action, not middleware)
|
|
22
|
+
# Total request duration is always needed (includes middleware time)
|
|
26
23
|
total_request_duration = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - req_segments.request_start) * 1000.0
|
|
24
|
+
duration = total_request_duration
|
|
27
25
|
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
26
|
+
# Segment summary is always needed for bucket metadata aggregation
|
|
27
|
+
segment_data[:segment_summary].each { |k, v| metadata[k] = v }
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Early sampling decision — only build heavy context for sampled events
|
|
31
|
+
operation = payload[:method] || 'GET'
|
|
32
|
+
sample_type = early_sample_type(
|
|
33
|
+
error: payload[:exception],
|
|
34
|
+
duration: duration,
|
|
35
|
+
kind: :http,
|
|
36
|
+
target: target,
|
|
37
|
+
operation: operation
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
if sample_type
|
|
41
|
+
context = build_http_context(payload)
|
|
42
|
+
|
|
43
|
+
if req_segments
|
|
44
|
+
segments = segment_data[:segments]
|
|
45
|
+
|
|
46
|
+
# Inject root request segment with full duration
|
|
47
|
+
root_segment = {
|
|
48
|
+
type: 'request',
|
|
49
|
+
detail: "#{payload[:method]} #{payload[:path]}",
|
|
50
|
+
duration: total_request_duration.round(2),
|
|
51
|
+
offset: 0.0
|
|
52
|
+
}
|
|
53
|
+
segments.each do |seg|
|
|
54
|
+
if seg.key?(:parent_index)
|
|
55
|
+
seg[:parent_index] += 1
|
|
56
|
+
else
|
|
57
|
+
seg[:parent_index] = 0
|
|
58
|
+
end
|
|
40
59
|
end
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
segments.unshift(root_segment)
|
|
61
|
+
|
|
62
|
+
# Inject synthetic middleware segment if there's a time gap before the controller action
|
|
63
|
+
# (only when real per-middleware segments are not present)
|
|
64
|
+
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
65
|
+
if ctrl_idx
|
|
66
|
+
has_real_middleware = segments.any? { |s| s[:type] == 'middleware' }
|
|
67
|
+
ctrl_offset = (segments[ctrl_idx][:offset] || 0.0).to_f
|
|
68
|
+
if ctrl_offset > 0.5 && !has_real_middleware
|
|
69
|
+
middleware_seg = {
|
|
70
|
+
type: 'middleware',
|
|
71
|
+
detail: 'Middleware Stack',
|
|
72
|
+
duration: ctrl_offset.round(2),
|
|
73
|
+
offset: 0.0,
|
|
74
|
+
parent_index: 0
|
|
75
|
+
}
|
|
76
|
+
segments.insert(1, middleware_seg)
|
|
77
|
+
# Shift parent_index for segments that moved down
|
|
78
|
+
segments.each_with_index do |seg, i|
|
|
79
|
+
next if i <= 1
|
|
80
|
+
next unless seg.key?(:parent_index)
|
|
81
|
+
seg[:parent_index] += 1 if seg[:parent_index] >= 1
|
|
82
|
+
end
|
|
83
|
+
# Add to summary so Time Breakdown shows middleware
|
|
84
|
+
segment_data[:segment_summary][:middleware_count] = 1
|
|
85
|
+
segment_data[:segment_summary][:middleware_duration] = ctrl_offset.round(2)
|
|
64
86
|
end
|
|
65
|
-
# Add to summary so Time Breakdown shows middleware
|
|
66
|
-
segment_data[:segment_summary][:middleware_count] = 1
|
|
67
|
-
segment_data[:segment_summary][:middleware_duration] = ctrl_offset.round(2)
|
|
68
87
|
end
|
|
69
|
-
end
|
|
70
88
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
89
|
+
# Fill untracked controller time with sampler data or synthetic segment
|
|
90
|
+
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
91
|
+
if ctrl_idx
|
|
92
|
+
ctrl_seg = segments[ctrl_idx]
|
|
93
|
+
ctrl_dur = (ctrl_seg[:duration] || 0).to_f
|
|
94
|
+
child_dur = segments.each_with_index.sum do |pair|
|
|
95
|
+
seg, i = pair
|
|
96
|
+
next 0.0 if i == ctrl_idx
|
|
97
|
+
(seg[:parent_index] == ctrl_idx) ? (seg[:duration] || 0).to_f : 0.0
|
|
98
|
+
end
|
|
99
|
+
gap = ctrl_dur - child_dur
|
|
82
100
|
|
|
83
|
-
|
|
84
|
-
|
|
101
|
+
if gap > 1.0
|
|
102
|
+
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
103
|
+
end
|
|
85
104
|
end
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
context[:segments] = segments
|
|
89
|
-
context[:segment_summary] = segment_data[:segment_summary]
|
|
90
|
-
context[:segments_capped] = segment_data[:segments_capped]
|
|
91
|
-
|
|
92
|
-
segment_data[:segment_summary].each do |k, v|
|
|
93
|
-
metadata[k] = v
|
|
94
|
-
end
|
|
95
105
|
|
|
96
|
-
|
|
97
|
-
|
|
106
|
+
context[:segments] = segments
|
|
107
|
+
context[:segment_summary] = segment_data[:segment_summary]
|
|
108
|
+
context[:segments_capped] = segment_data[:segments_capped]
|
|
109
|
+
|
|
110
|
+
# Append error marker segment inside the controller
|
|
111
|
+
if payload[:exception]
|
|
112
|
+
error_parent = ctrl_idx || 0
|
|
113
|
+
error_offset = if ctrl_idx
|
|
114
|
+
ctrl = segments[ctrl_idx]
|
|
115
|
+
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
116
|
+
else
|
|
117
|
+
duration.round(2)
|
|
118
|
+
end
|
|
98
119
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
120
|
+
context[:segments] << {
|
|
121
|
+
type: 'error',
|
|
122
|
+
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(200),
|
|
123
|
+
source: payload[:exception_object]&.backtrace&.first,
|
|
124
|
+
duration: 0,
|
|
125
|
+
offset: error_offset,
|
|
126
|
+
parent_index: error_parent
|
|
127
|
+
}
|
|
107
128
|
end
|
|
108
|
-
|
|
109
|
-
context[:segments] << {
|
|
110
|
-
type: 'error',
|
|
111
|
-
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(200),
|
|
112
|
-
source: payload[:exception_object]&.backtrace&.first,
|
|
113
|
-
duration: 0,
|
|
114
|
-
offset: error_offset,
|
|
115
|
-
parent_index: error_parent
|
|
116
|
-
}
|
|
117
129
|
end
|
|
130
|
+
|
|
131
|
+
context = scrub(context)
|
|
118
132
|
end
|
|
119
133
|
|
|
120
134
|
ev = Event.new(
|
|
121
135
|
kind: :http,
|
|
122
136
|
target: target,
|
|
123
|
-
operation:
|
|
137
|
+
operation: operation,
|
|
124
138
|
duration: duration,
|
|
125
139
|
started_at: Time.current,
|
|
126
140
|
status: status,
|
|
127
|
-
context:
|
|
141
|
+
context: context,
|
|
142
|
+
sample_type: sample_type,
|
|
128
143
|
metadata: metadata,
|
|
129
144
|
error_class: payload[:exception]&.first,
|
|
130
145
|
error_message: payload[:exception]&.last,
|
|
@@ -149,15 +164,25 @@ module Catpm
|
|
|
149
164
|
((Time.current - job.enqueued_at.to_time) * 1000.0) rescue nil
|
|
150
165
|
end
|
|
151
166
|
|
|
152
|
-
context = {
|
|
153
|
-
job_class: target,
|
|
154
|
-
job_id: job.job_id,
|
|
155
|
-
queue: job.queue_name,
|
|
156
|
-
attempts: job.executions
|
|
157
|
-
}
|
|
158
|
-
|
|
159
167
|
metadata = { queue_wait: queue_wait }.compact
|
|
160
168
|
|
|
169
|
+
sample_type = early_sample_type(
|
|
170
|
+
error: exception,
|
|
171
|
+
duration: duration,
|
|
172
|
+
kind: :job,
|
|
173
|
+
target: target,
|
|
174
|
+
operation: job.queue_name
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
context = if sample_type
|
|
178
|
+
{
|
|
179
|
+
job_class: target,
|
|
180
|
+
job_id: job.job_id,
|
|
181
|
+
queue: job.queue_name,
|
|
182
|
+
attempts: job.executions
|
|
183
|
+
}
|
|
184
|
+
end
|
|
185
|
+
|
|
161
186
|
ev = Event.new(
|
|
162
187
|
kind: :job,
|
|
163
188
|
target: target,
|
|
@@ -165,6 +190,7 @@ module Catpm
|
|
|
165
190
|
duration: duration,
|
|
166
191
|
started_at: Time.current,
|
|
167
192
|
context: context,
|
|
193
|
+
sample_type: sample_type,
|
|
168
194
|
metadata: metadata,
|
|
169
195
|
error_class: exception&.class&.name,
|
|
170
196
|
error_message: exception&.message,
|
|
@@ -178,73 +204,88 @@ module Catpm
|
|
|
178
204
|
return unless Catpm.enabled?
|
|
179
205
|
return if Catpm.config.ignored?(target)
|
|
180
206
|
|
|
181
|
-
context = (context || {}).dup
|
|
182
207
|
metadata = (metadata || {}).dup
|
|
183
208
|
|
|
184
209
|
if req_segments
|
|
185
210
|
segment_data = req_segments.to_h
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
# Inject root request segment
|
|
189
|
-
root_segment = {
|
|
190
|
-
type: 'request',
|
|
191
|
-
detail: "#{operation.presence || kind} #{target}",
|
|
192
|
-
duration: duration.round(2),
|
|
193
|
-
offset: 0.0
|
|
194
|
-
}
|
|
195
|
-
segments.each do |seg|
|
|
196
|
-
if seg.key?(:parent_index)
|
|
197
|
-
seg[:parent_index] += 1
|
|
198
|
-
else
|
|
199
|
-
seg[:parent_index] = 0
|
|
200
|
-
end
|
|
201
|
-
end
|
|
202
|
-
segments.unshift(root_segment)
|
|
203
|
-
|
|
204
|
-
# Fill untracked controller time with sampler data or synthetic segment
|
|
205
|
-
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
206
|
-
if ctrl_idx
|
|
207
|
-
ctrl_seg = segments[ctrl_idx]
|
|
208
|
-
ctrl_dur = (ctrl_seg[:duration] || 0).to_f
|
|
209
|
-
child_dur = segments.each_with_index.sum do |pair|
|
|
210
|
-
seg, i = pair
|
|
211
|
-
next 0.0 if i == ctrl_idx
|
|
212
|
-
(seg[:parent_index] == ctrl_idx) ? (seg[:duration] || 0).to_f : 0.0
|
|
213
|
-
end
|
|
214
|
-
gap = ctrl_dur - child_dur
|
|
211
|
+
segment_data[:segment_summary]&.each { |k, v| metadata[k] = v }
|
|
212
|
+
end
|
|
215
213
|
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
214
|
+
sample_type = early_sample_type(
|
|
215
|
+
error: error,
|
|
216
|
+
duration: duration,
|
|
217
|
+
kind: kind,
|
|
218
|
+
target: target,
|
|
219
|
+
operation: operation
|
|
220
|
+
)
|
|
220
221
|
|
|
221
|
-
|
|
222
|
-
context
|
|
223
|
-
context[:segments_capped] = segment_data[:segments_capped]
|
|
222
|
+
if sample_type
|
|
223
|
+
context = (context || {}).dup
|
|
224
224
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
225
|
+
if req_segments && segment_data
|
|
226
|
+
segments = segment_data[:segments]
|
|
227
|
+
|
|
228
|
+
# Inject root request segment
|
|
229
|
+
root_segment = {
|
|
230
|
+
type: 'request',
|
|
231
|
+
detail: "#{operation.presence || kind} #{target}",
|
|
232
|
+
duration: duration.round(2),
|
|
233
|
+
offset: 0.0
|
|
234
|
+
}
|
|
235
|
+
segments.each do |seg|
|
|
236
|
+
if seg.key?(:parent_index)
|
|
237
|
+
seg[:parent_index] += 1
|
|
238
|
+
else
|
|
239
|
+
seg[:parent_index] = 0
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
segments.unshift(root_segment)
|
|
243
|
+
|
|
244
|
+
# Fill untracked controller time with sampler data or synthetic segment
|
|
245
|
+
ctrl_idx = segments.index { |s| s[:type] == 'controller' }
|
|
246
|
+
if ctrl_idx
|
|
247
|
+
ctrl_seg = segments[ctrl_idx]
|
|
248
|
+
ctrl_dur = (ctrl_seg[:duration] || 0).to_f
|
|
249
|
+
child_dur = segments.each_with_index.sum do |pair|
|
|
250
|
+
seg, i = pair
|
|
251
|
+
next 0.0 if i == ctrl_idx
|
|
252
|
+
(seg[:parent_index] == ctrl_idx) ? (seg[:duration] || 0).to_f : 0.0
|
|
253
|
+
end
|
|
254
|
+
gap = ctrl_dur - child_dur
|
|
228
255
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
error_offset = if ctrl_idx
|
|
233
|
-
ctrl = segments[ctrl_idx]
|
|
234
|
-
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
235
|
-
else
|
|
236
|
-
duration.round(2)
|
|
256
|
+
if gap > 1.0
|
|
257
|
+
inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
258
|
+
end
|
|
237
259
|
end
|
|
238
260
|
|
|
239
|
-
context[:segments]
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
261
|
+
context[:segments] = segments
|
|
262
|
+
context[:segment_summary] = segment_data[:segment_summary]
|
|
263
|
+
context[:segments_capped] = segment_data[:segments_capped]
|
|
264
|
+
|
|
265
|
+
# Append error marker segment inside the controller
|
|
266
|
+
if error
|
|
267
|
+
error_parent = ctrl_idx || 0
|
|
268
|
+
error_offset = if ctrl_idx
|
|
269
|
+
ctrl = segments[ctrl_idx]
|
|
270
|
+
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
271
|
+
else
|
|
272
|
+
duration.round(2)
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
context[:segments] << {
|
|
276
|
+
type: 'error',
|
|
277
|
+
detail: "#{error.class.name}: #{error.message}".truncate(200),
|
|
278
|
+
source: error.backtrace&.first,
|
|
279
|
+
duration: 0,
|
|
280
|
+
offset: error_offset,
|
|
281
|
+
parent_index: error_parent
|
|
282
|
+
}
|
|
283
|
+
end
|
|
247
284
|
end
|
|
285
|
+
|
|
286
|
+
context = scrub(context)
|
|
287
|
+
else
|
|
288
|
+
context = nil
|
|
248
289
|
end
|
|
249
290
|
|
|
250
291
|
ev = Event.new(
|
|
@@ -254,7 +295,8 @@ module Catpm
|
|
|
254
295
|
duration: duration,
|
|
255
296
|
started_at: Time.current,
|
|
256
297
|
status: error ? 500 : 200,
|
|
257
|
-
context:
|
|
298
|
+
context: context,
|
|
299
|
+
sample_type: sample_type,
|
|
258
300
|
metadata: metadata,
|
|
259
301
|
error_class: error&.class&.name,
|
|
260
302
|
error_message: error&.message,
|
|
@@ -286,6 +328,29 @@ module Catpm
|
|
|
286
328
|
|
|
287
329
|
private
|
|
288
330
|
|
|
331
|
+
# Determine sample type at event creation time so only sampled events
|
|
332
|
+
# carry full context in the buffer. Includes filling phase via
|
|
333
|
+
# process-level counter (resets on restart — acceptable approximation).
|
|
334
|
+
def early_sample_type(error:, duration:, kind:, target:, operation:)
|
|
335
|
+
return 'error' if error
|
|
336
|
+
return 'slow' if duration >= Catpm.config.slow_threshold_for(kind.to_sym)
|
|
337
|
+
|
|
338
|
+
# Filling phase: always sample until endpoint has enough random samples
|
|
339
|
+
endpoint_key = [kind.to_s, target, operation.to_s]
|
|
340
|
+
count = random_sample_counts[endpoint_key]
|
|
341
|
+
if count < Catpm.config.max_random_samples_per_endpoint
|
|
342
|
+
random_sample_counts[endpoint_key] = count + 1
|
|
343
|
+
return 'random'
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
return 'random' if rand(Catpm.config.random_sample_rate) == 0
|
|
347
|
+
nil
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
def random_sample_counts
|
|
351
|
+
@random_sample_counts ||= Hash.new(0)
|
|
352
|
+
end
|
|
353
|
+
|
|
289
354
|
def inject_gap_segments(segments, req_segments, gap, ctrl_idx, ctrl_seg)
|
|
290
355
|
sampler_groups = req_segments&.sampler_segments || []
|
|
291
356
|
|
data/lib/catpm/event.rb
CHANGED
|
@@ -23,7 +23,7 @@ module Catpm
|
|
|
23
23
|
@error_message = error_message
|
|
24
24
|
@backtrace = backtrace
|
|
25
25
|
@sample_type = sample_type
|
|
26
|
-
@context = context
|
|
26
|
+
@context = context
|
|
27
27
|
@status = status
|
|
28
28
|
end
|
|
29
29
|
|
|
@@ -61,7 +61,7 @@ module Catpm
|
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def context_bytes
|
|
64
|
-
return 0 if context.empty?
|
|
64
|
+
return 0 if context.nil? || context.empty?
|
|
65
65
|
|
|
66
66
|
context.to_json.bytesize + REF_SIZE
|
|
67
67
|
end
|
data/lib/catpm/flusher.rb
CHANGED
|
@@ -123,14 +123,6 @@ module Catpm
|
|
|
123
123
|
samples = []
|
|
124
124
|
error_groups = {}
|
|
125
125
|
|
|
126
|
-
# Pre-load existing random sample counts per endpoint for filling phase
|
|
127
|
-
@random_sample_counts = {}
|
|
128
|
-
Catpm::Sample.where(sample_type: 'random')
|
|
129
|
-
.joins(:bucket)
|
|
130
|
-
.group('catpm_buckets.kind', 'catpm_buckets.target', 'catpm_buckets.operation')
|
|
131
|
-
.count
|
|
132
|
-
.each { |(kind, target, op), cnt| @random_sample_counts[[ kind, target, op ]] = cnt }
|
|
133
|
-
|
|
134
126
|
events.each do |event|
|
|
135
127
|
# Bucket aggregation
|
|
136
128
|
key = [ event.kind, event.target, event.operation, event.bucket_start ]
|
|
@@ -165,8 +157,8 @@ module Catpm
|
|
|
165
157
|
)
|
|
166
158
|
end
|
|
167
159
|
|
|
168
|
-
# Collect samples
|
|
169
|
-
sample_type =
|
|
160
|
+
# Collect samples (pre-determined by collector — only these events carry full context)
|
|
161
|
+
sample_type = event.sample_type
|
|
170
162
|
if sample_type
|
|
171
163
|
sample_hash = {
|
|
172
164
|
bucket_key: key,
|
|
@@ -174,7 +166,7 @@ module Catpm
|
|
|
174
166
|
sample_type: sample_type,
|
|
175
167
|
recorded_at: event.started_at,
|
|
176
168
|
duration: event.duration,
|
|
177
|
-
context: event.context
|
|
169
|
+
context: event.context || {}
|
|
178
170
|
}
|
|
179
171
|
sample_hash[:error_fingerprint] = error_fp if error_fp
|
|
180
172
|
samples << sample_hash
|
|
@@ -231,24 +223,6 @@ module Catpm
|
|
|
231
223
|
}
|
|
232
224
|
end
|
|
233
225
|
|
|
234
|
-
def determine_sample_type(event)
|
|
235
|
-
return 'error' if event.error?
|
|
236
|
-
|
|
237
|
-
threshold = Catpm.config.slow_threshold_for(event.kind.to_sym)
|
|
238
|
-
return 'slow' if event.duration >= threshold
|
|
239
|
-
|
|
240
|
-
# Always sample if endpoint has few random samples (filling phase)
|
|
241
|
-
endpoint_key = [ event.kind, event.target, event.operation ]
|
|
242
|
-
existing_random = @random_sample_counts[endpoint_key] || 0
|
|
243
|
-
if existing_random < Catpm.config.max_random_samples_per_endpoint
|
|
244
|
-
@random_sample_counts[endpoint_key] = existing_random + 1
|
|
245
|
-
return 'random'
|
|
246
|
-
end
|
|
247
|
-
|
|
248
|
-
return 'random' if rand(Catpm.config.random_sample_rate) == 0
|
|
249
|
-
|
|
250
|
-
nil
|
|
251
|
-
end
|
|
252
226
|
|
|
253
227
|
def rotate_samples(samples)
|
|
254
228
|
samples.each do |sample|
|
|
@@ -288,10 +262,11 @@ module Catpm
|
|
|
288
262
|
end
|
|
289
263
|
|
|
290
264
|
def build_error_context(event)
|
|
265
|
+
event_context = event.context || {}
|
|
291
266
|
ctx = {
|
|
292
267
|
occurred_at: event.started_at.iso8601,
|
|
293
268
|
kind: event.kind,
|
|
294
|
-
operation:
|
|
269
|
+
operation: event_context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
|
|
295
270
|
backtrace: begin
|
|
296
271
|
bt = event.backtrace || []
|
|
297
272
|
limit = Catpm.config.backtrace_lines
|
|
@@ -303,13 +278,13 @@ module Catpm
|
|
|
303
278
|
|
|
304
279
|
ctx[:target] = event.target if event.target.present?
|
|
305
280
|
|
|
306
|
-
if
|
|
307
|
-
ctx[:segments] =
|
|
308
|
-
ctx[:segments_capped] =
|
|
281
|
+
if event_context[:segments]
|
|
282
|
+
ctx[:segments] = event_context[:segments]
|
|
283
|
+
ctx[:segments_capped] = event_context[:segments_capped]
|
|
309
284
|
end
|
|
310
285
|
|
|
311
|
-
if
|
|
312
|
-
ctx[:segment_summary] =
|
|
286
|
+
if event_context[:segment_summary]
|
|
287
|
+
ctx[:segment_summary] = event_context[:segment_summary]
|
|
313
288
|
end
|
|
314
289
|
|
|
315
290
|
ctx
|
|
@@ -435,14 +410,26 @@ module Catpm
|
|
|
435
410
|
}
|
|
436
411
|
|
|
437
412
|
source_ids = buckets.map(&:id)
|
|
413
|
+
survivor = buckets.first
|
|
414
|
+
|
|
415
|
+
# Reassign all samples to the survivor bucket
|
|
416
|
+
Catpm::Sample.where(bucket_id: source_ids).update_all(bucket_id: survivor.id)
|
|
438
417
|
|
|
439
|
-
# Delete source buckets
|
|
440
|
-
|
|
441
|
-
Catpm::Sample.where(bucket_id: source_ids).delete_all
|
|
442
|
-
Catpm::Bucket.where(id: source_ids).delete_all
|
|
418
|
+
# Delete non-survivor source buckets (now sample-free)
|
|
419
|
+
Catpm::Bucket.where(id: source_ids - [survivor.id]).delete_all
|
|
443
420
|
|
|
444
|
-
#
|
|
445
|
-
|
|
421
|
+
# Overwrite survivor with merged data
|
|
422
|
+
survivor.update!(
|
|
423
|
+
bucket_start: aligned_start,
|
|
424
|
+
count: merged[:count],
|
|
425
|
+
success_count: merged[:success_count],
|
|
426
|
+
failure_count: merged[:failure_count],
|
|
427
|
+
duration_sum: merged[:duration_sum],
|
|
428
|
+
duration_max: merged[:duration_max],
|
|
429
|
+
duration_min: merged[:duration_min],
|
|
430
|
+
metadata_sum: merged[:metadata_sum],
|
|
431
|
+
p95_digest: merged[:p95_digest]
|
|
432
|
+
)
|
|
446
433
|
end
|
|
447
434
|
end
|
|
448
435
|
|
data/lib/catpm/stack_sampler.rb
CHANGED
|
@@ -4,28 +4,69 @@ module Catpm
|
|
|
4
4
|
class StackSampler
|
|
5
5
|
SAMPLE_INTERVAL = 0.005 # 5ms
|
|
6
6
|
|
|
7
|
+
# Single global thread that samples all active requests.
|
|
8
|
+
# Avoids creating a thread per request.
|
|
9
|
+
class SamplingLoop
|
|
10
|
+
def initialize
|
|
11
|
+
@mutex = Mutex.new
|
|
12
|
+
@samplers = []
|
|
13
|
+
@thread = nil
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
def register(sampler)
|
|
17
|
+
@mutex.synchronize do
|
|
18
|
+
@samplers << sampler
|
|
19
|
+
start_thread unless @thread&.alive?
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def unregister(sampler)
|
|
24
|
+
@mutex.synchronize { @samplers.delete(sampler) }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
def start_thread
|
|
30
|
+
@thread = Thread.new do
|
|
31
|
+
loop do
|
|
32
|
+
sleep(SAMPLE_INTERVAL)
|
|
33
|
+
sample_all
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
@thread.priority = -1
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def sample_all
|
|
40
|
+
now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
41
|
+
targets = @mutex.synchronize { @samplers.dup }
|
|
42
|
+
targets.each { |s| s.capture(now) }
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
@loop = SamplingLoop.new
|
|
47
|
+
|
|
48
|
+
class << self
|
|
49
|
+
attr_reader :loop
|
|
50
|
+
end
|
|
51
|
+
|
|
7
52
|
def initialize(target_thread:, request_start:)
|
|
8
53
|
@target = target_thread
|
|
9
54
|
@request_start = request_start
|
|
10
55
|
@samples = []
|
|
11
|
-
@running = false
|
|
12
56
|
end
|
|
13
57
|
|
|
14
58
|
def start
|
|
15
|
-
|
|
16
|
-
@thread = Thread.new do
|
|
17
|
-
while @running
|
|
18
|
-
locs = @target.backtrace_locations
|
|
19
|
-
@samples << [Process.clock_gettime(Process::CLOCK_MONOTONIC), locs] if locs
|
|
20
|
-
sleep(SAMPLE_INTERVAL)
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
@thread.priority = -1
|
|
59
|
+
self.class.loop.register(self)
|
|
24
60
|
end
|
|
25
61
|
|
|
26
62
|
def stop
|
|
27
|
-
|
|
28
|
-
|
|
63
|
+
self.class.loop.unregister(self)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Called by SamplingLoop from the global thread
|
|
67
|
+
def capture(now)
|
|
68
|
+
locs = @target.backtrace_locations
|
|
69
|
+
@samples << [now, locs] if locs
|
|
29
70
|
end
|
|
30
71
|
|
|
31
72
|
# Returns array of { parent: {segment}, children: [{segment}, ...] }
|
data/lib/catpm/version.rb
CHANGED