catpm 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/catpm/errors_controller.rb +42 -0
- data/app/controllers/catpm/events_controller.rb +1 -1
- data/app/controllers/catpm/samples_controller.rb +3 -0
- data/app/helpers/catpm/application_helper.rb +3 -3
- data/app/models/catpm/error_record.rb +15 -0
- data/app/models/catpm/sample.rb +1 -0
- data/app/views/catpm/errors/show.html.erb +58 -18
- data/app/views/catpm/samples/show.html.erb +4 -0
- data/app/views/catpm/shared/_segments_waterfall.html.erb +5 -1
- data/db/migrate/20250601000001_create_catpm_tables.rb +3 -0
- data/lib/catpm/adapter/base.rb +43 -1
- data/lib/catpm/adapter/postgresql.rb +9 -2
- data/lib/catpm/adapter/sqlite.rb +9 -2
- data/lib/catpm/collector.rb +41 -1
- data/lib/catpm/configuration.rb +6 -2
- data/lib/catpm/event.rb +1 -1
- data/lib/catpm/flusher.rb +87 -39
- data/lib/catpm/lifecycle.rb +3 -23
- data/lib/catpm/middleware.rb +2 -0
- data/lib/catpm/version.rb +1 -1
- data/lib/generators/catpm/templates/initializer.rb.tt +1 -0
- data/lib/tasks/catpm_tasks.rake +21 -4
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4ee3b0b066416fbb74d3bdecd0397443272118d67a07b339e0d61a7c2f408351
|
|
4
|
+
data.tar.gz: 5b355e9a18cde63cd975c49300f8f56efa4d8a079e855a3747de518965ff1611
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0811e5ecba637add9c81496c180eb65fd4d8864761bb4c4d5c40b0be40ad9f3ca7e5b6e9ce120316ad2ed17f144e68b8953cf3d2ce2721f2b942a9e43aebece4'
|
|
7
|
+
data.tar.gz: 43434262c2f712032f5cf7e2205ccf2db0638d48c672e89dd2787b90258c1cd861353d21b62b504b53daa576501614e8c3aa1b6105ed6fc8e4655056120478d5
|
data/app/controllers/catpm/errors_controller.rb
CHANGED
|
@@ -35,6 +35,48 @@ module Catpm
|
|
|
35
35
|
@error = Catpm::ErrorRecord.find(params[:id])
|
|
36
36
|
@contexts = @error.parsed_contexts
|
|
37
37
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
38
|
+
|
|
39
|
+
@range, period, bucket_seconds = helpers.parse_range(params[:range] || '24h')
|
|
40
|
+
|
|
41
|
+
# Samples table: 20 most recent linked by fingerprint
|
|
42
|
+
@samples = Catpm::Sample.where(error_fingerprint: @error.fingerprint)
|
|
43
|
+
.order(recorded_at: :desc)
|
|
44
|
+
.limit(Catpm.config.max_error_samples_per_fingerprint)
|
|
45
|
+
|
|
46
|
+
# Fallback: match error samples by recorded_at from contexts
|
|
47
|
+
if @samples.empty? && @contexts.any?
|
|
48
|
+
occurred_times = @contexts.filter_map { |c|
|
|
49
|
+
Time.parse(c['occurred_at'] || c[:occurred_at]) rescue nil
|
|
50
|
+
}
|
|
51
|
+
if occurred_times.any?
|
|
52
|
+
@samples = Catpm::Sample.where(sample_type: 'error', kind: @error.kind, recorded_at: occurred_times)
|
|
53
|
+
.order(recorded_at: :desc)
|
|
54
|
+
.limit(Catpm.config.max_error_samples_per_fingerprint)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Chart from occurrence_buckets (multi-resolution, no dependency on samples)
|
|
59
|
+
ob = @error.parsed_occurrence_buckets
|
|
60
|
+
|
|
61
|
+
# Pick resolution: minute for short ranges, hour for medium, day for long
|
|
62
|
+
resolution = case @range
|
|
63
|
+
when '1h', '6h', '24h' then 'm'
|
|
64
|
+
when '1w', '2w', '1m' then 'h'
|
|
65
|
+
else 'd'
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
slots = {}
|
|
69
|
+
cutoff = period.ago.to_i
|
|
70
|
+
(ob[resolution] || {}).each do |ts_str, count|
|
|
71
|
+
ts = ts_str.to_i
|
|
72
|
+
next if ts < cutoff
|
|
73
|
+
slot_key = (ts / bucket_seconds) * bucket_seconds
|
|
74
|
+
slots[slot_key] = (slots[slot_key] || 0) + count
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
now_slot = (Time.current.to_i / bucket_seconds) * bucket_seconds
|
|
78
|
+
@chart_data = 60.times.map { |i| slots[now_slot - (59 - i) * bucket_seconds] || 0 }
|
|
79
|
+
@chart_times = 60.times.map { |i| Time.at(now_slot - (59 - i) * bucket_seconds).strftime('%H:%M') }
|
|
38
80
|
end
|
|
39
81
|
|
|
40
82
|
def resolve
|
data/app/controllers/catpm/events_controller.rb
CHANGED
|
@@ -81,7 +81,7 @@ module Catpm
|
|
|
81
81
|
@chart_times = 60.times.map { |i| Time.at(now_slot - (59 - i) * bucket_seconds).strftime('%H:%M') }
|
|
82
82
|
|
|
83
83
|
# Recent samples
|
|
84
|
-
@samples = Catpm::EventSample.by_name(@name).order(recorded_at: :desc).limit(
|
|
84
|
+
@samples = Catpm::EventSample.by_name(@name).order(recorded_at: :desc).limit(Catpm.config.events_max_samples_per_name)
|
|
85
85
|
|
|
86
86
|
@active_error_count = Catpm::ErrorRecord.unresolved.count
|
|
87
87
|
end
|
data/app/controllers/catpm/samples_controller.rb
CHANGED
|
@@ -8,6 +8,9 @@ module Catpm
|
|
|
8
8
|
@context = @sample.parsed_context
|
|
9
9
|
@segments = @context['segments'] || @context[:segments] || []
|
|
10
10
|
@summary = @context['segment_summary'] || @context[:segment_summary] || {}
|
|
11
|
+
@error_record = if @sample.error_fingerprint.present?
|
|
12
|
+
Catpm::ErrorRecord.find_by(fingerprint: @sample.error_fingerprint)
|
|
13
|
+
end
|
|
11
14
|
end
|
|
12
15
|
end
|
|
13
16
|
end
|
data/app/helpers/catpm/application_helper.rb
CHANGED
|
@@ -7,14 +7,14 @@ module Catpm
|
|
|
7
7
|
'sql' => '#b8e4c6', 'view' => '#e4d4f4', 'cache' => '#fdd8b5',
|
|
8
8
|
'http' => '#f9c4c0', 'mailer' => '#e4d4f4', 'storage' => '#fdd8b5',
|
|
9
9
|
'custom' => '#dde2e8', 'code' => '#c8daf0', 'gem' => '#f0e0f0', 'other' => '#e8e8e8', 'controller' => '#b6d9f7',
|
|
10
|
-
'middleware' => '#f0dfa0', 'request' => '#b6d9f7'
|
|
10
|
+
'middleware' => '#f0dfa0', 'request' => '#b6d9f7', 'error' => '#fca5a5'
|
|
11
11
|
}.freeze
|
|
12
12
|
|
|
13
13
|
SEGMENT_TEXT_COLORS = {
|
|
14
14
|
'sql' => '#1a7f37', 'view' => '#6639a6', 'cache' => '#953800',
|
|
15
15
|
'http' => '#a1110a', 'mailer' => '#6639a6', 'storage' => '#953800',
|
|
16
16
|
'custom' => '#4b5563', 'code' => '#3b5998', 'gem' => '#7b3f9e', 'other' => '#9ca3af', 'controller' => '#0550ae',
|
|
17
|
-
'middleware' => '#7c5c00', 'request' => '#0550ae'
|
|
17
|
+
'middleware' => '#7c5c00', 'request' => '#0550ae', 'error' => '#991b1b'
|
|
18
18
|
}.freeze
|
|
19
19
|
|
|
20
20
|
BADGE_CLASSES = {
|
|
@@ -32,7 +32,7 @@ module Catpm
|
|
|
32
32
|
'sql' => 'SQL Queries', 'view' => 'View Renders', 'cache' => 'Cache Ops',
|
|
33
33
|
'http' => 'HTTP Calls', 'mailer' => 'Mailer', 'storage' => 'Storage',
|
|
34
34
|
'custom' => 'Custom', 'code' => 'App Code', 'gem' => 'Gems', 'other' => 'Untracked',
|
|
35
|
-
'controller' => 'Controller', 'middleware' => 'Middleware', 'request' => 'Request'
|
|
35
|
+
'controller' => 'Controller', 'middleware' => 'Middleware', 'request' => 'Request', 'error' => 'Error'
|
|
36
36
|
}.freeze
|
|
37
37
|
|
|
38
38
|
RANGES = {
|
data/app/models/catpm/error_record.rb
CHANGED
|
@@ -33,5 +33,20 @@ module Catpm
|
|
|
33
33
|
rescue JSON::ParserError
|
|
34
34
|
[]
|
|
35
35
|
end
|
|
36
|
+
|
|
37
|
+
def parsed_occurrence_buckets
|
|
38
|
+
raw = case occurrence_buckets
|
|
39
|
+
when Hash then occurrence_buckets
|
|
40
|
+
when String then JSON.parse(occurrence_buckets)
|
|
41
|
+
else {}
|
|
42
|
+
end
|
|
43
|
+
{
|
|
44
|
+
'm' => (raw['m'].is_a?(Hash) ? raw['m'] : {}),
|
|
45
|
+
'h' => (raw['h'].is_a?(Hash) ? raw['h'] : {}),
|
|
46
|
+
'd' => (raw['d'].is_a?(Hash) ? raw['d'] : {})
|
|
47
|
+
}
|
|
48
|
+
rescue JSON::ParserError
|
|
49
|
+
{ 'm' => {}, 'h' => {}, 'd' => {} }
|
|
50
|
+
end
|
|
36
51
|
end
|
|
37
52
|
end
|
data/app/models/catpm/sample.rb
CHANGED
|
@@ -12,6 +12,7 @@ module Catpm
|
|
|
12
12
|
scope :slow, -> { where(sample_type: 'slow') }
|
|
13
13
|
scope :errors, -> { where(sample_type: 'error') }
|
|
14
14
|
scope :recent, ->(period = 1.hour) { where(recorded_at: period.ago..) }
|
|
15
|
+
scope :for_error, ->(fingerprint) { where(error_fingerprint: fingerprint) }
|
|
15
16
|
|
|
16
17
|
def parsed_context
|
|
17
18
|
case context
|
data/app/views/catpm/errors/show.html.erb
CHANGED
|
@@ -48,6 +48,19 @@
|
|
|
48
48
|
</div>
|
|
49
49
|
</div>
|
|
50
50
|
|
|
51
|
+
<%# ─── Error Frequency Chart ─── %>
|
|
52
|
+
<div class="time-range">
|
|
53
|
+
<% Catpm::ApplicationHelper::RANGE_KEYS.each do |r| %>
|
|
54
|
+
<a href="<%= catpm.error_path(@error, range: r) %>" class="<%= 'active' if @range == r %>"><%= r %></a>
|
|
55
|
+
<% end %>
|
|
56
|
+
</div>
|
|
57
|
+
|
|
58
|
+
<h2>Error Frequency</h2>
|
|
59
|
+
<%= section_description("Occurrences per time slot over the selected range.") %>
|
|
60
|
+
<div style="border:1px solid var(--border); border-radius:var(--radius); padding:16px; margin-bottom:24px; position:relative">
|
|
61
|
+
<%= bar_chart_svg(@chart_data, width: 600, height: 180, color: "var(--red, #e5534b)", time_labels: @chart_times) %>
|
|
62
|
+
</div>
|
|
63
|
+
|
|
51
64
|
<%# ─── Backtrace ─── %>
|
|
52
65
|
<% first_bt = @contexts.first && (@contexts.first["backtrace"] || @contexts.first[:backtrace] || []) %>
|
|
53
66
|
<% if first_bt.any? %>
|
|
@@ -55,8 +68,17 @@
|
|
|
55
68
|
<%= section_description("All occurrences share the same fingerprint and backtrace.") %>
|
|
56
69
|
<div style="border:1px solid var(--border); border-radius:var(--radius); padding:14px; margin-bottom:12px; position:relative">
|
|
57
70
|
<button class="copy-btn" style="position:absolute; top:8px; right:8px" onclick="copyText(this)">Copy</button>
|
|
58
|
-
|
|
71
|
+
<% preview_lines = first_bt.first(10) %>
|
|
72
|
+
<% remaining_lines = first_bt.drop(10) %>
|
|
73
|
+
<pre class="mono" style="font-size:12px; white-space:pre-wrap; margin:0; line-height:1.8"><% preview_lines.each do |line| %><span class="<%= line.match?(%r{/(gems|ruby|vendor|bundle)/}) ? 'backtrace-lib' : 'backtrace-app' %>"><%= line %></span>
|
|
59
74
|
<% end %></pre>
|
|
75
|
+
<% if remaining_lines.any? %>
|
|
76
|
+
<details style="margin-top:4px">
|
|
77
|
+
<summary style="cursor:pointer; font-size:12px; color:var(--text-2)">Show full backtrace (<%= first_bt.size %> lines)</summary>
|
|
78
|
+
<pre class="mono" style="font-size:12px; white-space:pre-wrap; margin:0; line-height:1.8"><% remaining_lines.each do |line| %><span class="<%= line.match?(%r{/(gems|ruby|vendor|bundle)/}) ? 'backtrace-lib' : 'backtrace-app' %>"><%= line %></span>
|
|
79
|
+
<% end %></pre>
|
|
80
|
+
</details>
|
|
81
|
+
<% end %>
|
|
60
82
|
</div>
|
|
61
83
|
<% end %>
|
|
62
84
|
|
|
@@ -66,14 +88,14 @@
|
|
|
66
88
|
<span class="mono" style="word-break:break-all"><%= @error.fingerprint %></span>
|
|
67
89
|
</div>
|
|
68
90
|
|
|
69
|
-
<%# ───
|
|
70
|
-
<% if @
|
|
71
|
-
<h2>
|
|
91
|
+
<%# ─── Samples ─── %>
|
|
92
|
+
<% if @samples.any? %>
|
|
93
|
+
<h2>Recent Samples</h2>
|
|
94
|
+
<%= section_description("Linked request samples for this error. Click to view full details.") %>
|
|
72
95
|
<div class="table-scroll">
|
|
73
96
|
<table>
|
|
74
97
|
<thead>
|
|
75
98
|
<tr>
|
|
76
|
-
<th>#</th>
|
|
77
99
|
<th>Time</th>
|
|
78
100
|
<th>Duration</th>
|
|
79
101
|
<th>Status</th>
|
|
@@ -82,24 +104,42 @@
|
|
|
82
104
|
</tr>
|
|
83
105
|
</thead>
|
|
84
106
|
<tbody>
|
|
85
|
-
<% @
|
|
86
|
-
<%
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
<td class="mono
|
|
107
|
+
<% @samples.each do |sample| %>
|
|
108
|
+
<% ctx = sample.parsed_context %>
|
|
109
|
+
<tr class="clickable-row" onclick="window.location='<%= catpm.sample_path(sample) %>';" style="cursor:pointer">
|
|
110
|
+
<td><%= time_with_tooltip(sample.recorded_at) %></td>
|
|
111
|
+
<td class="mono"><%= format_duration(sample.duration) %></td>
|
|
112
|
+
<td><%= status_badge(ctx["status"] || ctx[:status]) %></td>
|
|
113
|
+
<td class="mono"><%= sample.bucket&.target || "—" %></td>
|
|
114
|
+
<td class="mono text-muted"><%= segment_count_summary(ctx["segment_summary"] || ctx[:segment_summary]).presence || "—" %></td>
|
|
115
|
+
</tr>
|
|
116
|
+
<% end %>
|
|
117
|
+
</tbody>
|
|
118
|
+
</table>
|
|
119
|
+
</div>
|
|
120
|
+
<% end %>
|
|
121
|
+
|
|
122
|
+
<%# ─── Legacy Occurrences (for errors without linked samples) ─── %>
|
|
123
|
+
<% if @samples.empty? && @contexts.any? %>
|
|
124
|
+
<h2>Last <%= @contexts.size %> Captured Occurrences</h2>
|
|
125
|
+
<div class="table-scroll">
|
|
126
|
+
<table>
|
|
127
|
+
<thead>
|
|
128
|
+
<tr>
|
|
129
|
+
<th>Time</th>
|
|
130
|
+
<th>Duration</th>
|
|
131
|
+
<th>Status</th>
|
|
132
|
+
<th>Target</th>
|
|
133
|
+
</tr>
|
|
134
|
+
</thead>
|
|
135
|
+
<tbody>
|
|
136
|
+
<% @contexts.each do |ctx| %>
|
|
137
|
+
<tr>
|
|
90
138
|
<td><%= time_with_tooltip(ctx["occurred_at"] || ctx[:occurred_at]) %></td>
|
|
91
139
|
<td class="mono"><%= (ctx["duration"] || ctx[:duration]) ? format_duration((ctx["duration"] || ctx[:duration]).to_f) : "—" %></td>
|
|
92
140
|
<td><%= status_badge(ctx["status"] || ctx[:status]) %></td>
|
|
93
141
|
<td class="mono"><%= ctx["target"] || ctx[:target] || "—" %></td>
|
|
94
|
-
<td class="mono text-muted"><%= segment_count_summary(ctx["segment_summary"] || ctx[:segment_summary]).presence || "—" %></td>
|
|
95
142
|
</tr>
|
|
96
|
-
<% if has_detail %>
|
|
97
|
-
<tr id="detail-<%= i %>" style="display:none">
|
|
98
|
-
<td colspan="6" style="padding:14px; background:var(--bg-1)">
|
|
99
|
-
<%= render "catpm/shared/segments_waterfall", segments: segments, total_duration: (ctx["duration"] || ctx[:duration] || 1), segments_capped: ctx["segments_capped"] || ctx[:segments_capped], table_id: "segments-table-#{i}" %>
|
|
100
|
-
</td>
|
|
101
|
-
</tr>
|
|
102
|
-
<% end %>
|
|
103
143
|
<% end %>
|
|
104
144
|
</tbody>
|
|
105
145
|
</table>
|
data/app/views/catpm/samples/show.html.erb
CHANGED
|
@@ -28,6 +28,10 @@
|
|
|
28
28
|
<span class="mono"><%= format_duration(@sample.duration) %></span>
|
|
29
29
|
<span class="sep">·</span>
|
|
30
30
|
<span class="text-muted"><%= time_with_tooltip(@sample.recorded_at) %></span>
|
|
31
|
+
<% if @error_record %>
|
|
32
|
+
<span class="sep">·</span>
|
|
33
|
+
<a href="<%= catpm.error_path(@error_record) %>" class="badge badge-error" style="text-decoration:none">Error: <%= @error_record.error_class %></a>
|
|
34
|
+
<% end %>
|
|
31
35
|
</div>
|
|
32
36
|
|
|
33
37
|
<%# ─── Request Context + Full JSON side by side ─── %>
|
data/app/views/catpm/shared/_segments_waterfall.html.erb
CHANGED
|
@@ -131,7 +131,11 @@
|
|
|
131
131
|
</td>
|
|
132
132
|
<td>
|
|
133
133
|
<div class="bar-container">
|
|
134
|
-
|
|
134
|
+
<% if type == 'error' %>
|
|
135
|
+
<div style="position:absolute; left:<%= left_pct %>%; top:2px; bottom:2px; width:3px; background:#dc2626; border-radius:2px"></div>
|
|
136
|
+
<% else %>
|
|
137
|
+
<div class="bar-fill" style="margin-left:<%= left_pct %>%; width:<%= width_pct %>%; background:<%= bar_color %>"></div>
|
|
138
|
+
<% end %>
|
|
135
139
|
</div>
|
|
136
140
|
</td>
|
|
137
141
|
</tr>
|
data/db/migrate/20250601000001_create_catpm_tables.rb
CHANGED
|
@@ -32,10 +32,12 @@ class CreateCatpmTables < ActiveRecord::Migration[8.0]
|
|
|
32
32
|
t.datetime :recorded_at, null: false
|
|
33
33
|
t.float :duration, null: false
|
|
34
34
|
t.json :context
|
|
35
|
+
t.string :error_fingerprint, limit: 64
|
|
35
36
|
end
|
|
36
37
|
|
|
37
38
|
add_index :catpm_samples, :recorded_at, name: 'idx_catpm_samples_time'
|
|
38
39
|
add_index :catpm_samples, [:kind, :recorded_at], name: 'idx_catpm_samples_kind_time'
|
|
40
|
+
add_index :catpm_samples, :error_fingerprint, name: 'idx_catpm_samples_error_fp'
|
|
39
41
|
|
|
40
42
|
create_table :catpm_errors do |t|
|
|
41
43
|
t.string :fingerprint, null: false, limit: 64
|
|
@@ -46,6 +48,7 @@ class CreateCatpmTables < ActiveRecord::Migration[8.0]
|
|
|
46
48
|
t.datetime :first_occurred_at, null: false
|
|
47
49
|
t.datetime :last_occurred_at, null: false
|
|
48
50
|
t.json :contexts
|
|
51
|
+
t.json :occurrence_buckets
|
|
49
52
|
t.datetime :resolved_at
|
|
50
53
|
end
|
|
51
54
|
|
data/lib/catpm/adapter/base.rb
CHANGED
|
@@ -32,7 +32,8 @@ module Catpm
|
|
|
32
32
|
sample_type: sample_data[:sample_type],
|
|
33
33
|
recorded_at: sample_data[:recorded_at],
|
|
34
34
|
duration: sample_data[:duration],
|
|
35
|
-
context: sample_data[:context]
|
|
35
|
+
context: sample_data[:context],
|
|
36
|
+
error_fingerprint: sample_data[:error_fingerprint]
|
|
36
37
|
}
|
|
37
38
|
end
|
|
38
39
|
|
|
@@ -68,8 +69,49 @@ module Catpm
|
|
|
68
69
|
combined.last(Catpm.config.max_error_contexts)
|
|
69
70
|
end
|
|
70
71
|
|
|
72
|
+
# Merge new occurrence timestamps into the multi-resolution bucket structure.
|
|
73
|
+
# Structure: { "m" => {epoch => count}, "h" => {epoch => count}, "d" => {epoch => count} }
|
|
74
|
+
# - "m" (minute): kept for 48 hours
|
|
75
|
+
# - "h" (hour): kept for 90 days
|
|
76
|
+
# - "d" (day): kept for 2 years
|
|
77
|
+
def merge_occurrence_buckets(existing, new_times)
|
|
78
|
+
buckets = parse_occurrence_buckets(existing)
|
|
79
|
+
|
|
80
|
+
(new_times || []).each do |t|
|
|
81
|
+
ts = t.to_i
|
|
82
|
+
m_key = ((ts / 60) * 60).to_s
|
|
83
|
+
h_key = ((ts / 3600) * 3600).to_s
|
|
84
|
+
d_key = ((ts / 86400) * 86400).to_s
|
|
85
|
+
|
|
86
|
+
buckets['m'][m_key] = (buckets['m'][m_key] || 0) + 1
|
|
87
|
+
buckets['h'][h_key] = (buckets['h'][h_key] || 0) + 1
|
|
88
|
+
buckets['d'][d_key] = (buckets['d'][d_key] || 0) + 1
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Compact old entries
|
|
92
|
+
now = Time.current.to_i
|
|
93
|
+
cutoff_m = now - 48 * 3600
|
|
94
|
+
cutoff_h = now - 90 * 86400
|
|
95
|
+
cutoff_d = now - 2 * 365 * 86400
|
|
96
|
+
|
|
97
|
+
buckets['m'].reject! { |k, _| k.to_i < cutoff_m }
|
|
98
|
+
buckets['h'].reject! { |k, _| k.to_i < cutoff_h }
|
|
99
|
+
buckets['d'].reject! { |k, _| k.to_i < cutoff_d }
|
|
100
|
+
|
|
101
|
+
buckets
|
|
102
|
+
end
|
|
103
|
+
|
|
71
104
|
private
|
|
72
105
|
|
|
106
|
+
def parse_occurrence_buckets(value)
|
|
107
|
+
raw = parse_json(value)
|
|
108
|
+
{
|
|
109
|
+
'm' => (raw['m'].is_a?(Hash) ? raw['m'] : {}),
|
|
110
|
+
'h' => (raw['h'].is_a?(Hash) ? raw['h'] : {}),
|
|
111
|
+
'd' => (raw['d'].is_a?(Hash) ? raw['d'] : {})
|
|
112
|
+
}
|
|
113
|
+
end
|
|
114
|
+
|
|
73
115
|
def parse_json(value)
|
|
74
116
|
case value
|
|
75
117
|
when Hash then value.transform_keys(&:to_s)
|
data/lib/catpm/adapter/postgresql.rb
CHANGED
|
@@ -144,16 +144,22 @@ module Catpm
|
|
|
144
144
|
merged_contexts = merge_contexts(
|
|
145
145
|
existing.parsed_contexts, error_data[:new_contexts]
|
|
146
146
|
)
|
|
147
|
+
merged_buckets = merge_occurrence_buckets(
|
|
148
|
+
existing.occurrence_buckets, error_data[:occurrence_times]
|
|
149
|
+
)
|
|
147
150
|
|
|
148
151
|
attrs = {
|
|
149
152
|
occurrences_count: existing.occurrences_count + error_data[:occurrences_count],
|
|
150
153
|
last_occurred_at: [existing.last_occurred_at, error_data[:last_occurred_at]].max,
|
|
151
|
-
contexts: merged_contexts
|
|
154
|
+
contexts: merged_contexts,
|
|
155
|
+
occurrence_buckets: merged_buckets
|
|
152
156
|
}
|
|
153
157
|
attrs[:resolved_at] = nil if existing.resolved?
|
|
154
158
|
|
|
155
159
|
existing.update!(attrs)
|
|
156
160
|
else
|
|
161
|
+
initial_buckets = merge_occurrence_buckets(nil, error_data[:occurrence_times])
|
|
162
|
+
|
|
157
163
|
Catpm::ErrorRecord.create!(
|
|
158
164
|
fingerprint: error_data[:fingerprint],
|
|
159
165
|
kind: error_data[:kind],
|
|
@@ -162,7 +168,8 @@ module Catpm
|
|
|
162
168
|
occurrences_count: error_data[:occurrences_count],
|
|
163
169
|
first_occurred_at: error_data[:first_occurred_at],
|
|
164
170
|
last_occurred_at: error_data[:last_occurred_at],
|
|
165
|
-
contexts: error_data[:new_contexts]
|
|
171
|
+
contexts: error_data[:new_contexts],
|
|
172
|
+
occurrence_buckets: initial_buckets
|
|
166
173
|
)
|
|
167
174
|
end
|
|
168
175
|
end
|
data/lib/catpm/adapter/sqlite.rb
CHANGED
|
@@ -116,16 +116,22 @@ module Catpm
|
|
|
116
116
|
merged_contexts = merge_contexts(
|
|
117
117
|
existing.parsed_contexts, error_data[:new_contexts]
|
|
118
118
|
)
|
|
119
|
+
merged_buckets = merge_occurrence_buckets(
|
|
120
|
+
existing.occurrence_buckets, error_data[:occurrence_times]
|
|
121
|
+
)
|
|
119
122
|
|
|
120
123
|
attrs = {
|
|
121
124
|
occurrences_count: existing.occurrences_count + error_data[:occurrences_count],
|
|
122
125
|
last_occurred_at: [existing.last_occurred_at, error_data[:last_occurred_at]].max,
|
|
123
|
-
contexts: merged_contexts.to_json
|
|
126
|
+
contexts: merged_contexts.to_json,
|
|
127
|
+
occurrence_buckets: merged_buckets.to_json
|
|
124
128
|
}
|
|
125
129
|
attrs[:resolved_at] = nil if existing.resolved?
|
|
126
130
|
|
|
127
131
|
existing.update!(attrs)
|
|
128
132
|
else
|
|
133
|
+
initial_buckets = merge_occurrence_buckets(nil, error_data[:occurrence_times])
|
|
134
|
+
|
|
129
135
|
Catpm::ErrorRecord.create!(
|
|
130
136
|
fingerprint: error_data[:fingerprint],
|
|
131
137
|
kind: error_data[:kind],
|
|
@@ -134,7 +140,8 @@ module Catpm
|
|
|
134
140
|
occurrences_count: error_data[:occurrences_count],
|
|
135
141
|
first_occurred_at: error_data[:first_occurred_at],
|
|
136
142
|
last_occurred_at: error_data[:last_occurred_at],
|
|
137
|
-
contexts: error_data[:new_contexts].to_json
|
|
143
|
+
contexts: error_data[:new_contexts].to_json,
|
|
144
|
+
occurrence_buckets: initial_buckets.to_json
|
|
138
145
|
)
|
|
139
146
|
end
|
|
140
147
|
end
|
data/lib/catpm/collector.rb
CHANGED
|
@@ -8,7 +8,7 @@ module Catpm
|
|
|
8
8
|
|
|
9
9
|
payload = event.payload
|
|
10
10
|
target = "#{payload[:controller]}##{payload[:action]}"
|
|
11
|
-
return if target.start_with?('Catpm::')
|
|
11
|
+
return if !Catpm.config.track_own_requests && target.start_with?('Catpm::')
|
|
12
12
|
return if Catpm.config.ignored?(target)
|
|
13
13
|
|
|
14
14
|
duration = event.duration # milliseconds
|
|
@@ -95,6 +95,26 @@ module Catpm
|
|
|
95
95
|
|
|
96
96
|
# Use full request duration (including middleware) for the event
|
|
97
97
|
duration = total_request_duration
|
|
98
|
+
|
|
99
|
+
# Append error marker segment inside the controller
|
|
100
|
+
if payload[:exception]
|
|
101
|
+
error_parent = ctrl_idx || 0
|
|
102
|
+
error_offset = if ctrl_idx
|
|
103
|
+
ctrl = segments[ctrl_idx]
|
|
104
|
+
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
105
|
+
else
|
|
106
|
+
duration.round(2)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
context[:segments] << {
|
|
110
|
+
type: 'error',
|
|
111
|
+
detail: "#{payload[:exception].first}: #{payload[:exception].last}".truncate(200),
|
|
112
|
+
source: payload[:exception_object]&.backtrace&.first,
|
|
113
|
+
duration: 0,
|
|
114
|
+
offset: error_offset,
|
|
115
|
+
parent_index: error_parent
|
|
116
|
+
}
|
|
117
|
+
end
|
|
98
118
|
end
|
|
99
119
|
|
|
100
120
|
ev = Event.new(
|
|
@@ -205,6 +225,26 @@ module Catpm
|
|
|
205
225
|
segment_data[:segment_summary]&.each do |k, v|
|
|
206
226
|
metadata[k] = v
|
|
207
227
|
end
|
|
228
|
+
|
|
229
|
+
# Append error marker segment inside the controller
|
|
230
|
+
if error
|
|
231
|
+
error_parent = ctrl_idx || 0
|
|
232
|
+
error_offset = if ctrl_idx
|
|
233
|
+
ctrl = segments[ctrl_idx]
|
|
234
|
+
((ctrl[:offset] || 0) + (ctrl[:duration] || 0)).round(2)
|
|
235
|
+
else
|
|
236
|
+
duration.round(2)
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
context[:segments] << {
|
|
240
|
+
type: 'error',
|
|
241
|
+
detail: "#{error.class.name}: #{error.message}".truncate(200),
|
|
242
|
+
source: error.backtrace&.first,
|
|
243
|
+
duration: 0,
|
|
244
|
+
offset: error_offset,
|
|
245
|
+
parent_index: error_parent
|
|
246
|
+
}
|
|
247
|
+
end
|
|
208
248
|
end
|
|
209
249
|
|
|
210
250
|
ev = Event.new(
|
data/lib/catpm/configuration.rb
CHANGED
|
@@ -31,6 +31,7 @@ module Catpm
|
|
|
31
31
|
:random_sample_rate,
|
|
32
32
|
:max_random_samples_per_endpoint,
|
|
33
33
|
:max_slow_samples_per_endpoint,
|
|
34
|
+
:max_error_samples_per_fingerprint,
|
|
34
35
|
:cleanup_interval,
|
|
35
36
|
:circuit_breaker_failure_threshold,
|
|
36
37
|
:circuit_breaker_recovery_timeout,
|
|
@@ -39,7 +40,8 @@ module Catpm
|
|
|
39
40
|
:backtrace_lines,
|
|
40
41
|
:shutdown_timeout,
|
|
41
42
|
:events_enabled,
|
|
42
|
-
:events_max_samples_per_name
|
|
43
|
+
:events_max_samples_per_name,
|
|
44
|
+
:track_own_requests
|
|
43
45
|
|
|
44
46
|
def initialize
|
|
45
47
|
@enabled = true
|
|
@@ -71,15 +73,17 @@ module Catpm
|
|
|
71
73
|
@random_sample_rate = 20
|
|
72
74
|
@max_random_samples_per_endpoint = 5
|
|
73
75
|
@max_slow_samples_per_endpoint = 5
|
|
76
|
+
@max_error_samples_per_fingerprint = 20
|
|
74
77
|
@cleanup_interval = 1.hour
|
|
75
78
|
@circuit_breaker_failure_threshold = 5
|
|
76
79
|
@circuit_breaker_recovery_timeout = 60 # seconds
|
|
77
80
|
@sqlite_busy_timeout = 5_000 # milliseconds
|
|
78
81
|
@persistence_batch_size = 100
|
|
79
|
-
@backtrace_lines =
|
|
82
|
+
@backtrace_lines = nil
|
|
80
83
|
@shutdown_timeout = 5 # seconds
|
|
81
84
|
@events_enabled = false
|
|
82
85
|
@events_max_samples_per_name = 20
|
|
86
|
+
@track_own_requests = false
|
|
83
87
|
end
|
|
84
88
|
|
|
85
89
|
def slow_threshold_for(kind)
|
data/lib/catpm/event.rb
CHANGED
data/lib/catpm/flusher.rb
CHANGED
|
@@ -12,28 +12,54 @@ module Catpm
|
|
|
12
12
|
@last_cleanup_at = Time.now
|
|
13
13
|
@running = false
|
|
14
14
|
@thread = nil
|
|
15
|
+
@pid = nil
|
|
16
|
+
@mutex = Mutex.new
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def start
|
|
18
|
-
|
|
20
|
+
@mutex.synchronize do
|
|
21
|
+
# After fork(), threads are dead but @running may still be true
|
|
22
|
+
if @pid && @pid != Process.pid
|
|
23
|
+
@running = false
|
|
24
|
+
@thread = nil
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
return if @running
|
|
19
28
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
29
|
+
@running = true
|
|
30
|
+
@pid = Process.pid
|
|
31
|
+
@thread = Thread.new do
|
|
32
|
+
while @running
|
|
33
|
+
sleep(effective_interval)
|
|
34
|
+
flush_cycle if @running
|
|
35
|
+
end
|
|
36
|
+
rescue => e
|
|
37
|
+
Catpm.config.error_handler.call(e)
|
|
38
|
+
retry if @running
|
|
25
39
|
end
|
|
26
|
-
rescue => e
|
|
27
|
-
Catpm.config.error_handler.call(e)
|
|
28
|
-
retry if @running
|
|
29
40
|
end
|
|
30
41
|
end
|
|
31
42
|
|
|
43
|
+
# Cheap check called from middleware on every request.
|
|
44
|
+
# Detects fork (Puma, Unicorn, etc.) and restarts the thread.
|
|
45
|
+
def ensure_running!
|
|
46
|
+
return if @running && @thread&.alive? && @pid == Process.pid
|
|
47
|
+
|
|
48
|
+
start
|
|
49
|
+
end
|
|
50
|
+
|
|
32
51
|
def stop(timeout: Catpm.config.shutdown_timeout)
|
|
33
|
-
|
|
52
|
+
thread = nil
|
|
34
53
|
|
|
35
|
-
@
|
|
36
|
-
|
|
54
|
+
@mutex.synchronize do
|
|
55
|
+
return unless @running
|
|
56
|
+
|
|
57
|
+
@running = false
|
|
58
|
+
thread = @thread
|
|
59
|
+
@thread = nil
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
thread&.join(timeout)
|
|
37
63
|
flush_cycle # Final flush
|
|
38
64
|
end
|
|
39
65
|
|
|
@@ -45,26 +71,28 @@ module Catpm
|
|
|
45
71
|
return if events.empty?
|
|
46
72
|
|
|
47
73
|
ActiveRecord::Base.connection_pool.with_connection do
|
|
48
|
-
|
|
74
|
+
ActiveRecord::Base.transaction do
|
|
75
|
+
perf_events, custom_events = events.partition { |e| e.is_a?(Catpm::Event) }
|
|
49
76
|
|
|
50
|
-
|
|
51
|
-
|
|
77
|
+
if perf_events.any?
|
|
78
|
+
buckets, samples, errors = aggregate(perf_events)
|
|
52
79
|
|
|
53
|
-
|
|
54
|
-
|
|
80
|
+
adapter = Catpm::Adapter.current
|
|
81
|
+
adapter.persist_buckets(buckets)
|
|
55
82
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
83
|
+
bucket_map = build_bucket_map(buckets)
|
|
84
|
+
samples = rotate_samples(samples)
|
|
85
|
+
adapter.persist_samples(samples, bucket_map)
|
|
86
|
+
adapter.persist_errors(errors)
|
|
87
|
+
end
|
|
61
88
|
|
|
62
|
-
|
|
63
|
-
|
|
89
|
+
if custom_events.any?
|
|
90
|
+
event_buckets, event_samples = aggregate_custom_events(custom_events)
|
|
64
91
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
92
|
+
adapter = Catpm::Adapter.current
|
|
93
|
+
adapter.persist_event_buckets(event_buckets)
|
|
94
|
+
adapter.persist_event_samples(event_samples)
|
|
95
|
+
end
|
|
68
96
|
end
|
|
69
97
|
end
|
|
70
98
|
|
|
@@ -127,10 +155,20 @@ module Catpm
|
|
|
127
155
|
# TDigest
|
|
128
156
|
bucket[:tdigest].add(event.duration)
|
|
129
157
|
|
|
158
|
+
# Compute error fingerprint (used for both samples and error grouping)
|
|
159
|
+
error_fp = nil
|
|
160
|
+
if event.error?
|
|
161
|
+
error_fp = Catpm::Fingerprint.generate(
|
|
162
|
+
kind: event.kind,
|
|
163
|
+
error_class: event.error_class,
|
|
164
|
+
backtrace: event.backtrace
|
|
165
|
+
)
|
|
166
|
+
end
|
|
167
|
+
|
|
130
168
|
# Collect samples
|
|
131
169
|
sample_type = determine_sample_type(event)
|
|
132
170
|
if sample_type
|
|
133
|
-
|
|
171
|
+
sample_hash = {
|
|
134
172
|
bucket_key: key,
|
|
135
173
|
kind: event.kind,
|
|
136
174
|
sample_type: sample_type,
|
|
@@ -138,29 +176,27 @@ module Catpm
|
|
|
138
176
|
duration: event.duration,
|
|
139
177
|
context: event.context
|
|
140
178
|
}
|
|
179
|
+
sample_hash[:error_fingerprint] = error_fp if error_fp
|
|
180
|
+
samples << sample_hash
|
|
141
181
|
end
|
|
142
182
|
|
|
143
183
|
# Error grouping
|
|
144
|
-
if
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
error_class: event.error_class,
|
|
148
|
-
backtrace: event.backtrace
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
error = error_groups[fp] ||= {
|
|
152
|
-
fingerprint: fp,
|
|
184
|
+
if error_fp
|
|
185
|
+
error = error_groups[error_fp] ||= {
|
|
186
|
+
fingerprint: error_fp,
|
|
153
187
|
kind: event.kind,
|
|
154
188
|
error_class: event.error_class,
|
|
155
189
|
message: event.error_message,
|
|
156
190
|
occurrences_count: 0,
|
|
157
191
|
first_occurred_at: event.started_at,
|
|
158
192
|
last_occurred_at: event.started_at,
|
|
159
|
-
new_contexts: []
|
|
193
|
+
new_contexts: [],
|
|
194
|
+
occurrence_times: []
|
|
160
195
|
}
|
|
161
196
|
|
|
162
197
|
error[:occurrences_count] += 1
|
|
163
198
|
error[:last_occurred_at] = [ error[:last_occurred_at], event.started_at ].max
|
|
199
|
+
error[:occurrence_times] << event.started_at
|
|
164
200
|
|
|
165
201
|
if error[:new_contexts].size < Catpm.config.max_error_contexts
|
|
166
202
|
error[:new_contexts] << build_error_context(event)
|
|
@@ -237,6 +273,14 @@ module Catpm
|
|
|
237
273
|
sample[:_skip] = true
|
|
238
274
|
end
|
|
239
275
|
end
|
|
276
|
+
when 'error'
|
|
277
|
+
fp = sample[:error_fingerprint]
|
|
278
|
+
if fp
|
|
279
|
+
existing = Catpm::Sample.where(sample_type: 'error', error_fingerprint: fp)
|
|
280
|
+
if existing.count >= Catpm.config.max_error_samples_per_fingerprint
|
|
281
|
+
existing.order(recorded_at: :asc).first.destroy
|
|
282
|
+
end
|
|
283
|
+
end
|
|
240
284
|
end
|
|
241
285
|
end
|
|
242
286
|
|
|
@@ -248,7 +292,11 @@ module Catpm
|
|
|
248
292
|
occurred_at: event.started_at.iso8601,
|
|
249
293
|
kind: event.kind,
|
|
250
294
|
operation: event.context.slice(:method, :path, :params, :job_class, :job_id, :queue, :target, :metadata),
|
|
251
|
-
backtrace:
|
|
295
|
+
backtrace: begin
|
|
296
|
+
bt = event.backtrace || []
|
|
297
|
+
limit = Catpm.config.backtrace_lines
|
|
298
|
+
limit ? bt.first(limit) : bt
|
|
299
|
+
end,
|
|
252
300
|
duration: event.duration,
|
|
253
301
|
status: event.status
|
|
254
302
|
}
|
data/lib/catpm/lifecycle.rb
CHANGED
|
@@ -10,17 +10,11 @@ module Catpm
|
|
|
10
10
|
initialize_flusher
|
|
11
11
|
apply_patches
|
|
12
12
|
|
|
13
|
-
#
|
|
14
|
-
# For forking servers,
|
|
15
|
-
#
|
|
13
|
+
# Start the flusher in the current process.
|
|
14
|
+
# For forking servers (Puma, Passenger, Unicorn, etc.),
|
|
15
|
+
# the middleware detects fork via PID and restarts automatically.
|
|
16
16
|
Catpm.flusher&.start
|
|
17
17
|
|
|
18
|
-
if defined?(::PhusionPassenger)
|
|
19
|
-
register_passenger_hook
|
|
20
|
-
elsif defined?(::Pitchfork)
|
|
21
|
-
register_pitchfork_hook
|
|
22
|
-
end
|
|
23
|
-
|
|
24
18
|
register_shutdown_hooks
|
|
25
19
|
end
|
|
26
20
|
|
|
@@ -57,20 +51,6 @@ module Catpm
|
|
|
57
51
|
jitter: Catpm.config.flush_jitter
|
|
58
52
|
)
|
|
59
53
|
end
|
|
60
|
-
|
|
61
|
-
def register_passenger_hook
|
|
62
|
-
flusher = Catpm.flusher
|
|
63
|
-
::PhusionPassenger.on_event(:starting_worker_process) do |forked|
|
|
64
|
-
flusher&.start if forked
|
|
65
|
-
end
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
def register_pitchfork_hook
|
|
69
|
-
flusher = Catpm.flusher
|
|
70
|
-
::Pitchfork.configure do |server|
|
|
71
|
-
server.after_worker_fork { flusher&.start }
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
54
|
end
|
|
75
55
|
end
|
|
76
56
|
end
|
data/lib/catpm/middleware.rb
CHANGED
data/lib/catpm/version.rb
CHANGED
data/lib/generators/catpm/templates/initializer.rb.tt
CHANGED
|
@@ -20,6 +20,7 @@ Catpm.configure do |config|
|
|
|
20
20
|
config.instrument_http = true # Track HTTP requests (default: true)
|
|
21
21
|
config.instrument_jobs = false # Track ActiveJob (default: false)
|
|
22
22
|
config.instrument_segments = true # Track SQL/view/cache segments (default: true)
|
|
23
|
+
# config.track_own_requests = false # Track catpm dashboard requests (default: false)
|
|
23
24
|
# config.instrument_net_http = false # Patch Net::HTTP for outbound tracking (default: false)
|
|
24
25
|
# config.instrument_middleware_stack = false # Decompose middleware into per-middleware segments (default: false)
|
|
25
26
|
# config.max_segments_per_request = 50 # Cap segments per request (keeps slowest)
|
data/lib/tasks/catpm_tasks.rake
CHANGED
|
@@ -1,6 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
namespace :catpm do
|
|
4
|
+
desc 'Add missing columns to catpm tables (safe to run multiple times)'
|
|
5
|
+
task upgrade: :environment do
|
|
6
|
+
connection = ActiveRecord::Base.connection
|
|
7
|
+
|
|
8
|
+
unless connection.column_exists?(:catpm_samples, :error_fingerprint)
|
|
9
|
+
connection.add_column :catpm_samples, :error_fingerprint, :string, limit: 64
|
|
10
|
+
connection.add_index :catpm_samples, :error_fingerprint, name: 'idx_catpm_samples_error_fp'
|
|
11
|
+
puts '[catpm] Added error_fingerprint column to catpm_samples'
|
|
12
|
+
else
|
|
13
|
+
puts '[catpm] catpm_samples.error_fingerprint already exists, skipping'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
unless connection.column_exists?(:catpm_errors, :occurrence_buckets)
|
|
17
|
+
connection.add_column :catpm_errors, :occurrence_buckets, :json
|
|
18
|
+
puts '[catpm] Added occurrence_buckets column to catpm_errors'
|
|
19
|
+
else
|
|
20
|
+
puts '[catpm] catpm_errors.occurrence_buckets already exists, skipping'
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|