roundhouse_ui 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/roundhouse_ui/dashboard_controller.rb +5 -0
- data/app/controllers/roundhouse_ui/errors_controller.rb +3 -51
- data/app/views/layouts/roundhouse_ui/application.html.erb +67 -5
- data/app/views/roundhouse_ui/dashboard/show.html.erb +56 -22
- data/app/views/roundhouse_ui/jobs/show.html.erb +13 -2
- data/lib/roundhouse_ui/error_groups.rb +65 -0
- data/lib/roundhouse_ui/health.rb +65 -0
- data/lib/roundhouse_ui/version.rb +1 -1
- data/lib/roundhouse_ui.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5064ee74f5aea444f5762d2f466f0b4c05d9c7af6c44a48045e6703d48b72587
|
|
4
|
+
data.tar.gz: 3b5a08f1769a5bd5ee66bdbe5ccd586842373b45530b95cbc4ff6340fe17796e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6d3cf1e161baec06cb4b899d9f97065fb216341716719756c062a140a811bf51997748f9bcc322dc78aaa8ec36d56311decc511b70eea8d53a07a57a5b317b8c
|
|
7
|
+
data.tar.gz: 15a01ddaa5fa81c37c67516c369d1dac2b795b011e5dad1927b3b70aeed47b365a4938765e78f9eae910da0a39e36fe174d6170ba0226d1eeab404a700f4c218
|
|
@@ -5,6 +5,11 @@ module RoundhouseUi
|
|
|
5
5
|
# Builds everything the dashboard overview renders: global stats, the queue
# list, derived metrics, the composite health verdict, and two short
# "insight" lists (top failing job groups and queues that are backing up).
def show
  @stats = Sidekiq::Stats.new
  @queues = Sidekiq::Queue.all
  @metrics = Metrics.new(stats: @stats)
  @health = Health.new(stats: @stats, queues: @queues, metrics: @metrics)

  # Highest-signal slices for the overview. ErrorGroups performs its own scan
  # of the failure sets, so it is given a smaller cap (200) here than its
  # default to keep the dashboard cheap.
  @top_errors = ErrorGroups.new(limit: 200).call.groups.first(5)

  # Read each queue's latency exactly once and reuse that snapshot for both the
  # threshold filter and the ordering, so the two steps always agree and no
  # queue is queried twice. Queues waiting longer than 5s, slowest first.
  @problem_queues = @queues
    .map { |queue| [ queue, queue.latency ] }
    .select { |_queue, latency| latency > 5 }
    .sort_by { |_queue, latency| -latency }
    .first(5)
    .map(&:first)
end
|
|
9
14
|
|
|
10
15
|
# Polled by the dashboard for live counts (same approach Sidekiq Web uses —
|
|
@@ -3,59 +3,11 @@ module RoundhouseUi
|
|
|
3
3
|
# (job class + error class) — so one bad deploy reads as a single issue with
|
|
4
4
|
# a count, not five thousand identical rows. The aggregation Sidekiq Web lacks.
|
|
5
5
|
class ErrorsController < ApplicationController
|
|
6
|
-
SCAN_LIMIT = 1_000 # cap entries scanned per pass; shown honestly in the view
|
|
7
|
-
|
|
8
6
|
def index
|
|
9
7
|
@query = params[:q].to_s.strip
|
|
10
|
-
@scan_limit =
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
private
|
|
15
|
-
|
|
16
|
-
def aggregate
|
|
17
|
-
groups = {}
|
|
18
|
-
scanned = 0
|
|
19
|
-
truncated = false
|
|
20
|
-
|
|
21
|
-
sources.each do |source, set|
|
|
22
|
-
set.each do |entry|
|
|
23
|
-
scanned += 1
|
|
24
|
-
if scanned > SCAN_LIMIT
|
|
25
|
-
truncated = true
|
|
26
|
-
break
|
|
27
|
-
end
|
|
28
|
-
record(groups, source, entry)
|
|
29
|
-
end
|
|
30
|
-
break if truncated
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
list = groups.values.sort_by { |g| -g[:count] }
|
|
34
|
-
list = list.select { |g| "#{g[:klass]} #{g[:error]}".downcase.include?(@query.downcase) } if @query.present?
|
|
35
|
-
[ list, scanned, truncated ]
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
# Sidekiq's native sets, plus the sidekiq-failures `failed` set when opted in
|
|
39
|
-
# and that gem is loaded. Its FailureSet is a Sidekiq::JobSet, so it iterates
|
|
40
|
-
# exactly like the others — no special-casing in the aggregation above.
|
|
41
|
-
def sources
|
|
42
|
-
sets = { "retry" => Sidekiq::RetrySet.new, "dead" => Sidekiq::DeadSet.new }
|
|
43
|
-
if RoundhouseUi.show_sidekiq_failures && defined?(Sidekiq::Failures::FailureSet)
|
|
44
|
-
sets["failed"] = Sidekiq::Failures::FailureSet.new
|
|
45
|
-
end
|
|
46
|
-
sets
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def record(groups, source, entry)
|
|
50
|
-
error = entry.item["error_class"] || "UnknownError"
|
|
51
|
-
group = (groups["#{entry.klass}|#{error}"] ||= {
|
|
52
|
-
klass: entry.klass, error: error, count: 0, last_at: nil, queues: [], sources: []
|
|
53
|
-
})
|
|
54
|
-
group[:count] += 1
|
|
55
|
-
group[:queues] |= [ entry.queue ]
|
|
56
|
-
group[:sources] |= [ source ]
|
|
57
|
-
at = entry.at
|
|
58
|
-
group[:last_at] = at if at && (group[:last_at].nil? || at > group[:last_at])
|
|
8
|
+
@scan_limit = ErrorGroups::DEFAULT_SCAN_LIMIT
|
|
9
|
+
result = ErrorGroups.new(query: @query).call
|
|
10
|
+
@groups, @scanned, @truncated = result.groups, result.scanned, result.truncated
|
|
59
11
|
end
|
|
60
12
|
end
|
|
61
13
|
end
|
|
@@ -117,12 +117,52 @@
|
|
|
117
117
|
.rh-card .d.up { color:var(--good); } .rh-card .d.bad { color:var(--crit); }
|
|
118
118
|
.rh-card .pill { display:inline-flex; align-items:center; gap:6px; font-size:13px; font-weight:600; padding:4px 10px; border-radius:7px; margin-bottom:9px; background:rgba(226,165,63,.14); color:var(--warn); }
|
|
119
119
|
.rh-card .pill.ok { background:rgba(68,197,140,.14); color:var(--good); }
|
|
120
|
-
.rh-chart-wrap { background:var(--panel); border:1px solid var(--line-soft); border-radius:12px; padding:18px 20px 10px; margin-bottom:24px; }
|
|
120
|
+
.rh-chart-wrap { background:var(--panel); border:1px solid var(--line-soft); border-radius:12px; padding:18px 20px 10px; margin-bottom:24px; position:relative; }
|
|
121
|
+
#rh-chart-tip { position:absolute; top:34px; transform:translateX(-50%); pointer-events:none; background:var(--panel-3); border:1px solid var(--line); color:var(--text); font:11px var(--mono); padding:2px 7px; border-radius:6px; white-space:nowrap; }
|
|
122
|
+
#rh-chart-tip[hidden] { display:none; }
|
|
121
123
|
.rh-chart-wrap .top { display:flex; align-items:baseline; gap:12px; margin-bottom:6px; }
|
|
122
124
|
.rh-chart-wrap h3 { font-size:13px; font-weight:600; margin:0; }
|
|
123
125
|
.rh-chart-wrap .now { margin-left:auto; font-family:var(--mono); font-size:13px; color:var(--accent); }
|
|
124
126
|
canvas#rh-chart { display:block; width:100%; height:90px; }
|
|
125
127
|
.rh-cb { width:15px; height:15px; }
|
|
128
|
+
|
|
129
|
+
/* composite health banner */
|
|
130
|
+
.rh-health { background:var(--panel); border:1px solid var(--line-soft); border-left:3px solid var(--faint); border-radius:12px; margin-bottom:18px; }
|
|
131
|
+
.rh-health-ok { border-left-color:var(--good); }
|
|
132
|
+
.rh-health-warn { border-left-color:var(--warn); }
|
|
133
|
+
.rh-health-crit { border-left-color:var(--crit); }
|
|
134
|
+
.rh-health summary { display:flex; align-items:center; gap:12px; padding:14px 16px; cursor:pointer; list-style:none; }
|
|
135
|
+
.rh-health summary::-webkit-details-marker { display:none; }
|
|
136
|
+
.rh-health-dot { width:9px; height:9px; border-radius:50%; flex:none; background:var(--faint); }
|
|
137
|
+
.rh-health-ok .rh-health-dot { background:var(--good); }
|
|
138
|
+
.rh-health-warn .rh-health-dot { background:var(--warn); }
|
|
139
|
+
.rh-health-crit .rh-health-dot { background:var(--crit); }
|
|
140
|
+
.rh-health-verdict { font-weight:650; font-size:15px; }
|
|
141
|
+
.rh-health-ok .rh-health-verdict { color:var(--good); }
|
|
142
|
+
.rh-health-warn .rh-health-verdict { color:var(--warn); }
|
|
143
|
+
.rh-health-crit .rh-health-verdict { color:var(--crit); }
|
|
144
|
+
.rh-health-reason { color:var(--muted); font-size:13px; }
|
|
145
|
+
.rh-health-cta { margin-left:auto; font:11px var(--mono); color:var(--faint); }
|
|
146
|
+
.rh-health[open] .rh-health-cta { color:var(--accent); }
|
|
147
|
+
.rh-health-signals { padding:2px 16px 14px; display:flex; flex-direction:column; gap:8px; border-top:1px solid var(--line-soft); margin-top:2px; padding-top:12px; }
|
|
148
|
+
.rh-sig { display:flex; align-items:center; gap:10px; font-size:13px; }
|
|
149
|
+
.rh-sig b { min-width:170px; font-weight:600; }
|
|
150
|
+
|
|
151
|
+
/* insight panels */
|
|
152
|
+
.rh-insights { display:grid; gap:14px; margin-bottom:24px; }
|
|
153
|
+
@media(min-width:760px){ .rh-insights { grid-template-columns:1fr 1fr; } }
|
|
154
|
+
.rh-insight { padding:6px 0; }
|
|
155
|
+
.rh-insight-h { display:flex; align-items:baseline; justify-content:space-between; gap:10px; font-size:13px; font-weight:600; padding:13px 16px 10px; }
|
|
156
|
+
.rh-insight-row { display:flex; align-items:center; gap:12px; padding:10px 16px; border-top:1px solid var(--line-soft); }
|
|
157
|
+
.rh-insight-row .rh-insight-main { flex:1; min-width:0; font-size:13px; }
|
|
158
|
+
.rh-insight-row .rh-pill { font-variant-numeric:tabular-nums; }
|
|
159
|
+
.rh-insight .rh-empty { padding:18px 16px; }
|
|
160
|
+
|
|
161
|
+
/* collapsible disclosure (large args, full backtrace) */
|
|
162
|
+
.rh-disclose summary { cursor:pointer; font:12px var(--mono); color:var(--accent); padding:6px 0; list-style:none; }
|
|
163
|
+
.rh-disclose summary::-webkit-details-marker { display:none; }
|
|
164
|
+
.rh-disclose summary::before { content:"▸ "; }
|
|
165
|
+
.rh-disclose[open] summary::before { content:"▾ "; }
|
|
126
166
|
.rh-field { margin-bottom:16px; max-width:640px; }
|
|
127
167
|
.rh-field label { display:block; font-size:12px; color:var(--muted); margin-bottom:6px; }
|
|
128
168
|
.rh-field input, .rh-field textarea { width:100%; background:var(--panel); border:1px solid var(--line); border-radius:9px; padding:9px 12px; color:var(--text); font:13px var(--mono); }
|
|
@@ -160,6 +200,7 @@
|
|
|
160
200
|
var started = false, lastProcessed = null, lastFailed = null, lastBacklog = null, lastT = null;
|
|
161
201
|
var POLL_MS = <%= (RoundhouseUi.poll_interval.to_f * 1000).round %>;
|
|
162
202
|
var samples = [], buckets = [], bucketStart = null; // samples = current bucket; buckets = finalized per-interval averages
|
|
203
|
+
var lastPts = []; // points last drawn, for the hover tooltip to read
|
|
163
204
|
function setText(id, t) { var el = document.getElementById(id); if (el) el.textContent = t; }
|
|
164
205
|
function humanizeEta(s) {
|
|
165
206
|
if (s < 60) return "~" + Math.round(s) + "s";
|
|
@@ -179,12 +220,28 @@
|
|
|
179
220
|
// finalized buckets + the in-progress bucket as a provisional last point,
|
|
180
221
|
// so the chart shows data immediately instead of waiting a full interval.
|
|
181
222
|
var pts = buckets.slice(-60); if (samples.length) pts = pts.concat([ avg(samples) ]);
|
|
182
|
-
|
|
183
|
-
var
|
|
223
|
+
lastPts = pts;
|
|
224
|
+
var n = pts.length; if (n < 2) { setText("rh-chart-peak", ""); return; }
|
|
225
|
+
var peak = Math.max.apply(null, pts);
|
|
226
|
+
var max = peak * 1.25 || 1;
|
|
184
227
|
var x = function (i) { return i / (n - 1) * w; }, y = function (v) { return h - pad - v / max * (h - pad * 2); };
|
|
185
228
|
var g = ctx.createLinearGradient(0, 0, 0, h); g.addColorStop(0, "rgba(110,139,255,.30)"); g.addColorStop(1, "rgba(110,139,255,0)");
|
|
186
229
|
ctx.beginPath(); ctx.moveTo(0, h); pts.forEach(function (v, i) { ctx.lineTo(x(i), y(v)); }); ctx.lineTo(w, h); ctx.closePath(); ctx.fillStyle = g; ctx.fill();
|
|
187
230
|
ctx.beginPath(); pts.forEach(function (v, i) { i ? ctx.lineTo(x(i), y(v)) : ctx.moveTo(x(i), y(v)); }); ctx.strokeStyle = "#6E8BFF"; ctx.lineWidth = 2; ctx.lineJoin = "round"; ctx.stroke();
|
|
231
|
+
// emphasized endpoint (small marker; canvas is vertically scaled, so a rect reads cleaner than an arc)
|
|
232
|
+
ctx.fillStyle = "#6E8BFF"; ctx.fillRect(x(n - 1) - 3, y(pts[n - 1]) - 3, 6, 6);
|
|
233
|
+
setText("rh-chart-peak", "peak " + Math.round(peak) + "/s");
|
|
234
|
+
}
|
|
235
|
+
// Hover tooltip: map the cursor's fractional x to the nearest drawn point.
|
|
236
|
+
// Hover tooltip: map the cursor's fractional x across the canvas to the
// nearest point that draw() last rendered (lastPts) and show its value.
function chartHover(e) {
  var cv = document.getElementById("rh-chart"), tip = document.getElementById("rh-chart-tip");
  if (!cv || !tip || lastPts.length < 2) return;
  var rect = cv.getBoundingClientRect();
  // A hidden / zero-width canvas would make the fraction NaN and render "NaN/s".
  if (!rect.width) return;
  var px = e.clientX - rect.left;                 // cursor x inside the canvas
  var fx = Math.min(1, Math.max(0, px / rect.width));
  var idx = Math.round(fx * (lastPts.length - 1));
  tip.textContent = Math.round(lastPts[idx]) + "/s";
  // The tooltip is absolutely positioned against .rh-chart-wrap, which has
  // horizontal padding, while px is measured from the canvas edge. Adding the
  // canvas's own offset inside the wrapper keeps the tip centred on the cursor.
  tip.style.left = (cv.offsetLeft + px) + "px";
  tip.hidden = false;
}
|
|
189
246
|
function apply(d) {
|
|
190
247
|
Object.keys(d).forEach(function (k) {
|
|
@@ -231,6 +288,11 @@
|
|
|
231
288
|
.catch(function () {});
|
|
232
289
|
}
|
|
233
290
|
function startOnce() { if (started) return; started = true; poll(); setInterval(poll, POLL_MS); }
|
|
291
|
+
// Hooks the hover tooltip up to the throughput canvas when it is on the page.
// Handlers are assigned as properties, so running this again replaces them
// instead of stacking duplicate listeners.
function wireChart() {
  var canvas = document.getElementById("rh-chart");
  if (!canvas) return;

  function hideTip() {
    var tipEl = document.getElementById("rh-chart-tip");
    if (tipEl) tipEl.hidden = true;
  }

  canvas.onmousemove = chartHover;
  canvas.onmouseleave = hideTip;
}
|
|
234
296
|
function syncTheme() { var b = document.getElementById("rh-theme"); if (b) b.textContent = document.documentElement.getAttribute("data-theme") === "light" ? "☀" : "☾"; }
|
|
235
297
|
function syncWidth() { var b = document.getElementById("rh-width"); if (b) b.classList.toggle("is-on", document.documentElement.getAttribute("data-width") === "full"); }
|
|
236
298
|
|
|
@@ -342,8 +404,8 @@
|
|
|
342
404
|
else if (e.key === "Enter") { e.preventDefault(); if (palFiltered[palSel]) palRun(palFiltered[palSel]); }
|
|
343
405
|
});
|
|
344
406
|
|
|
345
|
-
document.addEventListener("turbo:load", function () { startOnce(); syncTheme(); syncWidth(); setActiveNav(); restoreChartInterval(); draw(); });
|
|
346
|
-
document.addEventListener("DOMContentLoaded", function () { startOnce(); syncTheme(); syncWidth(); setActiveNav(); restoreChartInterval(); });
|
|
407
|
+
document.addEventListener("turbo:load", function () { startOnce(); syncTheme(); syncWidth(); setActiveNav(); restoreChartInterval(); wireChart(); draw(); });
|
|
408
|
+
document.addEventListener("DOMContentLoaded", function () { startOnce(); syncTheme(); syncWidth(); setActiveNav(); restoreChartInterval(); wireChart(); });
|
|
347
409
|
document.addEventListener("visibilitychange", function () { if (!document.hidden) poll(); });
|
|
348
410
|
})();
|
|
349
411
|
</script>
|
|
@@ -1,49 +1,83 @@
|
|
|
1
1
|
<% content_for :title, "Dashboard" %>
|
|
2
2
|
<% content_for :crumb, Rails.env %>
|
|
3
3
|
|
|
4
|
-
<%
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
4
|
+
<% verdict = { ok: "Healthy", warn: "Degraded", crit: "Critical" }[@health.status] %>
|
|
5
|
+
<details class="rh-health rh-health-<%= @health.status %>"<%= " open".html_safe unless @health.healthy? %>>
|
|
6
|
+
<summary>
|
|
7
|
+
<span class="rh-health-dot"></span>
|
|
8
|
+
<span class="rh-health-verdict"><%= verdict %></span>
|
|
9
|
+
<span class="rh-health-reason"><%= @health.reason %></span>
|
|
10
|
+
<span class="rh-health-cta">why ▾</span>
|
|
11
|
+
</summary>
|
|
12
|
+
<div class="rh-health-signals">
|
|
13
|
+
<% @health.signals.each do |s| %>
|
|
14
|
+
<div class="rh-sig">
|
|
15
|
+
<span class="rh-st rh-st-<%= s.status %>"><%= s.status %></span>
|
|
16
|
+
<b><%= s.label %></b>
|
|
17
|
+
<span class="rh-sub"><%= s.detail %></span>
|
|
12
18
|
</div>
|
|
13
19
|
<% end %>
|
|
14
20
|
</div>
|
|
15
|
-
|
|
21
|
+
</details>
|
|
16
22
|
|
|
17
23
|
<div class="rh-cards">
|
|
18
|
-
<div class="rh-card">
|
|
19
|
-
<% if stuck.any? %>
|
|
20
|
-
<span class="pill">⚠ Degraded</span>
|
|
21
|
-
<div class="k"><b style="color:var(--warn)"><%= stuck.first.name %></b> queue over budget</div>
|
|
22
|
-
<% else %>
|
|
23
|
-
<span class="pill ok">✓ Healthy</span>
|
|
24
|
-
<div class="k">all queues within budget</div>
|
|
25
|
-
<% end %>
|
|
26
|
-
</div>
|
|
27
24
|
<div class="rh-card">
|
|
28
25
|
<div class="k">Processed</div>
|
|
29
26
|
<div class="v num" data-stat="processed"><%= number_with_delimiter @stats.processed %></div>
|
|
30
|
-
<div class="d"><span class="num" data-stat="rate">—</span> / min</div>
|
|
27
|
+
<div class="d"><span class="num" data-stat="rate">—</span> / min · <%= ((1 - @metrics.failure_ratio) * 100).round(1) %>% ok</div>
|
|
31
28
|
</div>
|
|
32
29
|
<div class="rh-card">
|
|
33
30
|
<div class="k">Failed · total</div>
|
|
34
31
|
<div class="v num" data-stat="failed"><%= number_with_delimiter @stats.failed %></div>
|
|
35
|
-
<div class="d bad"><span class="num" data-stat="dead"><%= number_with_delimiter @stats.dead_size %></span> dead</div>
|
|
32
|
+
<div class="d bad"><span class="num" data-stat="dead"><%= number_with_delimiter @stats.dead_size %></span> dead · <%= (@metrics.failure_ratio * 100).round(1) %>% rate</div>
|
|
36
33
|
</div>
|
|
37
34
|
<div class="rh-card">
|
|
38
35
|
<div class="k">Busy threads</div>
|
|
39
36
|
<div class="v num" data-stat="busy"><%= @stats.workers_size %></div>
|
|
40
|
-
<div class="d"
|
|
37
|
+
<div class="d"><%= @metrics.utilization ? "#{(@metrics.utilization * 100).round}% of #{@metrics.concurrency} busy" : "no workers reporting" %></div>
|
|
38
|
+
</div>
|
|
39
|
+
<div class="rh-card">
|
|
40
|
+
<div class="k">Backlog</div>
|
|
41
|
+
<div class="v num"><%= number_with_delimiter @metrics.backlog %></div>
|
|
42
|
+
<div class="d">enqueued <span class="num" data-stat="enqueued"><%= number_with_delimiter @stats.enqueued %></span> · scheduled <span class="num" data-stat="scheduled"><%= number_with_delimiter @stats.scheduled_size %></span></div>
|
|
41
43
|
</div>
|
|
42
44
|
</div>
|
|
43
45
|
|
|
44
46
|
<div class="rh-chart-wrap">
|
|
45
|
-
<div class="top"><h3>Throughput</h3><span class="rh-sub">jobs / sec · live</span><span class="now"><span id="rh-chart-now">—</span>/s</span><select id="rh-chart-interval" aria-label="Throughput interval" style="margin-left:12px;background:var(--panel-2);color:var(--muted);border:1px solid var(--line);border-radius:7px;padding:3px 6px;font:12px var(--sans)"><option value="10">per 10s</option><option value="30" selected>per 30s</option><option value="60">per 1m</option><option value="300">per 5m</option></select></div>
|
|
47
|
+
<div class="top"><h3>Throughput</h3><span class="rh-sub">jobs / sec · live</span><span class="rh-sub" id="rh-chart-peak" style="margin-left:10px"></span><span class="now"><span id="rh-chart-now">—</span>/s</span><select id="rh-chart-interval" aria-label="Throughput interval" style="margin-left:12px;background:var(--panel-2);color:var(--muted);border:1px solid var(--line);border-radius:7px;padding:3px 6px;font:12px var(--sans)"><option value="10">per 10s</option><option value="30" selected>per 30s</option><option value="60">per 1m</option><option value="300">per 5m</option></select></div>
|
|
46
48
|
<canvas id="rh-chart" width="1100" height="180"></canvas>
|
|
49
|
+
<div id="rh-chart-tip" hidden></div>
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
<div class="rh-insights">
|
|
53
|
+
<div class="rh-panel rh-insight">
|
|
54
|
+
<div class="rh-insight-h">Top failing job classes <%= link_to "all errors →", errors_path, class: "rh-sub" %></div>
|
|
55
|
+
<% if @top_errors.empty? %>
|
|
56
|
+
<div class="rh-empty">No failing jobs 🎉</div>
|
|
57
|
+
<% else %>
|
|
58
|
+
<% @top_errors.each do |g| %>
|
|
59
|
+
<div class="rh-insight-row">
|
|
60
|
+
<div class="rh-insight-main"><%= link_to g[:klass], errors_path(q: g[:klass]), class: "rh-joblink" %> <%= error_trace_link(klass: g[:klass], error: g[:error]) %><br><span class="rh-err"><%= g[:error] %></span></div>
|
|
61
|
+
<span class="rh-pill"><%= number_with_delimiter g[:count] %></span>
|
|
62
|
+
</div>
|
|
63
|
+
<% end %>
|
|
64
|
+
<% end %>
|
|
65
|
+
</div>
|
|
66
|
+
|
|
67
|
+
<div class="rh-panel rh-insight">
|
|
68
|
+
<div class="rh-insight-h">Problem queues <%= link_to "all queues →", queues_path, class: "rh-sub" %></div>
|
|
69
|
+
<% if @problem_queues.empty? %>
|
|
70
|
+
<div class="rh-empty">All queues fresh — nothing backing up.</div>
|
|
71
|
+
<% else %>
|
|
72
|
+
<% @problem_queues.each do |q| %>
|
|
73
|
+
<% label, css = queue_state(q.latency) %>
|
|
74
|
+
<div class="rh-insight-row">
|
|
75
|
+
<div class="rh-insight-main"><%= link_to q.name, queues_path, class: "rh-joblink rh-mono" %><br><span class="rh-sub"><%= number_with_delimiter q.size %> waiting</span></div>
|
|
76
|
+
<span class="rh-st <%= css %>"><%= q.latency < 60 ? "#{q.latency.round(1)}s" : distance_of_time_in_words(0, q.latency) %></span>
|
|
77
|
+
</div>
|
|
78
|
+
<% end %>
|
|
79
|
+
<% end %>
|
|
80
|
+
</div>
|
|
47
81
|
</div>
|
|
48
82
|
|
|
49
83
|
<h2 class="rh-h2">Queues <span class="hint">live · click a queue to manage</span></h2>
|
|
@@ -9,7 +9,15 @@
|
|
|
9
9
|
</div>
|
|
10
10
|
|
|
11
11
|
<div class="rh-sec">Arguments<% if RoundhouseUi.redact_args.present? %> <span class="rh-sub">— sensitive keys masked</span><% end %></div>
|
|
12
|
-
|
|
12
|
+
<% args_json = JSON.pretty_generate(RoundhouseUi::Redaction.apply(item["args"] || [])) %>
|
|
13
|
+
<% if args_json.lines.size > 30 %>
|
|
14
|
+
<details class="rh-disclose">
|
|
15
|
+
<summary><%= pluralize(args_json.lines.size, "line") %> — expand arguments</summary>
|
|
16
|
+
<pre class="rh-pre"><%= args_json %></pre>
|
|
17
|
+
</details>
|
|
18
|
+
<% else %>
|
|
19
|
+
<pre class="rh-pre"><%= args_json %></pre>
|
|
20
|
+
<% end %>
|
|
13
21
|
|
|
14
22
|
<% if item["error_class"].present? %>
|
|
15
23
|
<div class="rh-sec">Error</div>
|
|
@@ -17,7 +25,10 @@
|
|
|
17
25
|
<% if item["error_message"].present? %><p class="rh-sub" style="margin:6px 0 0"><%= item["error_message"] %></p><% end %>
|
|
18
26
|
<% bt = item["error_backtrace"] %>
|
|
19
27
|
<% if bt.is_a?(Array) && bt.any? %>
|
|
20
|
-
<
|
|
28
|
+
<details class="rh-disclose"<%= " open".html_safe if bt.size <= 20 %>>
|
|
29
|
+
<summary><%= pluralize(bt.size, "line") %> backtrace</summary>
|
|
30
|
+
<pre class="rh-pre"><%= bt.join("\n") %></pre>
|
|
31
|
+
</details>
|
|
21
32
|
<% end %>
|
|
22
33
|
<% end %>
|
|
23
34
|
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
require "sidekiq/api"
|
|
2
|
+
|
|
3
|
+
module RoundhouseUi
  # Groups failing jobs across the retry + dead sets (and the sidekiq-failures
  # `failed` set, when opted in) by a fingerprint of job class + error class —
  # so one bad deploy reads as a single issue with a count, not thousands of
  # identical rows. Used by the Errors page and the dashboard's "top failing"
  # panel, so the aggregation lives here rather than in a controller.
  class ErrorGroups
    DEFAULT_SCAN_LIMIT = 1_000 # cap entries scanned per pass; surfaced honestly

    # groups    - Array of group hashes (:klass, :error, :count, :last_at,
    #             :queues, :sources), highest count first, after query filtering.
    # scanned   - number of entries actually folded into the groups.
    # truncated - true when at least one further entry existed but was left
    #             unread because the scan limit had been reached.
    Result = Struct.new(:groups, :scanned, :truncated, keyword_init: true)

    # query - optional substring matched case-insensitively against
    #         "<job class> <error class>"; blank means "no filter".
    # limit - maximum number of entries read across all sources combined.
    def initialize(query: nil, limit: DEFAULT_SCAN_LIMIT)
      @query = query.to_s.strip
      @limit = limit
    end

    # Scans the failure sets in order (retry, dead, then failed when enabled),
    # stopping as soon as the limit is hit, and returns a Result.
    def call
      groups = {}
      scanned = 0
      truncated = false

      sources.each do |source, set|
        set.each do |entry|
          # Check the cap before counting, so `scanned` equals the number of
          # entries recorded (never limit + 1) when the scan is cut short.
          if scanned >= @limit
            truncated = true
            break
          end
          scanned += 1
          record(groups, source, entry)
        end
        break if truncated
      end

      ranked = groups.values.sort_by { |g| -g[:count] }
      Result.new(groups: apply_query(ranked), scanned: scanned, truncated: truncated)
    end

    private

    # Keeps only the groups whose "<klass> <error>" contains the query. A query
    # with no non-whitespace character is treated as absent (the same rule as
    # ActiveSupport's `present?`, without requiring ActiveSupport here).
    def apply_query(list)
      return list unless @query.match?(/[^[:space:]]/)

      needle = @query.downcase
      list.select { |g| "#{g[:klass]} #{g[:error]}".downcase.include?(needle) }
    end

    # Sidekiq's native sets, plus the sidekiq-failures `failed` set when opted in
    # and loaded. Its FailureSet is a Sidekiq::JobSet, so it iterates like the rest.
    def sources
      sets = { "retry" => Sidekiq::RetrySet.new, "dead" => Sidekiq::DeadSet.new }
      if RoundhouseUi.show_sidekiq_failures && defined?(Sidekiq::Failures::FailureSet)
        sets["failed"] = Sidekiq::Failures::FailureSet.new
      end
      sets
    end

    # Folds one entry into its (job class, error class) group: bumps the count,
    # remembers which queues and sources it was seen in, and tracks the most
    # recent `at` value among the group's entries.
    def record(groups, source, entry)
      error = entry.item["error_class"] || "UnknownError"
      group = (groups["#{entry.klass}|#{error}"] ||= {
        klass: entry.klass, error: error, count: 0, last_at: nil, queues: [], sources: []
      })
      group[:count] += 1
      group[:queues] |= [ entry.queue ]
      group[:sources] |= [ source ]
      at = entry.at
      group[:last_at] = at if at && (group[:last_at].nil? || at > group[:last_at])
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
module RoundhouseUi
  # A composite health verdict for the dashboard. Instead of a static green dot,
  # it rolls up the signals an on-call engineer actually checks — error rate,
  # queue latency, worker utilization — into one status + a human reason, and
  # exposes the sub-signals so the banner can explain *why*.
  class Health
    # One sub-signal: a machine key, a display label, a status (:ok, :warn or
    # :crit) and a human-readable detail line.
    Signal = Struct.new(:key, :label, :status, :detail, keyword_init: true)

    # Severity order used to pick the worst signal.
    RANK = { ok: 0, warn: 1, crit: 2 }.freeze

    # stats   - stored but not read by any method in this class.
    # queues  - enumerable of objects responding to #name and #latency.
    # metrics - object responding to #failure_ratio and #utilization.
    def initialize(stats:, queues:, metrics:)
      @stats = stats
      @queues = queues
      @metrics = metrics
    end

    # The sub-signals, computed once and memoized. The utilization signal is
    # omitted when utilization is unknown.
    def signals
      @signals ||= [ error_rate_signal, latency_signal, utilization_signal ].compact
    end

    # Worst sub-signal wins.
    def status
      signals.map(&:status).max_by { |s| RANK[s] } || :ok
    end

    # Detail line of the worst signal, or a fixed all-clear message when every
    # signal is :ok.
    def reason
      worst = signals.max_by { |s| RANK[s.status] }
      return "all signals nominal" if worst.nil? || worst.status == :ok

      worst.detail
    end

    def healthy?
      status == :ok
    end

    private

    # :crit at a failure ratio of 10% or more, :warn at 2% or more.
    def error_rate_signal
      ratio = @metrics.failure_ratio
      status = if ratio >= 0.10 then :crit elsif ratio >= 0.02 then :warn else :ok end
      Signal.new(key: "error_rate", label: "Error rate (lifetime)", status: status,
                 detail: "#{(ratio * 100).round(1)}% of processed jobs have failed")
    end

    # :crit when the slowest queue's latency exceeds 600s, :warn above 60s.
    def latency_signal
      # Snapshot each queue's latency once. The value that selects the worst
      # queue is then the same value used for the threshold and the message;
      # re-reading it could return a different number.
      worst, lat = @queues.map { |q| [ q, q.latency ] }.max_by(&:last)
      return Signal.new(key: "latency", label: "Queue latency", status: :ok, detail: "no active queues") if worst.nil?

      status = if lat > 600 then :crit elsif lat > 60 then :warn else :ok end
      detail = status == :ok ? "all queues fresh (< 60s)" : "#{worst.name}: oldest job #{lat.round}s"
      Signal.new(key: "latency", label: "Queue latency", status: status, detail: detail)
    end

    # :crit when utilization is 100% or more, :warn at 85% or more.
    def utilization_signal
      util = @metrics.utilization
      return nil if util.nil? # no processes reporting in — can't judge

      status = if util >= 1.0 then :crit elsif util >= 0.85 then :warn else :ok end
      Signal.new(key: "utilization", label: "Worker utilization", status: status,
                 detail: "#{(util * 100).round}% of worker threads busy")
    end
  end
end
|
data/lib/roundhouse_ui.rb
CHANGED
|
@@ -10,6 +10,8 @@ require "roundhouse_ui/redaction"
|
|
|
10
10
|
require "roundhouse_ui/cancellation"
|
|
11
11
|
require "roundhouse_ui/cancel_middleware"
|
|
12
12
|
require "roundhouse_ui/metrics"
|
|
13
|
+
require "roundhouse_ui/error_groups"
|
|
14
|
+
require "roundhouse_ui/health"
|
|
13
15
|
|
|
14
16
|
# Brand name is "Roundhouse"; the gem and Ruby namespace are RoundhouseUi
|
|
15
17
|
# (matching the published gem name `roundhouse_ui`).
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: roundhouse_ui
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.0
|
|
4
|
+
version: 0.7.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- R.J. Robinson
|
|
@@ -97,7 +97,9 @@ files:
|
|
|
97
97
|
- lib/roundhouse_ui/cancel_middleware.rb
|
|
98
98
|
- lib/roundhouse_ui/cancellation.rb
|
|
99
99
|
- lib/roundhouse_ui/engine.rb
|
|
100
|
+
- lib/roundhouse_ui/error_groups.rb
|
|
100
101
|
- lib/roundhouse_ui/fetch.rb
|
|
102
|
+
- lib/roundhouse_ui/health.rb
|
|
101
103
|
- lib/roundhouse_ui/metrics.rb
|
|
102
104
|
- lib/roundhouse_ui/observability.rb
|
|
103
105
|
- lib/roundhouse_ui/pause.rb
|