rails_error_dashboard 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/rails_error_dashboard/errors_controller.rb +20 -0
- data/app/views/layouts/rails_error_dashboard.html.erb +3 -0
- data/app/views/rails_error_dashboard/errors/llm_health_summary.html.erb +249 -0
- data/config/routes.rb +1 -0
- data/lib/rails_error_dashboard/queries/llm_health_summary.rb +220 -0
- data/lib/rails_error_dashboard/version.rb +1 -1
- data/lib/rails_error_dashboard.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cef62933d759c659ba8fd14e78aa7afdda622ce40520dccaa7424dd45322ecb6
|
|
4
|
+
data.tar.gz: 54fbae420eb1908ed7cb3b967c23dacdbbfa077fb918afb15f60fd3cef25a226
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f49280f38ce42192a5ac6544a31559550d50bb80c33bf8e8f8486ced684a7f729997cf46a1f4bcd933f4ca903acb9456a77ee33a56efbcd696cbf683765b866a
|
|
7
|
+
data.tar.gz: 1a69c452a43372ba7476d20502e98fc7f83d57fdc41839c1f30a61dc809a4d0484b0680c6d4526c6f517b23d1bc977fa500dea243eb5e258a0dfd86ef16a142c
|
|
data/app/controllers/rails_error_dashboard/errors_controller.rb
CHANGED
|
@@ -518,6 +518,26 @@ module RailsErrorDashboard
|
|
|
518
518
|
@pagy, @channels = pagy(:offset, all_channels, limit: params[:per_page] || 25)
|
|
519
519
|
end
|
|
520
520
|
|
|
521
|
+
def llm_health_summary
|
|
522
|
+
unless RailsErrorDashboard.configuration.enable_llm_observability &&
|
|
523
|
+
RailsErrorDashboard.configuration.enable_breadcrumbs
|
|
524
|
+
@feature_disabled = true
|
|
525
|
+
@days = days_param(default: 30)
|
|
526
|
+
@models = []
|
|
527
|
+
@totals = Queries::LlmHealthSummary.blank_totals
|
|
528
|
+
@pagy = nil
|
|
529
|
+
return
|
|
530
|
+
end
|
|
531
|
+
|
|
532
|
+
days = days_param(default: 30)
|
|
533
|
+
@days = days
|
|
534
|
+
result = Queries::LlmHealthSummary.call(days, application_id: @current_application_id)
|
|
535
|
+
@totals = result[:totals]
|
|
536
|
+
all_models = result[:models]
|
|
537
|
+
|
|
538
|
+
@pagy, @models = pagy(:offset, all_models, limit: params[:per_page] || 25)
|
|
539
|
+
end
|
|
540
|
+
|
|
521
541
|
def activestorage_health_summary
|
|
522
542
|
unless RailsErrorDashboard.configuration.enable_activestorage_tracking &&
|
|
523
543
|
RailsErrorDashboard.configuration.enable_breadcrumbs
|
|
data/app/views/layouts/rails_error_dashboard.html.erb
CHANGED
|
@@ -1002,6 +1002,9 @@ tr[data-red-row-href]:hover .sev-bar { opacity: 1 !important; }
|
|
|
1002
1002
|
<% if RailsErrorDashboard.configuration.enable_activestorage_tracking && RailsErrorDashboard.configuration.enable_breadcrumbs %>
|
|
1003
1003
|
<% health_items << { path: activestorage_health_summary_errors_path(nav_params), icon: 'bi-cloud-arrow-up', label: 'ActiveStorage' } %>
|
|
1004
1004
|
<% end %>
|
|
1005
|
+
<% if RailsErrorDashboard.configuration.enable_llm_observability && RailsErrorDashboard.configuration.enable_breadcrumbs %>
|
|
1006
|
+
<% health_items << { path: llm_health_summary_errors_path(nav_params), icon: 'bi-cpu-fill', label: 'LLM' } %>
|
|
1007
|
+
<% end %>
|
|
1005
1008
|
|
|
1006
1009
|
<% if health_items.any? %>
|
|
1007
1010
|
<div style="margin-bottom: var(--space-2);" id="navHealthSection">
|
|
data/app/views/rails_error_dashboard/errors/llm_health_summary.html.erb
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
<% content_for :page_title, "LLM Health" %>
|
|
2
|
+
|
|
3
|
+
<div>
|
|
4
|
+
<div class="d-flex justify-content-between align-items-center mb-4">
|
|
5
|
+
<h1 style="font-size: 20px; font-weight: 700; margin: 0;">
|
|
6
|
+
<i class="bi bi-cpu-fill me-2"></i>
|
|
7
|
+
LLM Health
|
|
8
|
+
</h1>
|
|
9
|
+
|
|
10
|
+
<% unless @feature_disabled %>
|
|
11
|
+
<div class="btn-group" role="group">
|
|
12
|
+
<%= link_to llm_health_summary_errors_path(days: 7), class: "btn btn-sm #{@days == 7 ? 'btn-primary' : 'btn-outline-primary'}" do %>
|
|
13
|
+
7 Days
|
|
14
|
+
<% end %>
|
|
15
|
+
<%= link_to llm_health_summary_errors_path(days: 30), class: "btn btn-sm #{@days == 30 ? 'btn-primary' : 'btn-outline-primary'}" do %>
|
|
16
|
+
30 Days
|
|
17
|
+
<% end %>
|
|
18
|
+
<%= link_to llm_health_summary_errors_path(days: 90), class: "btn btn-sm #{@days == 90 ? 'btn-primary' : 'btn-outline-primary'}" do %>
|
|
19
|
+
90 Days
|
|
20
|
+
<% end %>
|
|
21
|
+
</div>
|
|
22
|
+
<% end %>
|
|
23
|
+
</div>
|
|
24
|
+
|
|
25
|
+
<% if @feature_disabled %>
|
|
26
|
+
<div class="red-empty-state">
|
|
27
|
+
<i class="bi bi-cpu-fill display-1 text-muted mb-3"></i>
|
|
28
|
+
<div class="red-empty-state-title">LLM Observability Not Enabled</div>
|
|
29
|
+
<p class="text-muted">
|
|
30
|
+
To see per-model LLM stats here, enable LLM observability and breadcrumbs.
|
|
31
|
+
</p>
|
|
32
|
+
<div class="card mx-auto" style="max-width: 600px;">
|
|
33
|
+
<div class="card-body text-start">
|
|
34
|
+
<h6>Enable in <code>config/initializers/rails_error_dashboard.rb</code>:</h6>
|
|
35
|
+
<pre class="mb-0" style="background: var(--bg-subtle); padding: var(--space-3); border-radius: var(--radius-md); font-size: 12px;"><code>RailsErrorDashboard.configure do |config|
|
|
36
|
+
config.enable_breadcrumbs = true
|
|
37
|
+
config.enable_llm_observability = true
|
|
38
|
+
end</code></pre>
|
|
39
|
+
</div>
|
|
40
|
+
</div>
|
|
41
|
+
</div>
|
|
42
|
+
|
|
43
|
+
<% elsif @totals[:total_calls] == 0 && @totals[:total_tool_calls] == 0 %>
|
|
44
|
+
<div class="red-empty-state">
|
|
45
|
+
<i class="bi bi-cpu-fill display-1 text-success mb-3"></i>
|
|
46
|
+
<div class="red-empty-state-title">No LLM Calls Detected</div>
|
|
47
|
+
<p class="text-muted">
|
|
48
|
+
No LLM API calls were captured in error breadcrumbs over the last <%= @days %> days.
|
|
49
|
+
Instrument LLM calls in one of three ways:
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
<div class="card mx-auto" style="max-width: 720px;">
|
|
53
|
+
<div class="card-body p-0">
|
|
54
|
+
<ul class="nav nav-tabs" role="tablist" id="llmPathTabs">
|
|
55
|
+
<li class="nav-item" role="presentation">
|
|
56
|
+
<button class="nav-link active" data-bs-toggle="tab" data-bs-target="#llmPathOtel" type="button" role="tab">
|
|
57
|
+
<i class="bi bi-diagram-3 me-1"></i> OpenTelemetry
|
|
58
|
+
</button>
|
|
59
|
+
</li>
|
|
60
|
+
<li class="nav-item" role="presentation">
|
|
61
|
+
<button class="nav-link" data-bs-toggle="tab" data-bs-target="#llmPathFaraday" type="button" role="tab">
|
|
62
|
+
<i class="bi bi-arrow-left-right me-1"></i> Faraday / RubyLLM
|
|
63
|
+
</button>
|
|
64
|
+
</li>
|
|
65
|
+
<li class="nav-item" role="presentation">
|
|
66
|
+
<button class="nav-link" data-bs-toggle="tab" data-bs-target="#llmPathManual" type="button" role="tab">
|
|
67
|
+
<i class="bi bi-pencil-square me-1"></i> Manual
|
|
68
|
+
</button>
|
|
69
|
+
</li>
|
|
70
|
+
</ul>
|
|
71
|
+
|
|
72
|
+
<div class="tab-content p-3 text-start">
|
|
73
|
+
<div class="tab-pane fade show active" id="llmPathOtel" role="tabpanel">
|
|
74
|
+
<p class="mb-2"><small>Already running an OTel collector? Drop in this gem and we'll subscribe automatically.</small></p>
|
|
75
|
+
<pre class="mb-2" style="background: var(--bg-subtle); padding: var(--space-3); border-radius: var(--radius-md); font-size: 12px;"><code># Gemfile
|
|
76
|
+
gem "opentelemetry-instrumentation-ruby_llm"
|
|
77
|
+
|
|
78
|
+
# Then nothing else — OTel spans flow into RED breadcrumbs.</code></pre>
|
|
79
|
+
<small class="text-muted">Requires <code>enable_llm_observability = true</code>.</small>
|
|
80
|
+
</div>
|
|
81
|
+
|
|
82
|
+
<div class="tab-pane fade" id="llmPathFaraday" role="tabpanel">
|
|
83
|
+
<p class="mb-2"><small>Using <code>ruby_llm</code>, <code>openai</code>, <code>anthropic</code>, or any Faraday-based SDK? Insert the middleware once.</small></p>
|
|
84
|
+
<pre class="mb-2" style="background: var(--bg-subtle); padding: var(--space-3); border-radius: var(--radius-md); font-size: 12px;"><code>Faraday.default_connection_options.builder.tap do |b|
|
|
85
|
+
b.use RailsErrorDashboard::Integrations::LlmMiddleware
|
|
86
|
+
end</code></pre>
|
|
87
|
+
<small class="text-muted">RED auto-detects provider/model and tokens from the request/response.</small>
|
|
88
|
+
</div>
|
|
89
|
+
|
|
90
|
+
<div class="tab-pane fade" id="llmPathManual" role="tabpanel">
|
|
91
|
+
<p class="mb-2"><small>Direct <code>Net::HTTP</code>, gRPC, local inference, or anything else — instrument by hand.</small></p>
|
|
92
|
+
<pre class="mb-2" style="background: var(--bg-subtle); padding: var(--space-3); border-radius: var(--radius-md); font-size: 12px;"><code>ActiveSupport::Notifications.instrument(
|
|
93
|
+
"red.llm_call",
|
|
94
|
+
provider: "anthropic",
|
|
95
|
+
model: "claude-3-5-sonnet",
|
|
96
|
+
input_tokens: 1200,
|
|
97
|
+
output_tokens: 350
|
|
98
|
+
) do
|
|
99
|
+
call_anthropic(...)
|
|
100
|
+
end</code></pre>
|
|
101
|
+
<small class="text-muted">Cost is auto-estimated from tokens — payload can override with <code>cost_usd_estimate:</code>.</small>
|
|
102
|
+
</div>
|
|
103
|
+
</div>
|
|
104
|
+
</div>
|
|
105
|
+
</div>
|
|
106
|
+
</div>
|
|
107
|
+
|
|
108
|
+
<% else %>
|
|
109
|
+
<div class="row mb-4">
|
|
110
|
+
<div class="col-md-3">
|
|
111
|
+
<div class="card text-center">
|
|
112
|
+
<div class="card-body">
|
|
113
|
+
<div class="display-6 text-primary"><%= @totals[:total_calls] %></div>
|
|
114
|
+
<small class="text-muted">LLM Calls</small>
|
|
115
|
+
</div>
|
|
116
|
+
</div>
|
|
117
|
+
</div>
|
|
118
|
+
<div class="col-md-3">
|
|
119
|
+
<div class="card text-center">
|
|
120
|
+
<div class="card-body">
|
|
121
|
+
<div class="display-6 text-secondary"><%= @totals[:model_count] %></div>
|
|
122
|
+
<small class="text-muted">Models</small>
|
|
123
|
+
</div>
|
|
124
|
+
</div>
|
|
125
|
+
</div>
|
|
126
|
+
<div class="col-md-3">
|
|
127
|
+
<div class="card text-center">
|
|
128
|
+
<div class="card-body">
|
|
129
|
+
<% err_color = @totals[:error_rate] >= 10 ? "danger" : (@totals[:error_rate] >= 5 ? "warning" : "success") %>
|
|
130
|
+
<div class="display-6 text-<%= err_color %>"><%= @totals[:unique_error_count] %></div>
|
|
131
|
+
<small class="text-muted">Errors with LLM (<%= @totals[:error_rate] %>%)</small>
|
|
132
|
+
</div>
|
|
133
|
+
</div>
|
|
134
|
+
</div>
|
|
135
|
+
<div class="col-md-3">
|
|
136
|
+
<div class="card text-center">
|
|
137
|
+
<div class="card-body">
|
|
138
|
+
<div class="display-6 text-info">$<%= format("%.2f", @totals[:total_cost_usd]) %></div>
|
|
139
|
+
<small class="text-muted">Total Cost</small>
|
|
140
|
+
</div>
|
|
141
|
+
</div>
|
|
142
|
+
</div>
|
|
143
|
+
</div>
|
|
144
|
+
|
|
145
|
+
<div class="card mb-4">
|
|
146
|
+
<div class="card-header d-flex justify-content-between align-items-center">
|
|
147
|
+
<h5 class="mb-0">
|
|
148
|
+
<i class="bi bi-cpu-fill text-primary me-2"></i>
|
|
149
|
+
Performance by Model
|
|
150
|
+
<span class="badge bg-primary"><%= @totals[:model_count] %></span>
|
|
151
|
+
</h5>
|
|
152
|
+
<small class="text-muted"><%== @pagy.info_tag %></small>
|
|
153
|
+
</div>
|
|
154
|
+
<div class="card-body p-0">
|
|
155
|
+
<div class="table-responsive">
|
|
156
|
+
<table class="table table-hover mb-0">
|
|
157
|
+
<thead class="table-light">
|
|
158
|
+
<tr>
|
|
159
|
+
<th>Provider · Model</th>
|
|
160
|
+
<th width="90">Calls</th>
|
|
161
|
+
<th width="80">Tools</th>
|
|
162
|
+
<th width="130">Avg tokens</th>
|
|
163
|
+
<th width="110">Avg latency</th>
|
|
164
|
+
<th width="100">Error rate</th>
|
|
165
|
+
<th width="100">Cost</th>
|
|
166
|
+
<th>Top error</th>
|
|
167
|
+
<th width="140">Last seen</th>
|
|
168
|
+
</tr>
|
|
169
|
+
</thead>
|
|
170
|
+
<tbody>
|
|
171
|
+
<% @models.each do |entry| %>
|
|
172
|
+
<tr>
|
|
173
|
+
<td>
|
|
174
|
+
<small class="text-muted"><%= entry[:provider] %></small><br>
|
|
175
|
+
<code><%= entry[:model] %></code>
|
|
176
|
+
</td>
|
|
177
|
+
<td><%= entry[:call_count] %></td>
|
|
178
|
+
<td>
|
|
179
|
+
<% if entry[:tool_call_count].positive? %>
|
|
180
|
+
<span class="badge bg-info"><%= entry[:tool_call_count] %></span>
|
|
181
|
+
<% else %>
|
|
182
|
+
<span class="text-muted">—</span>
|
|
183
|
+
<% end %>
|
|
184
|
+
</td>
|
|
185
|
+
<td>
|
|
186
|
+
<% if entry[:avg_input_tokens] || entry[:avg_output_tokens] %>
|
|
187
|
+
<small>
|
|
188
|
+
in <strong><%= number_with_delimiter(entry[:avg_input_tokens] || 0) %></strong><br>
|
|
189
|
+
out <strong><%= number_with_delimiter(entry[:avg_output_tokens] || 0) %></strong>
|
|
190
|
+
</small>
|
|
191
|
+
<% else %>
|
|
192
|
+
<span class="text-muted">—</span>
|
|
193
|
+
<% end %>
|
|
194
|
+
</td>
|
|
195
|
+
<td>
|
|
196
|
+
<% if entry[:avg_duration_ms] %>
|
|
197
|
+
<% lat_color = entry[:avg_duration_ms] > 5000 ? "text-danger" : entry[:avg_duration_ms] > 2000 ? "text-warning" : "text-muted" %>
|
|
198
|
+
<span class="<%= lat_color %>"><%= number_with_delimiter(entry[:avg_duration_ms].round) %>ms</span>
|
|
199
|
+
<% else %>
|
|
200
|
+
<span class="text-muted">—</span>
|
|
201
|
+
<% end %>
|
|
202
|
+
</td>
|
|
203
|
+
<td>
|
|
204
|
+
<% sev_color = entry[:severity] == :danger ? "danger" : (entry[:severity] == :warning ? "warning" : "success") %>
|
|
205
|
+
<span class="badge bg-<%= sev_color %>"><%= entry[:error_rate] %>%</span>
|
|
206
|
+
<% if entry[:error_count].positive? %>
|
|
207
|
+
<br><small class="text-muted"><%= entry[:error_count] %> failed</small>
|
|
208
|
+
<% end %>
|
|
209
|
+
</td>
|
|
210
|
+
<td>
|
|
211
|
+
<% if entry[:cost_usd_sum].positive? %>
|
|
212
|
+
$<%= format("%.4f", entry[:cost_usd_sum]) %>
|
|
213
|
+
<% else %>
|
|
214
|
+
<span class="text-muted">—</span>
|
|
215
|
+
<% end %>
|
|
216
|
+
</td>
|
|
217
|
+
<td>
|
|
218
|
+
<% if entry[:top_error_class] %>
|
|
219
|
+
<code style="font-size: 11px;"><%= entry[:top_error_class] %></code>
|
|
220
|
+
<small class="text-muted">×<%= entry[:top_error_class_count] %></small>
|
|
221
|
+
<% else %>
|
|
222
|
+
<span class="text-muted">—</span>
|
|
223
|
+
<% end %>
|
|
224
|
+
</td>
|
|
225
|
+
<td><%= local_time_ago(entry[:last_seen]) %></td>
|
|
226
|
+
</tr>
|
|
227
|
+
<% end %>
|
|
228
|
+
</tbody>
|
|
229
|
+
</table>
|
|
230
|
+
</div>
|
|
231
|
+
</div>
|
|
232
|
+
<div class="card-footer border-top d-flex justify-content-between align-items-center">
|
|
233
|
+
<div>
|
|
234
|
+
<small class="text-muted">
|
|
235
|
+
<i class="bi bi-lightbulb text-warning"></i> High error rates on a model often signal quota or rate-limit issues. Slow latency (>5s) suggests provider degradation.
|
|
236
|
+
</small>
|
|
237
|
+
<small class="ms-3">
|
|
238
|
+
<a href="https://github.com/thoughtbot/opentelemetry-instrumentation-ruby_llm" target="_blank" rel="noopener" class="text-decoration-none">
|
|
239
|
+
<i class="bi bi-book"></i> Instrumentation Guide <i class="bi bi-box-arrow-up-right" style="font-size: 0.7em;"></i>
|
|
240
|
+
</a>
|
|
241
|
+
</small>
|
|
242
|
+
</div>
|
|
243
|
+
<div>
|
|
244
|
+
<%== @pagy.series_nav(:bootstrap) if @pagy.pages > 1 %>
|
|
245
|
+
</div>
|
|
246
|
+
</div>
|
|
247
|
+
</div>
|
|
248
|
+
<% end %>
|
|
249
|
+
</div>
|
data/config/routes.rb
CHANGED
|
data/lib/rails_error_dashboard/queries/llm_health_summary.rb
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsErrorDashboard
|
|
4
|
+
module Queries
|
|
5
|
+
# Query: Aggregate LLM call breadcrumbs across all errors, grouped by
|
|
6
|
+
# "provider · model". Scans error_logs breadcrumbs JSON, filters for "llm"
|
|
7
|
+
# and "llm_tool" category crumbs, and computes per-model stats: call count,
|
|
8
|
+
# tool count, avg tokens, avg latency, error rate, cost, top error class.
|
|
9
|
+
#
|
|
10
|
+
# Sorted by error rate desc, then unique-error count desc, then call volume
|
|
11
|
+
# desc — so the model causing the most errors floats to the top.
|
|
12
|
+
#
|
|
13
|
+
# Cost is read straight from the breadcrumb metadata (already estimated at
|
|
14
|
+
# capture time by LlmCallSubscriber via LlmCostEstimator). No re-estimation.
|
|
15
|
+
class LlmHealthSummary
|
|
16
|
+
DANGER_THRESHOLD = 10.0 # error rate %
|
|
17
|
+
WARNING_THRESHOLD = 5.0
|
|
18
|
+
|
|
19
|
+
def self.call(days = 30, application_id: nil)
|
|
20
|
+
new(days, application_id: application_id).call
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Public helper so controllers can render an empty-state shell without
|
|
24
|
+
# running the query (e.g., when the feature is disabled).
|
|
25
|
+
def self.blank_totals
|
|
26
|
+
{
|
|
27
|
+
total_calls: 0,
|
|
28
|
+
total_tool_calls: 0,
|
|
29
|
+
model_count: 0,
|
|
30
|
+
unique_error_count: 0,
|
|
31
|
+
error_rate: 0.0,
|
|
32
|
+
total_cost_usd: 0.0
|
|
33
|
+
}
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def initialize(days = 30, application_id: nil)
|
|
37
|
+
@days = days
|
|
38
|
+
@application_id = application_id
|
|
39
|
+
@start_date = days.days.ago
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def call
|
|
43
|
+
models = aggregated_models
|
|
44
|
+
{
|
|
45
|
+
models: models,
|
|
46
|
+
totals: totals_for(models)
|
|
47
|
+
}
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
def base_query
|
|
53
|
+
scope = ErrorLog.where("occurred_at >= ?", @start_date)
|
|
54
|
+
.where.not(breadcrumbs: nil)
|
|
55
|
+
scope = scope.where(application_id: @application_id) if @application_id.present?
|
|
56
|
+
scope
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def aggregated_models
|
|
60
|
+
results = {}
|
|
61
|
+
|
|
62
|
+
base_query.select(:id, :breadcrumbs, :occurred_at).find_each(batch_size: 500) do |error_log|
|
|
63
|
+
crumbs = parse_breadcrumbs(error_log.breadcrumbs)
|
|
64
|
+
next if crumbs.empty?
|
|
65
|
+
|
|
66
|
+
llm_crumbs = crumbs.select { |c| c["c"] == "llm" || c["c"] == "llm_tool" }
|
|
67
|
+
next if llm_crumbs.empty?
|
|
68
|
+
|
|
69
|
+
llm_crumbs.each do |crumb|
|
|
70
|
+
meta = crumb["meta"] || {}
|
|
71
|
+
provider = meta["provider"].to_s.presence || "unknown"
|
|
72
|
+
model = meta["model"].to_s.presence || "unknown"
|
|
73
|
+
key = "#{provider}·#{model}"
|
|
74
|
+
|
|
75
|
+
results[key] ||= new_entry(provider, model)
|
|
76
|
+
entry = results[key]
|
|
77
|
+
|
|
78
|
+
if crumb["c"] == "llm_tool"
|
|
79
|
+
entry[:tool_call_count] += 1
|
|
80
|
+
else
|
|
81
|
+
entry[:call_count] += 1
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
status = meta["status"].to_s
|
|
85
|
+
entry[:error_count] += 1 if status == "error" || status == "timeout"
|
|
86
|
+
|
|
87
|
+
# BreadcrumbCollector#truncate_metadata stringifies every metadata
|
|
88
|
+
# value (input_tokens "42", cost_usd "0.0003", etc.), so we coerce
|
|
89
|
+
# back to numeric here using the same pattern as LlmSummary. nil and
|
|
90
|
+
# blank values skip the accumulator entirely so they don't pollute
|
|
91
|
+
# averages.
|
|
92
|
+
if (it = meta["input_tokens"]).present?
|
|
93
|
+
entry[:input_tokens_sum] += it.to_i
|
|
94
|
+
entry[:input_tokens_seen] += 1
|
|
95
|
+
end
|
|
96
|
+
if (ot = meta["output_tokens"]).present?
|
|
97
|
+
entry[:output_tokens_sum] += ot.to_i
|
|
98
|
+
entry[:output_tokens_seen] += 1
|
|
99
|
+
end
|
|
100
|
+
duration_raw = meta["duration_ms"] || crumb["d"]
|
|
101
|
+
if duration_raw.present?
|
|
102
|
+
d = duration_raw.to_f
|
|
103
|
+
if d > 0
|
|
104
|
+
entry[:duration_sum] += d
|
|
105
|
+
entry[:duration_seen] += 1
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
if (cost = meta["cost_usd"]).present?
|
|
109
|
+
entry[:cost_usd_sum] += cost.to_f
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
if (err_class = meta["error_class"]).is_a?(String) && !err_class.empty?
|
|
113
|
+
entry[:error_classes][err_class] = (entry[:error_classes][err_class] || 0) + 1
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
entry[:error_ids] << error_log.id
|
|
117
|
+
entry[:last_seen] = [ entry[:last_seen], error_log.occurred_at ].compact.max
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
finalize(results)
|
|
122
|
+
rescue => e
|
|
123
|
+
Rails.logger.error("[RailsErrorDashboard] LlmHealthSummary query failed: #{e.class}: #{e.message}")
|
|
124
|
+
[]
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def new_entry(provider, model)
|
|
128
|
+
{
|
|
129
|
+
provider: provider,
|
|
130
|
+
model: model,
|
|
131
|
+
call_count: 0,
|
|
132
|
+
tool_call_count: 0,
|
|
133
|
+
error_count: 0,
|
|
134
|
+
input_tokens_sum: 0,
|
|
135
|
+
input_tokens_seen: 0,
|
|
136
|
+
output_tokens_sum: 0,
|
|
137
|
+
output_tokens_seen: 0,
|
|
138
|
+
duration_sum: 0.0,
|
|
139
|
+
duration_seen: 0,
|
|
140
|
+
cost_usd_sum: 0.0,
|
|
141
|
+
error_classes: {},
|
|
142
|
+
error_ids: [],
|
|
143
|
+
last_seen: nil
|
|
144
|
+
}
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def finalize(results)
|
|
148
|
+
results.values.each do |r|
|
|
149
|
+
r[:error_ids] = r[:error_ids].uniq
|
|
150
|
+
r[:unique_error_count] = r[:error_ids].size
|
|
151
|
+
|
|
152
|
+
total_attempts = r[:call_count] + r[:tool_call_count]
|
|
153
|
+
r[:error_rate] = total_attempts.positive? ? (r[:error_count].to_f / total_attempts * 100).round(2) : 0.0
|
|
154
|
+
r[:severity] = severity_for(r[:error_rate])
|
|
155
|
+
|
|
156
|
+
r[:avg_input_tokens] = avg(r[:input_tokens_sum], r[:input_tokens_seen])
|
|
157
|
+
r[:avg_output_tokens] = avg(r[:output_tokens_sum], r[:output_tokens_seen])
|
|
158
|
+
r[:avg_duration_ms] = r[:duration_seen].positive? ? (r[:duration_sum] / r[:duration_seen]).round(2) : nil
|
|
159
|
+
r[:cost_usd_sum] = r[:cost_usd_sum].round(4)
|
|
160
|
+
|
|
161
|
+
top_class, top_count = r[:error_classes].max_by { |_, c| c }
|
|
162
|
+
r[:top_error_class] = top_class
|
|
163
|
+
r[:top_error_class_count] = top_count
|
|
164
|
+
|
|
165
|
+
# Drop accumulators — view doesn't need them
|
|
166
|
+
r.delete(:input_tokens_sum)
|
|
167
|
+
r.delete(:input_tokens_seen)
|
|
168
|
+
r.delete(:output_tokens_sum)
|
|
169
|
+
r.delete(:output_tokens_seen)
|
|
170
|
+
r.delete(:duration_sum)
|
|
171
|
+
r.delete(:duration_seen)
|
|
172
|
+
r.delete(:error_classes)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
results.values.sort_by { |r| [ -r[:error_rate], -r[:unique_error_count], -r[:call_count] ] }
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def avg(sum, count)
|
|
179
|
+
return nil if count.zero?
|
|
180
|
+
(sum.to_f / count).round
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
def severity_for(error_rate)
|
|
184
|
+
return :danger if error_rate >= DANGER_THRESHOLD
|
|
185
|
+
return :warning if error_rate >= WARNING_THRESHOLD
|
|
186
|
+
:success
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
def totals_for(models)
|
|
190
|
+
return blank_totals if models.empty? || !models.is_a?(Array)
|
|
191
|
+
|
|
192
|
+
total_calls = models.sum { |m| m[:call_count] }
|
|
193
|
+
total_tool_calls = models.sum { |m| m[:tool_call_count] }
|
|
194
|
+
total_errors = models.sum { |m| m[:error_count] }
|
|
195
|
+
total_attempts = total_calls + total_tool_calls
|
|
196
|
+
unique_error_ids = models.flat_map { |m| m[:error_ids] }.uniq
|
|
197
|
+
|
|
198
|
+
{
|
|
199
|
+
total_calls: total_calls,
|
|
200
|
+
total_tool_calls: total_tool_calls,
|
|
201
|
+
model_count: models.size,
|
|
202
|
+
unique_error_count: unique_error_ids.size,
|
|
203
|
+
error_rate: total_attempts.positive? ? (total_errors.to_f / total_attempts * 100).round(2) : 0.0,
|
|
204
|
+
total_cost_usd: models.sum { |m| m[:cost_usd_sum] }.round(4)
|
|
205
|
+
}
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def blank_totals
|
|
209
|
+
self.class.blank_totals
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def parse_breadcrumbs(raw)
|
|
213
|
+
return [] if raw.blank?
|
|
214
|
+
JSON.parse(raw)
|
|
215
|
+
rescue JSON::ParserError
|
|
216
|
+
[]
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
end
|
|
220
|
+
end
|
|
data/lib/rails_error_dashboard.rb
CHANGED
|
@@ -131,6 +131,7 @@ require "rails_error_dashboard/queries/job_health_summary"
|
|
|
131
131
|
require "rails_error_dashboard/queries/database_health_summary"
|
|
132
132
|
require "rails_error_dashboard/queries/swallowed_exception_summary"
|
|
133
133
|
require "rails_error_dashboard/queries/rack_attack_summary"
|
|
134
|
+
require "rails_error_dashboard/queries/llm_health_summary"
|
|
134
135
|
require "rails_error_dashboard/queries/release_timeline"
|
|
135
136
|
require "rails_error_dashboard/error_reporter"
|
|
136
137
|
require "rails_error_dashboard/middleware/error_catcher"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rails_error_dashboard
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.1
|
|
4
|
+
version: 0.7.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Anjan Jagirdar
|
|
@@ -312,6 +312,7 @@ files:
|
|
|
312
312
|
- app/views/rails_error_dashboard/errors/diagnostic_dumps.html.erb
|
|
313
313
|
- app/views/rails_error_dashboard/errors/index.html.erb
|
|
314
314
|
- app/views/rails_error_dashboard/errors/job_health_summary.html.erb
|
|
315
|
+
- app/views/rails_error_dashboard/errors/llm_health_summary.html.erb
|
|
315
316
|
- app/views/rails_error_dashboard/errors/n_plus_one_summary.html.erb
|
|
316
317
|
- app/views/rails_error_dashboard/errors/overview.html.erb
|
|
317
318
|
- app/views/rails_error_dashboard/errors/platform_comparison.html.erb
|
|
@@ -421,6 +422,7 @@ files:
|
|
|
421
422
|
- lib/rails_error_dashboard/queries/errors_list.rb
|
|
422
423
|
- lib/rails_error_dashboard/queries/filter_options.rb
|
|
423
424
|
- lib/rails_error_dashboard/queries/job_health_summary.rb
|
|
425
|
+
- lib/rails_error_dashboard/queries/llm_health_summary.rb
|
|
424
426
|
- lib/rails_error_dashboard/queries/mttr_stats.rb
|
|
425
427
|
- lib/rails_error_dashboard/queries/n_plus_one_summary.rb
|
|
426
428
|
- lib/rails_error_dashboard/queries/platform_comparison.rb
|
|
@@ -508,7 +510,7 @@ metadata:
|
|
|
508
510
|
funding_uri: https://github.com/sponsors/AnjanJ
|
|
509
511
|
post_install_message: |
|
|
510
512
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
511
|
-
RED (Rails Error Dashboard) v0.7.1
|
|
513
|
+
RED (Rails Error Dashboard) v0.7.2
|
|
512
514
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
513
515
|
|
|
514
516
|
First install:
|