rails_error_dashboard 0.7.2 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +28 -0
- data/lib/generators/rails_error_dashboard/install/templates/initializer.rb +35 -0
- data/lib/rails_error_dashboard/commands/log_error.rb +54 -2
- data/lib/rails_error_dashboard/configuration.rb +37 -0
- data/lib/rails_error_dashboard/engine.rb +14 -0
- data/lib/rails_error_dashboard/integrations/tracer.rb +195 -0
- data/lib/rails_error_dashboard/services/breadcrumb_collector.rb +45 -5
- data/lib/rails_error_dashboard/services/error_notification_dispatcher.rb +37 -15
- data/lib/rails_error_dashboard/services/system_health_snapshot.rb +10 -1
- data/lib/rails_error_dashboard/version.rb +1 -1
- data/lib/rails_error_dashboard.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 002c2e5d338c585425599802271a90c5bac425dc86867da1aa10a0c7a478d1e8
|
|
4
|
+
data.tar.gz: 077d7d7a7f957bee799cf39c96632532e3c74259e83310605c6068d57436aec3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8e6ef9f87aae8c200e997bf761184eb37c86d0637dfdd383fd6cc003f3c3b9fa30b7906fbaf6377670d932a60da5a9046202d4a48ab63264ce916c861ac20b91
|
|
7
|
+
data.tar.gz: 67858acc12ad29cb412b0c049b776b7193c9ce8d45febfa4bc23e079a1afd7ff64a2a85bd81516c999cc57861631ed3f89ea62bc36544370811e8cff858c8678
|
data/README.md
CHANGED
|
@@ -475,6 +475,32 @@ end
|
|
|
475
475
|
[Plugin System guide →](docs/PLUGIN_SYSTEM.md)
|
|
476
476
|
</details>
|
|
477
477
|
|
|
478
|
+
<details>
|
|
479
|
+
<summary><strong>OpenTelemetry Export — Emit Gem Operations as Spans</strong></summary>
|
|
480
|
+
|
|
481
|
+
Send the gem's error-capture pipeline as OpenTelemetry spans to your existing Datadog, Honeycomb, or Jaeger collector. Each stage of the capture path — DB write, breadcrumb harvest, system health snapshot, and notification dispatch — becomes a named child span so you can audit gem overhead from your own observability dashboards.
|
|
482
|
+
|
|
483
|
+
- Off by default — zero impact unless you opt in
|
|
484
|
+
- No-op when the OTel API gem isn't loaded
|
|
485
|
+
- Per-span-kind opt-in: enable only the stages you care about
|
|
486
|
+
- Every span individually rescue-wrapped — never raises into host code
|
|
487
|
+
- Boot-time warning if `enable_otel_export = true` but `opentelemetry-api` isn't in the Gemfile
|
|
488
|
+
|
|
489
|
+
```ruby
|
|
490
|
+
# Gemfile — only the API gem is required; the SDK is optional
|
|
491
|
+
gem "opentelemetry-api"
|
|
492
|
+
|
|
493
|
+
# config/initializers/rails_error_dashboard.rb
|
|
494
|
+
config.enable_otel_export = true
|
|
495
|
+
config.otel_service_name = "my-app" # falls back to application_name
|
|
496
|
+
config.otel_spans = [:capture, :breadcrumbs, :health, :notifications] # all (default)
|
|
497
|
+
# config.otel_spans = [:capture] # parent span only
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
Span names follow the `rails_error_dashboard.<operation>` convention, e.g. `rails_error_dashboard.capture_error`. Two attributes are attached to every span: `rails_error_dashboard.version` and `rails_error_dashboard.service_name`. Use them to filter the gem's traffic in your dashboards.
|
|
501
|
+
|
|
502
|
+
</details>
|
|
503
|
+
|
|
478
504
|
---
|
|
479
505
|
|
|
480
506
|
## Quick Start
|
|
@@ -539,6 +565,8 @@ end
|
|
|
539
565
|
|
|
540
566
|
**Multi-App Support** — Track errors from multiple Rails apps in a single shared database. Auto-detects app name, supports per-app filtering. [Multi-App guide →](docs/MULTI_APP_PERFORMANCE.md)
|
|
541
567
|
|
|
568
|
+
**OpenTelemetry Export** — Emit error-capture operations as OTel spans to Datadog, Honeycomb, or Jaeger. Add `gem "opentelemetry-api"` and set `config.enable_otel_export = true`. See [OpenTelemetry Export](#opentelemetry-export--emit-gem-operations-as-spans) above for full options.
|
|
569
|
+
|
|
542
570
|
---
|
|
543
571
|
|
|
544
572
|
## Documentation
|
|
@@ -483,6 +483,41 @@ RailsErrorDashboard.configure do |config|
|
|
|
483
483
|
# config.llm_max_output_tokens = 900
|
|
484
484
|
# config.llm_system_prompt = "Prefer concise answers with file-level next steps."
|
|
485
485
|
|
|
486
|
+
# ============================================================================
|
|
487
|
+
# OPENTELEMETRY EXPORT (OUTBOUND)
|
|
488
|
+
# ============================================================================
|
|
489
|
+
#
|
|
490
|
+
# Emit gem operations as OpenTelemetry spans so the host's existing
|
|
491
|
+
# Datadog / Honeycomb / Jaeger / Grafana Tempo pipeline gets a trace
|
|
492
|
+
# of every error capture. Useful for:
|
|
493
|
+
# - Auditing "when did this error get captured?" against deploy events
|
|
494
|
+
# - Measuring how much time the gem spends in the capture path
|
|
495
|
+
# - Proving the <5ms host-safety budget from operator dashboards
|
|
496
|
+
#
|
|
497
|
+
# Emits four spans per error capture:
|
|
498
|
+
# rails_error_dashboard.capture_error — parent, wraps everything
|
|
499
|
+
# rails_error_dashboard.breadcrumb_collection — buffer drain (~µs)
|
|
500
|
+
# rails_error_dashboard.system_health_snapshot — GC.stat etc. (<1ms)
|
|
501
|
+
# rails_error_dashboard.notification_dispatch — Slack/email enqueue
|
|
502
|
+
#
|
|
503
|
+
# Disabled by default. Requires the host app to already run OpenTelemetry
|
|
504
|
+
# (the gem does NOT add an opentelemetry-* runtime dependency). When OTel
|
|
505
|
+
# is absent, every span call is a zero-overhead no-op.
|
|
506
|
+
#
|
|
507
|
+
# config.enable_otel_export = true
|
|
508
|
+
# config.otel_service_name = "my-app" # Falls back to application_name when nil
|
|
509
|
+
#
|
|
510
|
+
# Per-span opt-out: list only the span kinds you want emitted; omitted kinds are disabled
|
|
511
|
+
# without code changes. Useful when e.g. notification dispatch is slow due
|
|
512
|
+
# to outbound HTTP and you don't want it polluting your trace dashboards.
|
|
513
|
+
#
|
|
514
|
+
# config.otel_spans = [:capture, :breadcrumbs, :health, :notifications] # all (default)
|
|
515
|
+
# config.otel_spans = [:capture] # parent only
|
|
516
|
+
# config.otel_spans = [:capture, :health] # parent + health
|
|
517
|
+
#
|
|
518
|
+
# No PII or request bodies in span attributes — just metadata + timing.
|
|
519
|
+
# Safe to enable on production OTel pipelines.
|
|
520
|
+
|
|
486
521
|
# ============================================================================
|
|
487
522
|
# ISSUE TRACKING (GitHub / GitLab / Codeberg)
|
|
488
523
|
# ============================================================================
|
|
@@ -17,6 +17,21 @@ module RailsErrorDashboard
|
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
+
# Build the base OTel span attributes available before any work happens.
|
|
21
|
+
# Kept as a module-level helper so both sync and async paths can call it.
|
|
22
|
+
# @return [Hash<String, Object>]
|
|
23
|
+
# Assemble the attributes known up front for the capture span: exception
# class, a message capped at 200 characters (an ellipsis marks truncation),
# the Rails environment ("unknown" when Rails is not defined) and whether
# the capture was requested asynchronously.
#
# Any StandardError raised while reading the exception degrades to a
# minimal hash instead of propagating.
#
# @param exception [Exception] the error being captured
# @param was_async [Boolean] true when called from the async enqueue path
# @return [Hash<String, Object>]
def self.build_capture_span_attributes(exception, was_async:)
  message = exception.message.to_s
  message = "#{message[0, 200]}…" if message.length > 200

  # `defined?` keeps this usable when Rails is not loaded.
  environment = defined?(Rails) && Rails.env.to_s

  {
    "error.type" => exception.class.name,
    "error.message" => message,
    "rails_error_dashboard.environment" => environment || "unknown",
    "rails_error_dashboard.was_async" => was_async
  }
rescue StandardError
  { "error.type" => "unknown", "rails_error_dashboard.was_async" => was_async }
end
|
|
34
|
+
|
|
20
35
|
# Queue error logging as a background job
|
|
21
36
|
def self.call_async(exception, context = {})
|
|
22
37
|
# Serialize exception data for the job
|
|
@@ -68,7 +83,17 @@ module RailsErrorDashboard
|
|
|
68
83
|
# Enqueue the async job using ActiveJob
|
|
69
84
|
# The queue adapter (:sidekiq, :solid_queue, :async) is configured separately
|
|
70
85
|
begin
|
|
71
|
-
|
|
86
|
+
# OTel: emit a capture span around the enqueue itself. The real capture
|
|
87
|
+
# work runs in the job (which starts its own root span via .new(...).call).
|
|
88
|
+
# For the async path the span here measures *enqueue latency only* — used
|
|
89
|
+
# to detect queue-adapter backpressure or Redis slowness.
|
|
90
|
+
Integrations::Tracer.in_span(
|
|
91
|
+
"capture_error",
|
|
92
|
+
kind: :capture,
|
|
93
|
+
attributes: build_capture_span_attributes(exception, was_async: true)
|
|
94
|
+
) do |_span|
|
|
95
|
+
AsyncErrorLoggingJob.perform_later(exception_data, context)
|
|
96
|
+
end
|
|
72
97
|
rescue => e
|
|
73
98
|
# Queue adapter failed (e.g., Redis down for Sidekiq). Fall back to
|
|
74
99
|
# sync logging so the error is still captured. Without this rescue,
|
|
@@ -118,13 +143,31 @@ module RailsErrorDashboard
|
|
|
118
143
|
end
|
|
119
144
|
|
|
120
145
|
def call
|
|
146
|
+
# OTel: parent capture span. Wraps the entire sync capture path so
|
|
147
|
+
# operators can audit how long error capture takes from their existing
|
|
148
|
+
# tracing pipeline. Child spans (breadcrumbs, health, notifications)
|
|
149
|
+
# nest under this one automatically via OTel context propagation.
|
|
150
|
+
#
|
|
151
|
+
# The span lives INSIDE the rescue clause — if the span itself raises
|
|
152
|
+
# somehow, the outer rescue still catches it and returns nil. Defense
|
|
153
|
+
# in depth. When the block raises, the Tracer façade records the
|
|
154
|
+
# exception on the span and re-raises so the rescue can swallow it.
|
|
155
|
+
Integrations::Tracer.in_span(
|
|
156
|
+
"capture_error",
|
|
157
|
+
kind: :capture,
|
|
158
|
+
attributes: self.class.build_capture_span_attributes(@exception, was_async: false)
|
|
159
|
+
) do |span|
|
|
121
160
|
# Check if this exception should be logged (ignore list + sampling)
|
|
122
|
-
|
|
161
|
+
if !Services::ExceptionFilter.should_log?(@exception)
|
|
162
|
+
span&.set_attribute("rails_error_dashboard.filtered", true)
|
|
163
|
+
next nil
|
|
164
|
+
end
|
|
123
165
|
|
|
124
166
|
error_context = ValueObjects::ErrorContext.new(@context, @context[:source])
|
|
125
167
|
|
|
126
168
|
# Find or create application (cached lookup)
|
|
127
169
|
application = find_or_create_application
|
|
170
|
+
span&.set_attribute("rails_error_dashboard.application", application.name.to_s) if application.respond_to?(:name)
|
|
128
171
|
|
|
129
172
|
# Build error attributes
|
|
130
173
|
truncated_backtrace = Services::BacktraceProcessor.truncate(@exception.backtrace)
|
|
@@ -262,6 +305,14 @@ module RailsErrorDashboard
|
|
|
262
305
|
# This ensures accurate occurrence tracking
|
|
263
306
|
error_log = ErrorLog.find_or_increment_by_hash(error_hash, attributes.merge(error_hash: error_hash))
|
|
264
307
|
|
|
308
|
+
# OTel: now that the error_log exists, attach its id + dedup flag + severity
|
|
309
|
+
# to the parent capture span so operators can correlate to dashboard URLs.
|
|
310
|
+
if span && error_log
|
|
311
|
+
span.set_attribute("rails_error_dashboard.error_id", error_log.id) if error_log.id
|
|
312
|
+
span.set_attribute("rails_error_dashboard.deduplicated", error_log.occurrence_count.to_i > 1)
|
|
313
|
+
span.set_attribute("rails_error_dashboard.severity", error_log.severity.to_s) if error_log.respond_to?(:severity) && error_log.severity
|
|
314
|
+
end
|
|
315
|
+
|
|
265
316
|
# Track individual error occurrence for co-occurrence analysis (if table exists)
|
|
266
317
|
if defined?(ErrorOccurrence) && ErrorOccurrence.table_exists?
|
|
267
318
|
begin
|
|
@@ -298,6 +349,7 @@ module RailsErrorDashboard
|
|
|
298
349
|
check_baseline_anomaly(error_log)
|
|
299
350
|
|
|
300
351
|
error_log
|
|
352
|
+
end
|
|
301
353
|
rescue => e
|
|
302
354
|
# Don't let error logging cause more errors - fail silently
|
|
303
355
|
# CRITICAL: Log but never propagate exception
|
|
@@ -189,6 +189,14 @@ module RailsErrorDashboard
|
|
|
189
189
|
attr_accessor :llm_observability_content_capture # Capture prompt/completion text (default: false — PII risk)
|
|
190
190
|
attr_accessor :llm_pricing_overrides # Hash of { "model-name" => { input: usd_per_1m, output: usd_per_1m } }
|
|
191
191
|
|
|
192
|
+
# OpenTelemetry outbound export — emit gem operations as OTel spans for
|
|
193
|
+
# Datadog/Honeycomb/Jaeger. Requires the host app to already run OTel.
|
|
194
|
+
# When OTel is absent OR enable_otel_export is false, all emit calls
|
|
195
|
+
# are no-ops with zero overhead.
|
|
196
|
+
attr_accessor :enable_otel_export # Master switch (default: false)
|
|
197
|
+
attr_accessor :otel_service_name # Falls back to application_name when nil
|
|
198
|
+
attr_accessor :otel_spans # Array of enabled span kinds — see Integrations::Tracer::ALL_SPAN_KINDS
|
|
199
|
+
|
|
192
200
|
# Dashboard UI appearance
|
|
193
201
|
attr_accessor :accent_color # :crimson (default), :ruby, :ember, :violet
|
|
194
202
|
|
|
@@ -372,6 +380,13 @@ module RailsErrorDashboard
|
|
|
372
380
|
@llm_observability_content_capture = false
|
|
373
381
|
@llm_pricing_overrides = {}
|
|
374
382
|
|
|
383
|
+
# OTel outbound export defaults — OFF (opt-in). All four span kinds enabled
|
|
384
|
+
# by default once master switch flips on; users can pass a subset to opt out
|
|
385
|
+
# of e.g. notification spans without code changes.
|
|
386
|
+
@enable_otel_export = false
|
|
387
|
+
@otel_service_name = nil
|
|
388
|
+
@otel_spans = %i[capture breadcrumbs health notifications]
|
|
389
|
+
|
|
375
390
|
# Internal logging defaults - SILENT by default
|
|
376
391
|
@enable_internal_logging = false # Opt-in for debugging
|
|
377
392
|
@log_level = :silent # Silent by default, use :debug, :info, :warn, :error, or :silent
|
|
@@ -553,6 +568,28 @@ module RailsErrorDashboard
|
|
|
553
568
|
@enable_llm_observability = false
|
|
554
569
|
end
|
|
555
570
|
|
|
571
|
+
# Validate OTel export config — coerce or warn rather than raise so a
|
|
572
|
+
# config typo never blocks the host app from booting.
|
|
573
|
+
if enable_otel_export
|
|
574
|
+
unless otel_spans.is_a?(Array)
|
|
575
|
+
warnings << "otel_spans must be an Array of symbols (e.g. [:capture, :breadcrumbs]). " \
|
|
576
|
+
"Resetting to all-enabled."
|
|
577
|
+
@otel_spans = %i[capture breadcrumbs health notifications]
|
|
578
|
+
end
|
|
579
|
+
|
|
580
|
+
invalid = otel_spans - %i[capture breadcrumbs health notifications]
|
|
581
|
+
if invalid.any?
|
|
582
|
+
warnings << "otel_spans contains unknown kinds: #{invalid.inspect}. Allowed: " \
|
|
583
|
+
"[:capture, :breadcrumbs, :health, :notifications]. Ignoring unknown values."
|
|
584
|
+
@otel_spans = otel_spans - invalid
|
|
585
|
+
end
|
|
586
|
+
|
|
587
|
+
if @otel_spans.empty?
|
|
588
|
+
warnings << "enable_otel_export = true but otel_spans is empty — no spans will be emitted. " \
|
|
589
|
+
"Set otel_spans to enable at least one of [:capture, :breadcrumbs, :health, :notifications]."
|
|
590
|
+
end
|
|
591
|
+
end
|
|
592
|
+
|
|
556
593
|
# Skip credential/service-dependent validations during Docker builds.
|
|
557
594
|
# SECRET_KEY_BASE_DUMMY=1 means no credentials or external services available.
|
|
558
595
|
build_env = ENV["SECRET_KEY_BASE_DUMMY"].present?
|
|
@@ -107,6 +107,20 @@ module RailsErrorDashboard
|
|
|
107
107
|
# capability, so this is safe to call unconditionally.
|
|
108
108
|
RailsErrorDashboard::Integrations::LlmSpanProcessor.register!
|
|
109
109
|
|
|
110
|
+
# Outbound OTel export — warn at boot if the feature is enabled but
|
|
111
|
+
# the OTel API isn't loaded. The Tracer façade silently no-ops in that
|
|
112
|
+
# state, so without this warning users could enable the feature and
|
|
113
|
+
# see zero spans without knowing why. Don't auto-disable — the user
|
|
114
|
+
# may install OTel later in the boot sequence.
|
|
115
|
+
if RailsErrorDashboard.configuration.enable_otel_export &&
|
|
116
|
+
!RailsErrorDashboard::Integrations::Tracer.otel_api_loaded?
|
|
117
|
+
Rails.logger.warn(
|
|
118
|
+
"[RailsErrorDashboard] enable_otel_export = true but the OpenTelemetry API " \
|
|
119
|
+
"(opentelemetry-api gem) isn't loaded. Outbound spans will not emit. " \
|
|
120
|
+
"Add `gem \"opentelemetry-api\"` (or the full opentelemetry-sdk) to your Gemfile."
|
|
121
|
+
)
|
|
122
|
+
end
|
|
123
|
+
|
|
110
124
|
# Subscribe to red.llm_call / red.llm_tool_call AS::Notifications — Tier 3
|
|
111
125
|
# path for hosts using direct Net::HTTP / gRPC / local inference servers
|
|
112
126
|
# that aren't covered by OTel or the Faraday middleware.
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsErrorDashboard
  module Integrations
    # OpenTelemetry tracer façade for the outbound direction — emits spans
    # from the gem's capture path so host operators can audit error tracking
    # latency from their existing Datadog/Honeycomb/Jaeger pipeline.
    #
    # Symmetric counterpart to LlmSpanProcessor (which is INBOUND — pulls
    # OTel spans INTO RED breadcrumbs). This module pushes OUTBOUND: gem
    # operations OUT to the host's tracer provider.
    #
    # Designed to be called from hot paths unconditionally. When OTel is
    # absent or the feature is off, `in_span` runs the block with a no-op
    # span object — call sites do NOT branch on availability.
    #
    # HOST APP SAFETY (HOST_APP_SAFETY.md):
    #   - No-op when `enable_otel_export = false` OR OTel API not loaded
    #   - Per-span-kind opt-in/out via config.otel_spans
    #   - Tracer instance memoized per-process (rebuild on `reset!`)
    #   - Tracer failures are swallowed — they never raise into host code
    #   - Block return value is preserved even when tracer errors
    #   - Exceptions raised by the block re-raise after being recorded
    #   - The block is invoked exactly once, whatever fails
    #
    # Configuration:
    #   config.enable_otel_export = true          # master switch (default false)
    #   config.otel_service_name  = "my-app"      # falls back to application_name
    #   config.otel_spans         = [:capture, :breadcrumbs, :health, :notifications]
    #
    # Usage from capture-path code:
    #
    #   Tracer.in_span("capture_error", kind: :capture,
    #                  attributes: { error_type: exception.class.name }) do |span|
    #     # ... do the work ...
    #     span&.set_attribute("rails_error_dashboard.error_id", error.id)
    #   end
    #
    # The span object yielded may be the real OTel span or a NoopSpan.
    # Always use safe-nav (`span&.`) or guard with `span.respond_to?(:...)`.
    module Tracer
      INSTRUMENTATION_NAME = "rails_error_dashboard"
      ALL_SPAN_KINDS = %i[capture breadcrumbs health notifications].freeze

      # No-op stand-in returned to the block when tracing is off or unavailable.
      # Mimics the OTel Span interface (set_attribute, add_event, record_exception)
      # so call sites don't branch.
      class NoopSpan
        def set_attribute(_key, _value); self; end
        def add_event(_name, attributes: nil); self; end
        def record_exception(_exception, attributes: nil); self; end
        def status=(_status); end
        def finish; self; end
        def context; nil; end
      end

      NOOP_SPAN = NoopSpan.new.freeze

      class << self
        # Yields a span object to the block. Returns the block's return value.
        # Records exceptions raised by the block on the span and re-raises them.
        #
        # The block runs exactly once:
        #   - tracing off / unavailable  -> block runs with NOOP_SPAN
        #   - tracer fails BEFORE yield  -> block runs with NOOP_SPAN
        #   - block raises               -> exception is recorded and propagates
        #   - tracer fails AFTER yield   -> failure is logged, block result returned
        #
        # @param name [String] short span name (will be namespaced with INSTRUMENTATION_NAME)
        # @param kind [Symbol] one of ALL_SPAN_KINDS — checked against config.otel_spans
        # @param attributes [Hash<String,Object>] attached to the span at creation
        # @yieldparam span [NoopSpan, ::OpenTelemetry::Trace::Span] real or no-op
        # @return [Object] whatever the block returns
        # @raise [StandardError] only what the block itself raised
        def in_span(name, kind: :capture, attributes: {})
          finished = false    # the block ran to completion under a real span
          result = nil
          block_error = nil   # StandardError raised by the caller's block, if any

          begin
            tr = emit?(kind) ? tracer : nil
            if tr
              merged = base_attributes.merge(safe_stringify(attributes))
              tr.in_span("#{INSTRUMENTATION_NAME}.#{name}", attributes: merged) do |span|
                begin
                  result = yield span
                  finished = true
                rescue StandardError => e
                  block_error = e
                  record_block_exception(span, e)
                  raise
                end
              end
            end
          rescue StandardError => e
            # The block's own failure belongs to the caller. Never retry the
            # block: re-running it would repeat its side effects (DB writes,
            # job enqueues).
            raise block_error if block_error

            # Otherwise the tracer itself failed (e.g. the SDK threw while
            # starting or finishing the span). Swallow it so the host app
            # never sees a crash caused by tracing.
            Logger.debug("[RailsErrorDashboard] Tracer.in_span(#{name.inspect}) failed: #{e.class}: #{e.message}")
          end

          return result if finished

          # Tracing is off/unavailable, or the tracer failed before the block
          # ran. This is the single no-op invocation; nothing rescues it, so
          # block errors propagate untouched.
          yield NOOP_SPAN
        end

        # Returns true when the OTel API is loaded AND the master switch is on
        # AND the given span kind is in the enabled set. Cheap — called on every
        # in_span invocation, including in the hot path.
        # @param kind [Symbol]
        # @return [Boolean] false on any internal error
        def emit?(kind)
          config = RailsErrorDashboard.configuration
          return false unless config.enable_otel_export
          return false unless otel_api_loaded?

          enabled_kinds = config.otel_spans
          return false if enabled_kinds.nil? || enabled_kinds.empty?
          enabled_kinds.include?(kind)
        rescue StandardError
          false
        end

        # Reset memoized tracer + availability — for spec isolation only.
        def reset!
          @tracer = nil
          @otel_api_loaded = nil
        end

        # Returns true if the OTel API gem is loaded (NOT the SDK). The API alone
        # is enough — it ships a ProxyTracerProvider that's a no-op when no SDK
        # is configured, which is the behavior we want.
        # The answer is memoized until `reset!`.
        # @return [Boolean]
        def otel_api_loaded?
          return @otel_api_loaded unless @otel_api_loaded.nil?
          @otel_api_loaded = !!(defined?(::OpenTelemetry) &&
                                ::OpenTelemetry.respond_to?(:tracer_provider))
        rescue StandardError
          @otel_api_loaded = false
        end

        private

        # Memoized tracer instance. Returns nil on any failure so the caller
        # falls back to no-op behavior.
        # @return [::OpenTelemetry::Trace::Tracer, nil]
        def tracer
          return @tracer if @tracer
          return nil unless otel_api_loaded?

          @tracer = ::OpenTelemetry.tracer_provider.tracer(
            INSTRUMENTATION_NAME,
            RailsErrorDashboard::VERSION
          )
        rescue StandardError => e
          Logger.debug("[RailsErrorDashboard] Tracer initialization failed: #{e.class}: #{e.message}")
          nil
        end

        # Attributes attached to every span — service-name and gem version
        # let operators filter the gem's traffic out of their dashboards.
        # @return [Hash<String, Object>] empty on any internal error
        def base_attributes
          config = RailsErrorDashboard.configuration
          {
            "rails_error_dashboard.version" => RailsErrorDashboard::VERSION,
            "rails_error_dashboard.service_name" => config.otel_service_name ||
                                                    config.application_name ||
                                                    "unknown"
          }
        rescue StandardError
          {}
        end

        # OTel attribute values must be strings, bools, numerics, or arrays of those.
        # Coerce hash values to strings as a safety net — host code passing arbitrary
        # objects (e.g. a Symbol or an Exception) won't crash the SDK.
        # Keys are stringified; nil values are dropped.
        # @param attrs [Hash, Object] anything that is not a Hash yields {}
        # @return [Hash<String, Object>]
        def safe_stringify(attrs)
          return {} unless attrs.is_a?(Hash)
          attrs.each_with_object({}) do |(k, v), acc|
            key = k.to_s
            acc[key] = case v
            when String, Numeric, TrueClass, FalseClass then v
            when Array
              v.map { |x| x.is_a?(String) || x.is_a?(Numeric) || x == true || x == false ? x : x.to_s }
            when nil then nil
            else v.to_s
            end
          end.compact
        rescue StandardError
          {}
        end

        # OTel semconv for exceptions:
        #   span.record_exception(exception)  -- adds an "exception" event
        #   span.status = OpenTelemetry::Trace::Status.error("message")
        # Best-effort: any failure while recording is swallowed.
        def record_block_exception(span, exception)
          return unless span.respond_to?(:record_exception)
          span.record_exception(exception)

          if defined?(::OpenTelemetry::Trace::Status) &&
             ::OpenTelemetry::Trace::Status.respond_to?(:error)
            span.status = ::OpenTelemetry::Trace::Status.error(exception.message.to_s[0, 200])
          end
        rescue StandardError
          nil
        end
      end
    end
  end
end
|
|
@@ -120,12 +120,33 @@ module RailsErrorDashboard
|
|
|
120
120
|
# Harvest breadcrumbs from the current buffer and clear it
|
|
121
121
|
# @return [Array<Hash>] Array of breadcrumb hashes (empty if none)
|
|
122
122
|
def self.harvest
|
|
123
|
-
|
|
124
|
-
|
|
123
|
+
# OTel: emit a child span around the harvest so operators see the
|
|
124
|
+
# buffer-drain step in the capture trace. Cheap to compute (single
|
|
125
|
+
# Array#size + JSON byte estimate) and contained to LogError invocations
|
|
126
|
+
# via the parent rails_error_dashboard.capture_error span.
|
|
127
|
+
RailsErrorDashboard::Integrations::Tracer.in_span(
|
|
128
|
+
"breadcrumb_collection",
|
|
129
|
+
kind: :breadcrumbs
|
|
130
|
+
) do |span|
|
|
131
|
+
buffer = Thread.current[THREAD_KEY]
|
|
132
|
+
if buffer.nil?
|
|
133
|
+
span&.set_attribute("breadcrumb_count", 0)
|
|
134
|
+
next []
|
|
135
|
+
end
|
|
125
136
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
137
|
+
result = buffer.to_a
|
|
138
|
+
buffer.clear
|
|
139
|
+
|
|
140
|
+
# Only pay for attribute computation when a real span is recording.
|
|
141
|
+
# NoopSpan is the singleton returned when OTel is off — skip the work
|
|
142
|
+
# entirely so the harvest path stays free in the common case.
|
|
143
|
+
if span && !span.equal?(RailsErrorDashboard::Integrations::Tracer::NOOP_SPAN)
|
|
144
|
+
span.set_attribute("breadcrumb_count", result.size)
|
|
145
|
+
span.set_attribute("bytes_serialized_estimate", estimate_byte_size(result))
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
result
|
|
149
|
+
end
|
|
129
150
|
rescue => e
|
|
130
151
|
RailsErrorDashboard::Logger.debug("[RailsErrorDashboard] BreadcrumbCollector.harvest failed: #{e.message}")
|
|
131
152
|
[]
|
|
@@ -222,6 +243,25 @@ module RailsErrorDashboard
|
|
|
222
243
|
{}
|
|
223
244
|
end
|
|
224
245
|
private_class_method :truncate_metadata
|
|
246
|
+
|
|
247
|
+
# Rough byte-size estimate without paying the full JSON serialization
|
|
248
|
+
# cost. Sums the (already-truncated) message lengths and metadata string
|
|
249
|
+
# values. Used as the bytes_serialized_estimate attribute on the OTel
|
|
250
|
+
# breadcrumb_collection span.
|
|
251
|
+
# Approximate the serialized size of a breadcrumb list without running a
# real JSON dump. Each Hash crumb contributes a flat 12 bytes plus the
# byte length of its message (:m / "m") and of every value in its
# metadata hash (:meta / "meta"); non-Hash entries contribute nothing.
#
# @param breadcrumbs [Array<Hash>, Object] anything that is not an Array yields 0
# @return [Integer] estimated byte count, 0 on any error
def self.estimate_byte_size(breadcrumbs)
  total = 0
  return total unless breadcrumbs.is_a?(Array)

  breadcrumbs.each do |crumb|
    next unless crumb.is_a?(Hash)

    message = crumb[:m] || crumb["m"]
    total += 12 + message.to_s.bytesize

    metadata = crumb[:meta] || crumb["meta"]
    next unless metadata.is_a?(Hash)

    metadata.each_value { |value| total += value.to_s.bytesize }
  end

  total
rescue StandardError
  0
end
|
|
264
|
+
private_class_method :estimate_byte_size
|
|
225
265
|
end
|
|
226
266
|
end
|
|
227
267
|
end
|
|
@@ -12,26 +12,48 @@ module RailsErrorDashboard
|
|
|
12
12
|
class ErrorNotificationDispatcher
|
|
13
13
|
# @param error_log [ErrorLog] The error to notify about
|
|
14
14
|
def self.call(error_log)
|
|
15
|
-
|
|
15
|
+
# OTel: emit a child span around the dispatch so operators can see
|
|
16
|
+
# which channels fired for a given error and how long the enqueue
|
|
17
|
+
# itself took. Actual delivery happens in the background jobs (Slack
|
|
18
|
+
# HTTP, SMTP, etc.) — those would need their own instrumentation to
|
|
19
|
+
# measure delivery latency.
|
|
20
|
+
RailsErrorDashboard::Integrations::Tracer.in_span(
|
|
21
|
+
"notification_dispatch",
|
|
22
|
+
kind: :notifications,
|
|
23
|
+
attributes: { "rails_error_dashboard.error_id" => error_log.id.to_i }
|
|
24
|
+
) do |span|
|
|
25
|
+
config = RailsErrorDashboard.configuration
|
|
26
|
+
fired = []
|
|
16
27
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
28
|
+
if config.enable_slack_notifications && config.slack_webhook_url.present?
|
|
29
|
+
SlackErrorNotificationJob.perform_later(error_log.id)
|
|
30
|
+
fired << "slack"
|
|
31
|
+
end
|
|
20
32
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
33
|
+
if config.enable_email_notifications && config.notification_email_recipients.present?
|
|
34
|
+
EmailErrorNotificationJob.perform_later(error_log.id)
|
|
35
|
+
fired << "email"
|
|
36
|
+
end
|
|
24
37
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
38
|
+
if config.enable_discord_notifications && config.discord_webhook_url.present?
|
|
39
|
+
DiscordErrorNotificationJob.perform_later(error_log.id)
|
|
40
|
+
fired << "discord"
|
|
41
|
+
end
|
|
28
42
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
43
|
+
if config.enable_pagerduty_notifications && config.pagerduty_integration_key.present?
|
|
44
|
+
PagerdutyErrorNotificationJob.perform_later(error_log.id)
|
|
45
|
+
fired << "pagerduty"
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
if config.enable_webhook_notifications && config.webhook_urls.present?
|
|
49
|
+
WebhookErrorNotificationJob.perform_later(error_log.id)
|
|
50
|
+
fired << "webhook"
|
|
51
|
+
end
|
|
32
52
|
|
|
33
|
-
|
|
34
|
-
|
|
53
|
+
if span && !span.equal?(RailsErrorDashboard::Integrations::Tracer::NOOP_SPAN)
|
|
54
|
+
span.set_attribute("channels", fired)
|
|
55
|
+
span.set_attribute("channel_count", fired.size)
|
|
56
|
+
end
|
|
35
57
|
end
|
|
36
58
|
end
|
|
37
59
|
end
|
|
@@ -24,7 +24,16 @@ module RailsErrorDashboard
|
|
|
24
24
|
# Capture current system health metrics
|
|
25
25
|
# @return [Hash] Health snapshot (always safe, never raises)
|
|
26
26
|
def self.capture
|
|
27
|
-
|
|
27
|
+
# OTel: emit a child span around the snapshot so operators can verify
|
|
28
|
+
# the <1ms health-budget claim from their own tracing dashboard. The
|
|
29
|
+
# snapshot itself is read-only (GC.stat, pool.stat, procfs reads) so
|
|
30
|
+
# the span carries no useful attributes beyond timing.
|
|
31
|
+
RailsErrorDashboard::Integrations::Tracer.in_span(
|
|
32
|
+
"system_health_snapshot",
|
|
33
|
+
kind: :health
|
|
34
|
+
) do |_span|
|
|
35
|
+
new.capture
|
|
36
|
+
end
|
|
28
37
|
rescue => e
|
|
29
38
|
RailsErrorDashboard::Logger.debug("[RailsErrorDashboard] SystemHealthSnapshot.capture failed: #{e.message}")
|
|
30
39
|
{ captured_at: Time.current.iso8601 }
|
|
@@ -21,6 +21,7 @@ begin; require "turbo-rails"; rescue LoadError; end
|
|
|
21
21
|
require "rails_error_dashboard/value_objects/error_context"
|
|
22
22
|
require "rails_error_dashboard/value_objects/llm_call_event"
|
|
23
23
|
require "rails_error_dashboard/integrations/o_tel"
|
|
24
|
+
require "rails_error_dashboard/integrations/tracer"
|
|
24
25
|
require "rails_error_dashboard/integrations/llm_span_processor"
|
|
25
26
|
require "rails_error_dashboard/integrations/llm_middleware"
|
|
26
27
|
require "rails_error_dashboard/helpers/user_model_detector"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rails_error_dashboard
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Anjan Jagirdar
|
|
@@ -398,6 +398,7 @@ files:
|
|
|
398
398
|
- lib/rails_error_dashboard/integrations/llm_middleware.rb
|
|
399
399
|
- lib/rails_error_dashboard/integrations/llm_span_processor.rb
|
|
400
400
|
- lib/rails_error_dashboard/integrations/o_tel.rb
|
|
401
|
+
- lib/rails_error_dashboard/integrations/tracer.rb
|
|
401
402
|
- lib/rails_error_dashboard/logger.rb
|
|
402
403
|
- lib/rails_error_dashboard/manual_error_reporter.rb
|
|
403
404
|
- lib/rails_error_dashboard/middleware/error_catcher.rb
|
|
@@ -510,7 +511,7 @@ metadata:
|
|
|
510
511
|
funding_uri: https://github.com/sponsors/AnjanJ
|
|
511
512
|
post_install_message: |
|
|
512
513
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
513
|
-
RED (Rails Error Dashboard) v0.
|
|
514
|
+
RED (Rails Error Dashboard) v0.8.0
|
|
514
515
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
515
516
|
|
|
516
517
|
First install:
|
|
@@ -546,7 +547,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
546
547
|
- !ruby/object:Gem::Version
|
|
547
548
|
version: '0'
|
|
548
549
|
requirements: []
|
|
549
|
-
rubygems_version:
|
|
550
|
+
rubygems_version: 4.0.3
|
|
550
551
|
specification_version: 4
|
|
551
552
|
summary: Self-hosted error tracking and exception monitoring for Rails. Free, forever.
|
|
552
553
|
test_files: []
|