rails_error_dashboard 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +305 -703
- data/app/assets/stylesheets/rails_error_dashboard/_catppuccin_mocha.scss +107 -0
- data/app/assets/stylesheets/rails_error_dashboard/_components.scss +625 -0
- data/app/assets/stylesheets/rails_error_dashboard/_layout.scss +257 -0
- data/app/assets/stylesheets/rails_error_dashboard/_theme_variables.scss +203 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.css +926 -15
- data/app/assets/stylesheets/rails_error_dashboard/application.css.map +7 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.scss +61 -0
- data/app/controllers/rails_error_dashboard/application_controller.rb +18 -0
- data/app/controllers/rails_error_dashboard/errors_controller.rb +140 -4
- data/app/helpers/rails_error_dashboard/application_helper.rb +55 -0
- data/app/helpers/rails_error_dashboard/backtrace_helper.rb +91 -0
- data/app/helpers/rails_error_dashboard/overview_helper.rb +78 -0
- data/app/helpers/rails_error_dashboard/user_agent_helper.rb +118 -0
- data/app/jobs/rails_error_dashboard/application_job.rb +19 -0
- data/app/jobs/rails_error_dashboard/async_error_logging_job.rb +48 -0
- data/app/jobs/rails_error_dashboard/baseline_alert_job.rb +263 -0
- data/app/jobs/rails_error_dashboard/discord_error_notification_job.rb +4 -8
- data/app/jobs/rails_error_dashboard/email_error_notification_job.rb +2 -1
- data/app/jobs/rails_error_dashboard/pagerduty_error_notification_job.rb +5 -5
- data/app/jobs/rails_error_dashboard/slack_error_notification_job.rb +10 -6
- data/app/jobs/rails_error_dashboard/webhook_error_notification_job.rb +5 -6
- data/app/mailers/rails_error_dashboard/application_mailer.rb +1 -1
- data/app/mailers/rails_error_dashboard/error_notification_mailer.rb +1 -1
- data/app/models/rails_error_dashboard/cascade_pattern.rb +74 -0
- data/app/models/rails_error_dashboard/error_baseline.rb +100 -0
- data/app/models/rails_error_dashboard/error_comment.rb +27 -0
- data/app/models/rails_error_dashboard/error_log.rb +471 -3
- data/app/models/rails_error_dashboard/error_occurrence.rb +49 -0
- data/app/views/layouts/rails_error_dashboard.html.erb +816 -178
- data/app/views/layouts/rails_error_dashboard_old_backup.html.erb +383 -0
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.html.erb +3 -10
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.text.erb +1 -2
- data/app/views/rails_error_dashboard/errors/_error_row.html.erb +78 -0
- data/app/views/rails_error_dashboard/errors/_pattern_insights.html.erb +209 -0
- data/app/views/rails_error_dashboard/errors/_stats.html.erb +34 -0
- data/app/views/rails_error_dashboard/errors/_timeline.html.erb +167 -0
- data/app/views/rails_error_dashboard/errors/analytics.html.erb +152 -56
- data/app/views/rails_error_dashboard/errors/correlation.html.erb +373 -0
- data/app/views/rails_error_dashboard/errors/index.html.erb +294 -138
- data/app/views/rails_error_dashboard/errors/overview.html.erb +253 -0
- data/app/views/rails_error_dashboard/errors/platform_comparison.html.erb +399 -0
- data/app/views/rails_error_dashboard/errors/show.html.erb +781 -65
- data/config/routes.rb +9 -0
- data/db/migrate/20251225071314_add_optimized_indexes_to_error_logs.rb +66 -0
- data/db/migrate/20251225074653_remove_environment_from_error_logs.rb +26 -0
- data/db/migrate/20251225085859_add_enhanced_metrics_to_error_logs.rb +12 -0
- data/db/migrate/20251225093603_add_similarity_tracking_to_error_logs.rb +9 -0
- data/db/migrate/20251225100236_create_error_occurrences.rb +31 -0
- data/db/migrate/20251225101920_create_cascade_patterns.rb +33 -0
- data/db/migrate/20251225102500_create_error_baselines.rb +38 -0
- data/db/migrate/20251226020000_add_workflow_fields_to_error_logs.rb +27 -0
- data/db/migrate/20251226020100_create_error_comments.rb +18 -0
- data/lib/generators/rails_error_dashboard/install/install_generator.rb +276 -1
- data/lib/generators/rails_error_dashboard/install/templates/initializer.rb +272 -37
- data/lib/generators/rails_error_dashboard/solid_queue/solid_queue_generator.rb +36 -0
- data/lib/generators/rails_error_dashboard/solid_queue/templates/queue.yml +55 -0
- data/lib/rails_error_dashboard/commands/batch_delete_errors.rb +1 -1
- data/lib/rails_error_dashboard/commands/batch_resolve_errors.rb +2 -2
- data/lib/rails_error_dashboard/commands/log_error.rb +272 -7
- data/lib/rails_error_dashboard/commands/resolve_error.rb +16 -0
- data/lib/rails_error_dashboard/configuration.rb +90 -5
- data/lib/rails_error_dashboard/error_reporter.rb +15 -7
- data/lib/rails_error_dashboard/logger.rb +105 -0
- data/lib/rails_error_dashboard/middleware/error_catcher.rb +17 -10
- data/lib/rails_error_dashboard/plugin.rb +6 -3
- data/lib/rails_error_dashboard/plugin_registry.rb +2 -2
- data/lib/rails_error_dashboard/plugins/audit_log_plugin.rb +0 -1
- data/lib/rails_error_dashboard/plugins/jira_integration_plugin.rb +3 -4
- data/lib/rails_error_dashboard/plugins/metrics_plugin.rb +1 -3
- data/lib/rails_error_dashboard/queries/analytics_stats.rb +44 -6
- data/lib/rails_error_dashboard/queries/baseline_stats.rb +107 -0
- data/lib/rails_error_dashboard/queries/co_occurring_errors.rb +86 -0
- data/lib/rails_error_dashboard/queries/dashboard_stats.rb +242 -2
- data/lib/rails_error_dashboard/queries/error_cascades.rb +74 -0
- data/lib/rails_error_dashboard/queries/error_correlation.rb +375 -0
- data/lib/rails_error_dashboard/queries/errors_list.rb +106 -10
- data/lib/rails_error_dashboard/queries/filter_options.rb +0 -1
- data/lib/rails_error_dashboard/queries/platform_comparison.rb +254 -0
- data/lib/rails_error_dashboard/queries/similar_errors.rb +93 -0
- data/lib/rails_error_dashboard/services/backtrace_parser.rb +113 -0
- data/lib/rails_error_dashboard/services/baseline_alert_throttler.rb +88 -0
- data/lib/rails_error_dashboard/services/baseline_calculator.rb +269 -0
- data/lib/rails_error_dashboard/services/cascade_detector.rb +95 -0
- data/lib/rails_error_dashboard/services/pattern_detector.rb +268 -0
- data/lib/rails_error_dashboard/services/similarity_calculator.rb +144 -0
- data/lib/rails_error_dashboard/value_objects/error_context.rb +27 -1
- data/lib/rails_error_dashboard/version.rb +1 -1
- data/lib/rails_error_dashboard.rb +57 -7
- metadata +69 -10
- data/app/models/rails_error_dashboard/application_record.rb +0 -5
- data/lib/rails_error_dashboard/queries/developer_insights.rb +0 -277
- data/lib/rails_error_dashboard/queries/errors_list_v2.rb +0 -149
- data/lib/tasks/rails_error_dashboard_tasks.rake +0 -4
data/app/jobs/rails_error_dashboard/application_job.rb

@@ -1,4 +1,23 @@
 module RailsErrorDashboard
   class ApplicationJob < ActiveJob::Base
+    # CRITICAL: Ensure job failures don't break the app or spam error logs
+    # Retry failed jobs with exponential backoff, but limit attempts
+    retry_on StandardError, wait: :exponentially_longer, attempts: 3
+
+    # Global exception handling for all dashboard jobs
+    rescue_from StandardError do |exception|
+      # Log the error for debugging but don't propagate
+      Rails.logger.error("[RailsErrorDashboard] Job #{self.class.name} failed: #{exception.class} - #{exception.message}")
+      Rails.logger.error("Job arguments: #{arguments.inspect}")
+      Rails.logger.error("Attempt: #{executions}/3") if respond_to?(:executions)
+      Rails.logger.error(exception.backtrace&.first(10)&.join("\n")) if exception.backtrace
+
+      # Re-raise to trigger retry mechanism (up to 3 attempts)
+      # After 3 attempts, ActiveJob will discard the job and log it
+      raise exception if executions < 3
+
+      # If we've exhausted retries, log and give up gracefully
+      Rails.logger.error("[RailsErrorDashboard] Job #{self.class.name} discarded after #{executions} attempts")
+    end
   end
 end
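
For orientation, a minimal sketch of what this base class means for jobs that inherit from it; `CleanupJob` is a hypothetical example, not part of the gem, and the described behavior follows the comments in the diff above:

```ruby
# Hypothetical subclass: any dashboard job inheriting from ApplicationJob
# picks up the retry_on / rescue_from policy shown in the diff above.
module RailsErrorDashboard
  class CleanupJob < ApplicationJob
    def perform(error_log_id)
      ErrorLog.find(error_log_id).destroy!
    end
  end
end

# Per the diff's own comments: if perform raises a StandardError, the
# inherited rescue_from block logs it with the [RailsErrorDashboard] prefix
# and re-raises while executions < 3 so the retry mechanism can reschedule
# the job; after the third attempt the job is discarded rather than
# propagating the failure to the caller or the queue.
RailsErrorDashboard::CleanupJob.perform_later(42)
```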
data/app/jobs/rails_error_dashboard/async_error_logging_job.rb

@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module RailsErrorDashboard
+  # Background job for asynchronous error logging
+  # This prevents error logging from blocking the main request/response cycle
+  class AsyncErrorLoggingJob < ApplicationJob
+    queue_as :default
+
+    # Performs async error logging
+    # @param exception_data [Hash] Serialized exception data
+    # @param context [Hash] Error context (request, user, etc.)
+    def perform(exception_data, context)
+      # Reconstruct the exception from serialized data
+      exception = reconstruct_exception(exception_data)
+
+      # Log the error synchronously in the background job
+      # Call .new().call to bypass async check (we're already async)
+      Commands::LogError.new(exception, context).call
+    rescue => e
+      # Don't let async job errors break the job queue
+      Rails.logger.error("AsyncErrorLoggingJob failed: #{e.message}")
+      Rails.logger.error("Backtrace: #{e.backtrace&.first(5)&.join("\n")}")
+    end
+
+    private
+
+    # Reconstruct exception from serialized data
+    # @param data [Hash] Serialized exception data
+    # @return [Exception] Reconstructed exception object
+    def reconstruct_exception(data)
+      # Get or create the exception class
+      exception_class = begin
+        data[:class_name].constantize
+      rescue NameError
+        # If class doesn't exist, use StandardError
+        StandardError
+      end
+
+      # Create new exception with the original message
+      exception = exception_class.new(data[:message])
+
+      # Restore the backtrace
+      exception.set_backtrace(data[:backtrace]) if data[:backtrace]
+
+      exception
+    end
+  end
+end
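
A usage sketch of the expected argument shape. The `exception_data` keys follow what `reconstruct_exception` reads back (`:class_name`, `:message`, `:backtrace`); the `context` keys and concrete values here are only illustrative:

```ruby
# Assumed serialized form, based on the keys the job reads back.
exception_data = {
  class_name: "ActiveRecord::RecordNotFound",
  message: "Couldn't find User with 'id'=42",
  backtrace: ["app/controllers/users_controller.rb:12:in `show'"]
}
context = { controller: "UsersController", action: "show" } # illustrative only

# Enqueue from the request path; the logging work happens in the background.
RailsErrorDashboard::AsyncErrorLoggingJob.perform_later(exception_data, context)
```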
data/app/jobs/rails_error_dashboard/baseline_alert_job.rb

@@ -0,0 +1,263 @@
+# frozen_string_literal: true
+
+module RailsErrorDashboard
+  # Sends baseline anomaly alerts through configured notification channels
+  #
+  # This job is triggered when an error exceeds baseline thresholds.
+  # It respects cooldown periods to prevent alert fatigue and sends
+  # notifications through all enabled channels (Slack, Email, Discord, etc.)
+  class BaselineAlertJob < ApplicationJob
+    queue_as :default
+
+    # @param error_log_id [Integer] The error log that triggered the alert
+    # @param anomaly_data [Hash] Anomaly information from baseline check
+    def perform(error_log_id, anomaly_data)
+      error_log = ErrorLog.find_by(id: error_log_id)
+      return unless error_log
+
+      config = RailsErrorDashboard.configuration
+
+      # Check if we should send alert (cooldown check)
+      unless Services::BaselineAlertThrottler.should_alert?(
+        error_log.error_type,
+        error_log.platform,
+        cooldown_minutes: config.baseline_alert_cooldown_minutes
+      )
+        Rails.logger.info(
+          "Baseline alert throttled for #{error_log.error_type} on #{error_log.platform}"
+        )
+        return
+      end
+
+      # Record that we're sending an alert
+      Services::BaselineAlertThrottler.record_alert(
+        error_log.error_type,
+        error_log.platform
+      )
+
+      # Send notifications through all enabled channels
+      send_notifications(error_log, anomaly_data, config)
+    end
+
+    private
+
+    def send_notifications(error_log, anomaly_data, config)
+      # Slack notification
+      if config.enable_slack_notifications && config.slack_webhook_url.present?
+        send_slack_notification(error_log, anomaly_data, config)
+      end
+
+      # Email notification
+      if config.enable_email_notifications && config.notification_email_recipients.any?
+        send_email_notification(error_log, anomaly_data, config)
+      end
+
+      # Discord notification
+      if config.enable_discord_notifications && config.discord_webhook_url.present?
+        send_discord_notification(error_log, anomaly_data, config)
+      end
+
+      # Webhook notification
+      if config.enable_webhook_notifications && config.webhook_urls.any?
+        send_webhook_notification(error_log, anomaly_data, config)
+      end
+
+      # PagerDuty for critical anomalies
+      if config.enable_pagerduty_notifications &&
+         config.pagerduty_integration_key.present? &&
+         anomaly_data[:level] == :critical
+        send_pagerduty_notification(error_log, anomaly_data, config)
+      end
+    end
+
+    def send_slack_notification(error_log, anomaly_data, config)
+      payload = build_slack_payload(error_log, anomaly_data, config)
+
+      HTTParty.post(
+        config.slack_webhook_url,
+        body: payload.to_json,
+        headers: { "Content-Type" => "application/json" }
+      )
+    rescue => e
+      Rails.logger.error("Failed to send baseline alert to Slack: #{e.message}")
+    end
+
+    def send_email_notification(error_log, _anomaly_data, _config)
+      # Use existing email notification infrastructure if available
+      # For now, log that email would be sent
+      Rails.logger.info(
+        "Baseline alert email would be sent for #{error_log.error_type}"
+      )
+    rescue => e
+      Rails.logger.error("Failed to send baseline alert email: #{e.message}")
+    end
+
+    def send_discord_notification(error_log, anomaly_data, config)
+      payload = build_discord_payload(error_log, anomaly_data, config)
+
+      HTTParty.post(
+        config.discord_webhook_url,
+        body: payload.to_json,
+        headers: { "Content-Type" => "application/json" }
+      )
+    rescue => e
+      Rails.logger.error("Failed to send baseline alert to Discord: #{e.message}")
+    end
+
+    def send_webhook_notification(error_log, anomaly_data, config)
+      payload = build_webhook_payload(error_log, anomaly_data)
+
+      config.webhook_urls.each do |url|
+        HTTParty.post(
+          url,
+          body: payload.to_json,
+          headers: { "Content-Type" => "application/json" }
+        )
+      end
+    rescue => e
+      Rails.logger.error("Failed to send baseline alert to webhook: #{e.message}")
+    end
+
+    def send_pagerduty_notification(error_log, _anomaly_data, _config)
+      # Use existing PagerDuty notification infrastructure if available
+      Rails.logger.info(
+        "Baseline alert PagerDuty notification for #{error_log.error_type}"
+      )
+    rescue => e
+      Rails.logger.error("Failed to send baseline alert to PagerDuty: #{e.message}")
+    end
+
+    # Build Slack message payload
+    def build_slack_payload(error_log, anomaly_data, config)
+      {
+        text: "🚨 Baseline Anomaly Alert",
+        blocks: [
+          {
+            type: "header",
+            text: {
+              type: "plain_text",
+              text: "🚨 Baseline Anomaly Detected"
+            }
+          },
+          {
+            type: "section",
+            fields: [
+              {
+                type: "mrkdwn",
+                text: "*Error Type:*\n#{error_log.error_type}"
+              },
+              {
+                type: "mrkdwn",
+                text: "*Platform:*\n#{error_log.platform}"
+              },
+              {
+                type: "mrkdwn",
+                text: "*Severity:*\n#{anomaly_level_emoji(anomaly_data[:level])} #{anomaly_data[:level].to_s.upcase}"
+              },
+              {
+                type: "mrkdwn",
+                text: "*Standard Deviations:*\n#{anomaly_data[:std_devs_above]&.round(1)}σ above baseline"
+              }
+            ]
+          },
+          {
+            type: "section",
+            text: {
+              type: "mrkdwn",
+              text: "*Message:*\n```#{error_log.message.truncate(200)}```"
+            }
+          },
+          {
+            type: "section",
+            text: {
+              type: "mrkdwn",
+              text: "*Baseline Info:*\nThreshold: #{anomaly_data[:threshold]&.round(1)} errors\nBaseline Type: #{anomaly_data[:baseline_type]}"
+            }
+          },
+          {
+            type: "actions",
+            elements: [
+              {
+                type: "button",
+                text: {
+                  type: "plain_text",
+                  text: "View in Dashboard"
+                },
+                url: dashboard_url(error_log, config)
+              }
+            ]
+          }
+        ]
+      }
+    end
+
+    # Build Discord embed payload
+    def build_discord_payload(error_log, anomaly_data, config)
+      {
+        embeds: [
+          {
+            title: "🚨 Baseline Anomaly Detected",
+            color: anomaly_color(anomaly_data[:level]),
+            fields: [
+              { name: "Error Type", value: error_log.error_type, inline: true },
+              { name: "Platform", value: error_log.platform, inline: true },
+              { name: "Severity", value: anomaly_data[:level].to_s.upcase, inline: true },
+              { name: "Standard Deviations", value: "#{anomaly_data[:std_devs_above]&.round(1)}σ above baseline", inline: true },
+              { name: "Threshold", value: "#{anomaly_data[:threshold]&.round(1)} errors", inline: true },
+              { name: "Baseline Type", value: anomaly_data[:baseline_type] || "N/A", inline: true },
+              { name: "Message", value: "```#{error_log.message.truncate(200)}```", inline: false }
+            ],
+            url: dashboard_url(error_log, config),
+            timestamp: Time.current.iso8601
+          }
+        ]
+      }
+    end
+
+    # Build generic webhook payload
+    def build_webhook_payload(error_log, anomaly_data)
+      {
+        event: "baseline_anomaly",
+        timestamp: Time.current.iso8601,
+        error: {
+          id: error_log.id,
+          type: error_log.error_type,
+          message: error_log.message,
+          platform: error_log.platform,
+          severity: error_log.severity.to_s,
+          occurred_at: error_log.occurred_at.iso8601
+        },
+        anomaly: {
+          level: anomaly_data[:level].to_s,
+          std_devs_above: anomaly_data[:std_devs_above],
+          threshold: anomaly_data[:threshold],
+          baseline_type: anomaly_data[:baseline_type]
+        },
+        dashboard_url: dashboard_url(error_log, RailsErrorDashboard.configuration)
+      }
+    end
+
+    def anomaly_level_emoji(level)
+      case level
+      when :critical then "🔴"
+      when :high then "🟠"
+      when :elevated then "🟡"
+      else "⚪"
+      end
+    end
+
+    def anomaly_color(level)
+      case level
+      when :critical then 15158332 # Red
+      when :high then 16744192 # Orange
+      when :elevated then 16776960 # Yellow
+      else 9807270 # Gray
+      end
+    end
+
+    def dashboard_url(error_log, config)
+      base_url = config.dashboard_base_url || "http://localhost:3000"
+      "#{base_url}/error_dashboard/errors/#{error_log.id}"
+    end
+  end
+end
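
A sketch of how this job would be enqueued. The `anomaly_data` keys mirror the ones the payload builders read (`:level`, `:std_devs_above`, `:threshold`, `:baseline_type`); the concrete values and the caller shown here are hypothetical:

```ruby
# error_log is assumed to be a persisted RailsErrorDashboard::ErrorLog.
anomaly_data = {
  level: :critical,        # :elevated, :high or :critical; only :critical pages PagerDuty
  std_devs_above: 4.2,     # how far the current count sits above the baseline mean
  threshold: 14.0,         # mean + sensitivity * std_dev at detection time
  baseline_type: "hourly"
}

RailsErrorDashboard::BaselineAlertJob.perform_later(error_log.id, anomaly_data)
```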
data/app/jobs/rails_error_dashboard/discord_error_notification_job.rb

@@ -18,11 +18,12 @@ module RailsErrorDashboard
       HTTParty.post(
         webhook_url,
         body: payload.to_json,
-        headers: { "Content-Type" => "application/json" }
+        headers: { "Content-Type" => "application/json" },
+        timeout: 10 # CRITICAL: 10 second timeout to prevent hanging
       )
     rescue StandardError => e
-      Rails.logger.error("Failed to send Discord notification: #{e.message}")
-      Rails.logger.error(e.backtrace
+      Rails.logger.error("[RailsErrorDashboard] Failed to send Discord notification: #{e.message}")
+      Rails.logger.error(e.backtrace&.first(5)&.join("\n")) if e.backtrace
     end
 
     private
@@ -39,11 +40,6 @@ module RailsErrorDashboard
           value: error_log.platform || "Unknown",
           inline: true
         },
-        {
-          name: "Environment",
-          value: error_log.environment || "Unknown",
-          inline: true
-        },
         {
           name: "Occurrences",
           value: error_log.occurrence_count.to_s,
data/app/jobs/rails_error_dashboard/email_error_notification_job.rb

@@ -13,7 +13,8 @@ module RailsErrorDashboard
 
       ErrorNotificationMailer.error_alert(error_log, recipients).deliver_now
     rescue => e
-      Rails.logger.error("Failed to send email notification: #{e.message}")
+      Rails.logger.error("[RailsErrorDashboard] Failed to send email notification: #{e.message}")
+      Rails.logger.error(e.backtrace&.first(5)&.join("\n")) if e.backtrace
     end
   end
 end
data/app/jobs/rails_error_dashboard/pagerduty_error_notification_job.rb

@@ -24,15 +24,16 @@ module RailsErrorDashboard
       response = HTTParty.post(
         PAGERDUTY_EVENTS_API,
         body: payload.to_json,
-        headers: { "Content-Type" => "application/json" }
+        headers: { "Content-Type" => "application/json" },
+        timeout: 10 # CRITICAL: 10 second timeout to prevent hanging
       )
 
       unless response.success?
-        Rails.logger.error("PagerDuty API error: #{response.code} - #{response.body}")
+        Rails.logger.error("[RailsErrorDashboard] PagerDuty API error: #{response.code} - #{response.body}")
       end
     rescue StandardError => e
-      Rails.logger.error("Failed to send PagerDuty notification: #{e.message}")
-      Rails.logger.error(e.backtrace
+      Rails.logger.error("[RailsErrorDashboard] Failed to send PagerDuty notification: #{e.message}")
+      Rails.logger.error(e.backtrace&.first(5)&.join("\n")) if e.backtrace
     end
 
     private
@@ -53,7 +54,6 @@ module RailsErrorDashboard
         controller: error_log.controller_name,
         action: error_log.action_name,
         platform: error_log.platform,
-        environment: error_log.environment,
         occurrences: error_log.occurrence_count,
         first_seen_at: error_log.first_seen_at&.iso8601,
         last_seen_at: error_log.last_seen_at&.iso8601,
data/app/jobs/rails_error_dashboard/slack_error_notification_job.rb

@@ -26,19 +26,27 @@ module RailsErrorDashboard
       http = Net::HTTP.new(uri.host, uri.port)
       http.use_ssl = true
 
+      # CRITICAL: Add timeouts to prevent hanging the job queue
+      http.open_timeout = 5 # 5 seconds to establish connection
+      http.read_timeout = 10 # 10 seconds to read response
+
       request = Net::HTTP::Post.new(uri.path, { "Content-Type" => "application/json" })
       request.body = slack_payload(error_log).to_json
 
       response = http.request(request)
 
       unless response.is_a?(Net::HTTPSuccess)
-        Rails.logger.error("Slack notification failed: #{response.code} - #{response.body}")
+        Rails.logger.error("[RailsErrorDashboard] Slack notification failed: #{response.code} - #{response.body}")
       end
+    rescue Timeout::Error, Errno::ECONNREFUSED, SocketError, Net::OpenTimeout, Net::ReadTimeout => e
+      # Network errors - log and fail gracefully
+      Rails.logger.error("[RailsErrorDashboard] Slack HTTP request failed: #{e.class} - #{e.message}")
+      nil
     end
 
     def slack_payload(error_log)
       {
-        text: "🚨 New Error
+        text: "🚨 New Error Alert",
         blocks: [
           {
             type: "header",
@@ -55,10 +63,6 @@ module RailsErrorDashboard
             type: "mrkdwn",
             text: "*Error Type:*\n`#{error_log.error_type}`"
           },
-          {
-            type: "mrkdwn",
-            text: "*Environment:*\n#{error_log.environment.titleize}"
-          },
           {
             type: "mrkdwn",
             text: "*Platform:*\n#{platform_emoji(error_log.platform)} #{error_log.platform || 'Unknown'}"
data/app/jobs/rails_error_dashboard/webhook_error_notification_job.rb

@@ -23,8 +23,8 @@ module RailsErrorDashboard
         send_webhook(url, payload, error_log)
       end
     rescue StandardError => e
-      Rails.logger.error("Failed to send webhook notification: #{e.message}")
-      Rails.logger.error(e.backtrace
+      Rails.logger.error("[RailsErrorDashboard] Failed to send webhook notification: #{e.message}")
+      Rails.logger.error(e.backtrace&.first(5)&.join("\n")) if e.backtrace
     end
 
     private
@@ -39,14 +39,14 @@ module RailsErrorDashboard
           "X-Error-Dashboard-Event" => "error.created",
           "X-Error-Dashboard-ID" => error_log.id.to_s
         },
-        timeout: 10
+        timeout: 10 # CRITICAL: 10 second timeout to prevent hanging
       )
 
       unless response.success?
-        Rails.logger.warn("Webhook failed for #{url}: #{response.code}")
+        Rails.logger.warn("[RailsErrorDashboard] Webhook failed for #{url}: #{response.code}")
       end
     rescue StandardError => e
-      Rails.logger.error("Webhook error for #{url}: #{e.message}")
+      Rails.logger.error("[RailsErrorDashboard] Webhook error for #{url}: #{e.message}")
     end
 
     def build_webhook_payload(error_log)
@@ -59,7 +59,6 @@ module RailsErrorDashboard
         message: error_log.message,
         severity: error_log.severity.to_s,
         platform: error_log.platform,
-        environment: error_log.environment,
         controller: error_log.controller_name,
         action: error_log.action_name,
         occurrence_count: error_log.occurrence_count,
data/app/mailers/rails_error_dashboard/error_notification_mailer.rb

@@ -8,7 +8,7 @@ module RailsErrorDashboard
 
       mail(
         to: recipients,
-        subject: "🚨
+        subject: "🚨 #{error_log.error_type}: #{truncate_subject(error_log.message)}"
       )
     end
 
data/app/models/rails_error_dashboard/cascade_pattern.rb

@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module RailsErrorDashboard
+  # Tracks cascade patterns where one error causes another
+  #
+  # A cascade pattern represents a causal relationship between errors:
+  # Parent Error → Child Error
+  #
+  # For example: DatabaseConnectionError → NoMethodError
+  # When a database connection fails, subsequent code may try to call
+  # methods on nil objects, causing NoMethodError.
+  #
+  # @attr parent_error_id [Integer] The error that happens first (potential cause)
+  # @attr child_error_id [Integer] The error that happens after (potential effect)
+  # @attr frequency [Integer] How many times this cascade has been observed
+  # @attr avg_delay_seconds [Float] Average time between parent and child
+  # @attr cascade_probability [Float] Likelihood (0.0-1.0) that parent causes child
+  # @attr last_detected_at [DateTime] When this cascade was last observed
+  class CascadePattern < ErrorLogsRecord
+    self.table_name = "rails_error_dashboard_cascade_patterns"
+
+    belongs_to :parent_error, class_name: "RailsErrorDashboard::ErrorLog"
+    belongs_to :child_error, class_name: "RailsErrorDashboard::ErrorLog"
+
+    validates :parent_error_id, presence: true
+    validates :child_error_id, presence: true
+    validates :frequency, presence: true, numericality: { greater_than: 0 }
+    validate :parent_and_child_must_be_different
+
+    scope :high_confidence, -> { where("cascade_probability >= ?", 0.7) }
+    scope :frequent, ->(min_frequency = 3) { where("frequency >= ?", min_frequency) }
+    scope :recent, -> { order(last_detected_at: :desc) }
+    scope :by_parent, ->(error_id) { where(parent_error_id: error_id) }
+    scope :by_child, ->(error_id) { where(child_error_id: error_id) }
+
+    # Update cascade pattern stats
+    def increment_detection!(delay_seconds)
+      self.frequency += 1
+
+      # Update average delay using incremental formula
+      if avg_delay_seconds.present?
+        self.avg_delay_seconds = ((avg_delay_seconds * (frequency - 1)) + delay_seconds) / frequency
+      else
+        self.avg_delay_seconds = delay_seconds
+      end
+
+      self.last_detected_at = Time.current
+      save
+    end
+
+    # Calculate cascade probability based on frequency
+    # Probability = (times child follows parent) / (total parent occurrences)
+    def calculate_probability!
+      parent_occurrence_count = parent_error.error_occurrences.count
+      return if parent_occurrence_count.zero?
+
+      self.cascade_probability = (frequency.to_f / parent_occurrence_count).round(3)
+      save
+    end
+
+    # Check if this is a strong cascade pattern
+    def strong_cascade?
+      cascade_probability.present? && cascade_probability >= 0.7 && frequency >= 3
+    end
+
+    private
+
+    def parent_and_child_must_be_different
+      if parent_error_id == child_error_id
+        errors.add(:child_error_id, "cannot be the same as parent error")
+      end
+    end
+  end
+end
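
A worked sketch of the running-average and probability updates above; the values are hypothetical and `parent`/`child` stand in for existing ErrorLog records:

```ruby
pattern = RailsErrorDashboard::CascadePattern.find_by(
  parent_error_id: parent.id,
  child_error_id: child.id
)

# Suppose frequency is 3 and avg_delay_seconds is 10.0, and the child error
# was just observed 16 seconds after the parent:
pattern.increment_detection!(16.0)
# frequency becomes 4 and the average is folded in incrementally:
# ((10.0 * 3) + 16.0) / 4  # => 11.5 seconds

pattern.calculate_probability!  # frequency / parent occurrence count, rounded to 3 places
pattern.strong_cascade?         # true once probability >= 0.7 and frequency >= 3
```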
data/app/models/rails_error_dashboard/error_baseline.rb

@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+module RailsErrorDashboard
+  # Stores baseline statistics for error types
+  #
+  # Baselines are calculated periodically (hourly, daily, weekly) to establish
+  # "normal" error behavior. This enables anomaly detection by comparing current
+  # error counts against historical baselines.
+  #
+  # @attr error_type [String] The type of error (e.g., "NoMethodError")
+  # @attr platform [String] Platform (iOS, Android, API, Web)
+  # @attr baseline_type [String] Time period type (hourly, daily, weekly)
+  # @attr period_start [DateTime] Start of the period this baseline covers
+  # @attr period_end [DateTime] End of the period this baseline covers
+  # @attr count [Integer] Total errors in this period
+  # @attr mean [Float] Average error count
+  # @attr std_dev [Float] Standard deviation
+  # @attr percentile_95 [Float] 95th percentile
+  # @attr percentile_99 [Float] 99th percentile
+  # @attr sample_size [Integer] Number of periods in the sample
+  class ErrorBaseline < ErrorLogsRecord
+    self.table_name = "rails_error_dashboard_error_baselines"
+
+    BASELINE_TYPES = %w[hourly daily weekly].freeze
+
+    validates :error_type, presence: true
+    validates :platform, presence: true
+    validates :baseline_type, presence: true, inclusion: { in: BASELINE_TYPES }
+    validates :period_start, presence: true
+    validates :period_end, presence: true
+    validates :count, presence: true, numericality: { greater_than_or_equal_to: 0 }
+    validates :sample_size, presence: true, numericality: { greater_than_or_equal_to: 0 }
+
+    validate :period_end_after_period_start
+
+    scope :for_error_type, ->(error_type) { where(error_type: error_type) }
+    scope :for_platform, ->(platform) { where(platform: platform) }
+    scope :hourly, -> { where(baseline_type: "hourly") }
+    scope :daily, -> { where(baseline_type: "daily") }
+    scope :weekly, -> { where(baseline_type: "weekly") }
+    scope :recent, -> { order(period_start: :desc) }
+
+    # Check if a given count is anomalous compared to this baseline
+    # @param current_count [Integer] Current error count to check
+    # @param sensitivity [Integer] Number of standard deviations (default: 2)
+    # @return [Symbol, nil] :elevated, :high, :critical, or nil if normal
+    def anomaly_level(current_count, sensitivity: 2)
+      return nil if mean.nil? || std_dev.nil?
+      return nil if current_count <= mean
+
+      std_devs_above = (current_count - mean) / std_dev
+
+      case std_devs_above
+      when sensitivity..(sensitivity + 1)
+        :elevated
+      when (sensitivity + 1)..(sensitivity + 2)
+        :high
+      when (sensitivity + 2)..Float::INFINITY
+        :critical
+      else
+        nil
+      end
+    end
+
+    # Check if current count is above baseline
+    # @param current_count [Integer] Current error count
+    # @param sensitivity [Integer] Number of standard deviations (default: 2)
+    # @return [Boolean] True if count exceeds baseline + (sensitivity * std_dev)
+    def exceeds_baseline?(current_count, sensitivity: 2)
+      return false if mean.nil? || std_dev.nil?
+      current_count > (mean + (sensitivity * std_dev))
+    end
+
+    # Get the threshold for anomaly detection
+    # @param sensitivity [Integer] Number of standard deviations (default: 2)
+    # @return [Float, nil] Threshold value or nil if stats not available
+    def threshold(sensitivity: 2)
+      return nil if mean.nil? || std_dev.nil?
+      mean + (sensitivity * std_dev)
+    end
+
+    # Calculate how many standard deviations above mean
+    # @param current_count [Integer] Current error count
+    # @return [Float, nil] Number of standard deviations or nil
+    def std_devs_above_mean(current_count)
+      return nil if mean.nil? || std_dev.nil? || std_dev.zero?
+      (current_count - mean) / std_dev
+    end
+
+    private
+
+    def period_end_after_period_start
+      return if period_start.nil? || period_end.nil?
+
+      if period_end <= period_start
+        errors.add(:period_end, "must be after period_start")
+      end
+    end
+  end
+end
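
A worked sketch with hypothetical numbers showing how the anomaly helpers relate (ErrorBaseline is an ActiveRecord model, so instantiating it like this assumes the table from the accompanying migration exists):

```ruby
baseline = RailsErrorDashboard::ErrorBaseline.new(mean: 10.0, std_dev: 2.0)

baseline.threshold                # => 14.0   (10 + 2 * 2, default sensitivity of 2)
baseline.exceeds_baseline?(17)    # => true   (17 > 14.0)
baseline.std_devs_above_mean(17)  # => 3.5
baseline.anomaly_level(17)        # => :high  (3.5 std devs falls in the 3..4 band)
baseline.anomaly_level(13)        # => nil    (1.5 std devs is below the 2-sigma cutoff)
```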