rails_error_dashboard 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +305 -703
- data/app/assets/stylesheets/rails_error_dashboard/_catppuccin_mocha.scss +107 -0
- data/app/assets/stylesheets/rails_error_dashboard/_components.scss +625 -0
- data/app/assets/stylesheets/rails_error_dashboard/_layout.scss +257 -0
- data/app/assets/stylesheets/rails_error_dashboard/_theme_variables.scss +203 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.css +926 -15
- data/app/assets/stylesheets/rails_error_dashboard/application.css.map +7 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.scss +61 -0
- data/app/controllers/rails_error_dashboard/application_controller.rb +18 -0
- data/app/controllers/rails_error_dashboard/errors_controller.rb +140 -4
- data/app/helpers/rails_error_dashboard/application_helper.rb +55 -0
- data/app/helpers/rails_error_dashboard/backtrace_helper.rb +91 -0
- data/app/helpers/rails_error_dashboard/overview_helper.rb +78 -0
- data/app/helpers/rails_error_dashboard/user_agent_helper.rb +118 -0
- data/app/jobs/rails_error_dashboard/application_job.rb +19 -0
- data/app/jobs/rails_error_dashboard/async_error_logging_job.rb +48 -0
- data/app/jobs/rails_error_dashboard/baseline_alert_job.rb +263 -0
- data/app/jobs/rails_error_dashboard/discord_error_notification_job.rb +4 -8
- data/app/jobs/rails_error_dashboard/email_error_notification_job.rb +2 -1
- data/app/jobs/rails_error_dashboard/pagerduty_error_notification_job.rb +5 -5
- data/app/jobs/rails_error_dashboard/slack_error_notification_job.rb +10 -6
- data/app/jobs/rails_error_dashboard/webhook_error_notification_job.rb +5 -6
- data/app/mailers/rails_error_dashboard/application_mailer.rb +1 -1
- data/app/mailers/rails_error_dashboard/error_notification_mailer.rb +1 -1
- data/app/models/rails_error_dashboard/cascade_pattern.rb +74 -0
- data/app/models/rails_error_dashboard/error_baseline.rb +100 -0
- data/app/models/rails_error_dashboard/error_comment.rb +27 -0
- data/app/models/rails_error_dashboard/error_log.rb +471 -3
- data/app/models/rails_error_dashboard/error_occurrence.rb +49 -0
- data/app/views/layouts/rails_error_dashboard.html.erb +816 -178
- data/app/views/layouts/rails_error_dashboard_old_backup.html.erb +383 -0
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.html.erb +3 -10
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.text.erb +1 -2
- data/app/views/rails_error_dashboard/errors/_error_row.html.erb +78 -0
- data/app/views/rails_error_dashboard/errors/_pattern_insights.html.erb +209 -0
- data/app/views/rails_error_dashboard/errors/_stats.html.erb +34 -0
- data/app/views/rails_error_dashboard/errors/_timeline.html.erb +167 -0
- data/app/views/rails_error_dashboard/errors/analytics.html.erb +152 -56
- data/app/views/rails_error_dashboard/errors/correlation.html.erb +373 -0
- data/app/views/rails_error_dashboard/errors/index.html.erb +294 -138
- data/app/views/rails_error_dashboard/errors/overview.html.erb +253 -0
- data/app/views/rails_error_dashboard/errors/platform_comparison.html.erb +399 -0
- data/app/views/rails_error_dashboard/errors/show.html.erb +781 -65
- data/config/routes.rb +9 -0
- data/db/migrate/20251225071314_add_optimized_indexes_to_error_logs.rb +66 -0
- data/db/migrate/20251225074653_remove_environment_from_error_logs.rb +26 -0
- data/db/migrate/20251225085859_add_enhanced_metrics_to_error_logs.rb +12 -0
- data/db/migrate/20251225093603_add_similarity_tracking_to_error_logs.rb +9 -0
- data/db/migrate/20251225100236_create_error_occurrences.rb +31 -0
- data/db/migrate/20251225101920_create_cascade_patterns.rb +33 -0
- data/db/migrate/20251225102500_create_error_baselines.rb +38 -0
- data/db/migrate/20251226020000_add_workflow_fields_to_error_logs.rb +27 -0
- data/db/migrate/20251226020100_create_error_comments.rb +18 -0
- data/lib/generators/rails_error_dashboard/install/install_generator.rb +276 -1
- data/lib/generators/rails_error_dashboard/install/templates/initializer.rb +272 -37
- data/lib/generators/rails_error_dashboard/solid_queue/solid_queue_generator.rb +36 -0
- data/lib/generators/rails_error_dashboard/solid_queue/templates/queue.yml +55 -0
- data/lib/rails_error_dashboard/commands/batch_delete_errors.rb +1 -1
- data/lib/rails_error_dashboard/commands/batch_resolve_errors.rb +2 -2
- data/lib/rails_error_dashboard/commands/log_error.rb +272 -7
- data/lib/rails_error_dashboard/commands/resolve_error.rb +16 -0
- data/lib/rails_error_dashboard/configuration.rb +90 -5
- data/lib/rails_error_dashboard/error_reporter.rb +15 -7
- data/lib/rails_error_dashboard/logger.rb +105 -0
- data/lib/rails_error_dashboard/middleware/error_catcher.rb +17 -10
- data/lib/rails_error_dashboard/plugin.rb +6 -3
- data/lib/rails_error_dashboard/plugin_registry.rb +2 -2
- data/lib/rails_error_dashboard/plugins/audit_log_plugin.rb +0 -1
- data/lib/rails_error_dashboard/plugins/jira_integration_plugin.rb +3 -4
- data/lib/rails_error_dashboard/plugins/metrics_plugin.rb +1 -3
- data/lib/rails_error_dashboard/queries/analytics_stats.rb +44 -6
- data/lib/rails_error_dashboard/queries/baseline_stats.rb +107 -0
- data/lib/rails_error_dashboard/queries/co_occurring_errors.rb +86 -0
- data/lib/rails_error_dashboard/queries/dashboard_stats.rb +242 -2
- data/lib/rails_error_dashboard/queries/error_cascades.rb +74 -0
- data/lib/rails_error_dashboard/queries/error_correlation.rb +375 -0
- data/lib/rails_error_dashboard/queries/errors_list.rb +106 -10
- data/lib/rails_error_dashboard/queries/filter_options.rb +0 -1
- data/lib/rails_error_dashboard/queries/platform_comparison.rb +254 -0
- data/lib/rails_error_dashboard/queries/similar_errors.rb +93 -0
- data/lib/rails_error_dashboard/services/backtrace_parser.rb +113 -0
- data/lib/rails_error_dashboard/services/baseline_alert_throttler.rb +88 -0
- data/lib/rails_error_dashboard/services/baseline_calculator.rb +269 -0
- data/lib/rails_error_dashboard/services/cascade_detector.rb +95 -0
- data/lib/rails_error_dashboard/services/pattern_detector.rb +268 -0
- data/lib/rails_error_dashboard/services/similarity_calculator.rb +144 -0
- data/lib/rails_error_dashboard/value_objects/error_context.rb +27 -1
- data/lib/rails_error_dashboard/version.rb +1 -1
- data/lib/rails_error_dashboard.rb +57 -7
- metadata +69 -10
- data/app/models/rails_error_dashboard/application_record.rb +0 -5
- data/lib/rails_error_dashboard/queries/developer_insights.rb +0 -277
- data/lib/rails_error_dashboard/queries/errors_list_v2.rb +0 -149
- data/lib/tasks/rails_error_dashboard_tasks.rake +0 -4
|
@@ -18,13 +18,13 @@ module RailsErrorDashboard
|
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
if plugins.any? { |p| p.name == plugin.name }
|
|
21
|
-
|
|
21
|
+
RailsErrorDashboard::Logger.warn("Plugin '#{plugin.name}' is already registered, skipping")
|
|
22
22
|
return false
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
plugins << plugin
|
|
26
26
|
plugin.on_register
|
|
27
|
-
|
|
27
|
+
RailsErrorDashboard::Logger.info("Registered plugin: #{plugin.name} (#{plugin.version})")
|
|
28
28
|
true
|
|
29
29
|
end
|
|
30
30
|
|
|
@@ -55,14 +55,14 @@ module RailsErrorDashboard
|
|
|
55
55
|
|
|
56
56
|
ticket_data = {
|
|
57
57
|
project: { key: @jira_project_key },
|
|
58
|
-
summary: "
|
|
58
|
+
summary: "#{error_log.error_type}",
|
|
59
59
|
description: build_description(error_log),
|
|
60
60
|
issuetype: { name: "Bug" },
|
|
61
61
|
priority: { name: jira_priority(error_log) },
|
|
62
|
-
labels: [ "rails-error-dashboard", error_log.platform
|
|
62
|
+
labels: [ "rails-error-dashboard", error_log.platform ].compact
|
|
63
63
|
}
|
|
64
64
|
|
|
65
|
-
|
|
65
|
+
RailsErrorDashboard::Logger.info("Would create Jira ticket: #{ticket_data.to_json}")
|
|
66
66
|
|
|
67
67
|
# Actual implementation:
|
|
68
68
|
# require 'httparty'
|
|
@@ -81,7 +81,6 @@ module RailsErrorDashboard
|
|
|
81
81
|
*Error Type:* #{error_log.error_type}
|
|
82
82
|
*Message:* #{error_log.message}
|
|
83
83
|
*Platform:* #{error_log.platform}
|
|
84
|
-
*Environment:* #{error_log.environment}
|
|
85
84
|
*Severity:* #{error_log.severity}
|
|
86
85
|
*Controller:* #{error_log.controller_name}
|
|
87
86
|
*Action:* #{error_log.action_name}
|
|
@@ -27,7 +27,6 @@ module RailsErrorDashboard
|
|
|
27
27
|
increment_counter("errors.new", error_log)
|
|
28
28
|
increment_counter("errors.by_type.#{sanitize_metric_name(error_log.error_type)}", error_log)
|
|
29
29
|
increment_counter("errors.by_platform.#{error_log.platform || 'unknown'}", error_log)
|
|
30
|
-
increment_counter("errors.by_environment.#{error_log.environment}", error_log)
|
|
31
30
|
end
|
|
32
31
|
|
|
33
32
|
def on_error_recurred(error_log)
|
|
@@ -57,7 +56,7 @@ module RailsErrorDashboard
|
|
|
57
56
|
# Datadog::Statsd.increment(metric_name, tags: metric_tags(data))
|
|
58
57
|
|
|
59
58
|
# For demonstration, just log
|
|
60
|
-
|
|
59
|
+
RailsErrorDashboard::Logger.info("Metrics: #{metric_name} - #{data.is_a?(Hash) ? data : data.class.name}")
|
|
61
60
|
end
|
|
62
61
|
|
|
63
62
|
def metric_tags(data)
|
|
@@ -65,7 +64,6 @@ module RailsErrorDashboard
|
|
|
65
64
|
|
|
66
65
|
[
|
|
67
66
|
"platform:#{data.platform || 'unknown'}",
|
|
68
|
-
"environment:#{data.environment}",
|
|
69
67
|
"severity:#{data.severity}"
|
|
70
68
|
]
|
|
71
69
|
end
|
|
@@ -21,12 +21,12 @@ module RailsErrorDashboard
|
|
|
21
21
|
errors_over_time: errors_over_time,
|
|
22
22
|
errors_by_type: errors_by_type,
|
|
23
23
|
errors_by_platform: errors_by_platform,
|
|
24
|
-
errors_by_environment: errors_by_environment,
|
|
25
24
|
errors_by_hour: errors_by_hour,
|
|
26
25
|
top_users: top_affected_users,
|
|
27
26
|
resolution_rate: resolution_rate,
|
|
28
27
|
mobile_errors: mobile_errors_count,
|
|
29
|
-
api_errors: api_errors_count
|
|
28
|
+
api_errors: api_errors_count,
|
|
29
|
+
pattern_insights: pattern_insights
|
|
30
30
|
}
|
|
31
31
|
end
|
|
32
32
|
|
|
@@ -61,10 +61,6 @@ module RailsErrorDashboard
|
|
|
61
61
|
base_query.group(:platform).count
|
|
62
62
|
end
|
|
63
63
|
|
|
64
|
-
def errors_by_environment
|
|
65
|
-
base_query.group(:environment).count
|
|
66
|
-
end
|
|
67
|
-
|
|
68
64
|
# Counts the errors in base_query, bucketed by the hour of occurred_at.
# NOTE(review): group_by_hour is not defined in this view — presumably the groupdate gem.
def errors_by_hour
  hourly_buckets = base_query.group_by_hour(:occurred_at)
  hourly_buckets.count
end
|
|
@@ -103,6 +99,48 @@ module RailsErrorDashboard
|
|
|
103
99
|
# Counts the errors in base_query whose platform is NULL or exactly "API".
def api_errors_count
  api_scope = base_query.where("platform IS NULL OR platform = ?", "API")
  api_scope.count
end
|
|
102
|
+
|
|
103
|
+
# Builds occurrence-pattern and burst insights for the first five entries of
# errors_by_type (presumably ordered by count — errors_by_type is defined
# outside this view).
#
# @return [Hash] error_type => { pattern:, bursts:, has_pattern:, has_bursts: };
#   an empty Hash when Services::PatternDetector is not defined
def pattern_insights
  return {} unless defined?(Services::PatternDetector)

  errors_by_type.first(5).each_with_object({}) do |(error_type, _count), insights|
    # Platform with the most rows for this error type. Falls back to "API"
    # when there are no rows or when that platform value is nil.
    platform_counts = base_query.where(error_type: error_type).group(:platform).count
    dominant = platform_counts.max_by { |_, count| count }
    platform = dominant&.first || "API"

    pattern = Services::PatternDetector.analyze_cyclical_pattern(
      error_type: error_type,
      platform: platform,
      days: @days
    )

    # Burst detection looks back at most 7 days, or fewer when @days is smaller.
    bursts = Services::PatternDetector.detect_bursts(
      error_type: error_type,
      platform: platform,
      days: [ 7, @days ].min
    )

    insights[error_type] = {
      pattern: pattern,
      bursts: bursts,
      has_pattern: pattern[:pattern_type] != :none,
      has_bursts: bursts.any?
    }
  end
end
|
|
106
144
|
end
|
|
107
145
|
end
|
|
108
146
|
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsErrorDashboard
  module Queries
    # Looks up stored ErrorBaseline records for one error type / platform pair
    # and checks a count against the best one available.
    #
    # Every lookup returns nil when the ErrorBaseline constant is not defined
    # or its table does not exist.
    #
    # @example
    #   baseline = BaselineStats.hourly_baseline("NoMethodError", "iOS")
    class BaselineStats
      class << self
        # Class-level shortcuts: build an instance and run the matching lookup.
        def hourly_baseline(error_type, platform)
          new(error_type, platform).hourly_baseline
        end

        def daily_baseline(error_type, platform)
          new(error_type, platform).daily_baseline
        end

        def weekly_baseline(error_type, platform)
          new(error_type, platform).weekly_baseline
        end
      end

      # @param error_type [String] error type to look baselines up for
      # @param platform [String, nil] platform to look baselines up for
      def initialize(error_type, platform)
        @error_type = error_type
        @platform = platform
      end

      # @return [ErrorBaseline, nil] first record of the hourly + recent scopes
      def hourly_baseline
        latest_baseline(:hourly)
      end

      # @return [ErrorBaseline, nil] first record of the daily + recent scopes
      def daily_baseline
        latest_baseline(:daily)
      end

      # @return [ErrorBaseline, nil] first record of the weekly + recent scopes
      def weekly_baseline
        latest_baseline(:weekly)
      end

      # @return [Hash] the three lookups under the keys :hourly, :daily, :weekly
      def all_baselines
        {
          hourly: hourly_baseline,
          daily: daily_baseline,
          weekly: weekly_baseline
        }
      end

      # Checks a count against the first baseline found, trying hourly, then
      # daily, then weekly.
      #
      # @param current_count [Integer] count to evaluate
      # @param sensitivity [Integer] passed through to the baseline (default: 2)
      # @return [Hash] without a baseline: anomaly false, nil level and
      #   baseline_type, plus a :message; otherwise :anomaly, :level,
      #   :baseline_type, :threshold and :std_devs_above
      def check_anomaly(current_count, sensitivity: 2)
        baseline = hourly_baseline || daily_baseline || weekly_baseline

        unless baseline
          return { anomaly: false, level: nil, baseline_type: nil, message: "No baseline available" }
        end

        level = baseline.anomaly_level(current_count, sensitivity: sensitivity)

        {
          anomaly: level.present?,
          level: level,
          baseline_type: baseline.baseline_type,
          threshold: baseline.threshold(sensitivity: sensitivity),
          std_devs_above: baseline.std_devs_above_mean(current_count)
        }
      end

      private

      # Shared lookup behind the three public readers. period_scope is the
      # name of the ErrorBaseline scope to apply (:hourly, :daily or :weekly).
      def latest_baseline(period_scope)
        return nil unless defined?(ErrorBaseline) && ErrorBaseline.table_exists?

        ErrorBaseline
          .for_error_type(@error_type)
          .for_platform(@platform)
          .public_send(period_scope)
          .recent
          .first
      end
    end
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsErrorDashboard
  module Queries
    # Finds errors whose occurrences fall close in time to the occurrences of
    # a target error.
    #
    # For each occurrence of the target error, every occurrence of a different
    # error within +/- window_minutes is counted, and its delay relative to the
    # target occurrence is recorded. Errors seen at least min_frequency times
    # are returned, most frequent first.
    #
    # @example Find errors that occur together with error 123
    #   co_occurring = CoOccurringErrors.call(error_log_id: 123, window_minutes: 5)
    #   co_occurring.each do |result|
    #     puts "#{result[:error].error_type} occurred #{result[:frequency]} times together"
    #   end
    class CoOccurringErrors
      # @param error_log_id [Integer] ID of the target error
      # @param window_minutes [Integer] half-width of the time window in minutes (default: 5)
      # @param min_frequency [Integer] minimum co-occurrence count (default: 2)
      # @param limit [Integer] maximum number of results (default: 10)
      # @return [Array<Hash>] Array of {error: ErrorLog, frequency: Integer, avg_delay_seconds: Float}
      def self.call(error_log_id:, window_minutes: 5, min_frequency: 2, limit: 10)
        new(error_log_id, window_minutes: window_minutes, min_frequency: min_frequency, limit: limit).find_co_occurring
      end

      def initialize(error_log_id, window_minutes: 5, min_frequency: 2, limit: 10)
        @error_log_id = error_log_id
        @window_minutes = window_minutes.to_i
        @min_frequency = min_frequency.to_i
        @limit = limit.to_i
      end

      # Runs the analysis.
      #
      # @return [Array<Hash>] empty when the target error does not exist or
      #   has no occurrences
      def find_co_occurring
        return [] unless ErrorLog.find_by(id: @error_log_id)

        target_occurrences = ErrorOccurrence.where(error_log_id: @error_log_id)
        return [] if target_occurrences.empty?

        # Filter, rank and trim on the in-memory tallies first so that only
        # the errors that are actually returned get loaded.
        ranked = tally_nearby_occurrences(target_occurrences)
          .select { |_id, data| data[:count] >= @min_frequency }
          .sort_by { |_id, data| -data[:count] }
          .first(@limit)

        build_results(ranked)
      end

      private

      # Walks the target occurrences and tallies, per other error_log_id, how
      # many nearby occurrences were seen and how far each was from the target
      # (negative = before, positive = after).
      def tally_nearby_occurrences(target_occurrences)
        window = @window_minutes.minutes
        stats = Hash.new { |hash, key| hash[key] = { count: 0, delays: [] } }

        target_occurrences.find_each do |occurrence|
          # Only error_log_id and occurred_at are read from these rows, so the
          # associated ErrorLog records are not preloaded here.
          nearby = ErrorOccurrence
            .in_time_window(occurrence.occurred_at - window, occurrence.occurred_at + window)
            .where.not(error_log_id: @error_log_id)

          nearby.each do |other|
            entry = stats[other.error_log_id]
            entry[:count] += 1
            entry[:delays] << (other.occurred_at - occurrence.occurred_at).to_f
          end
        end

        stats
      end

      # Turns ranked [error_log_id, tally] pairs into result hashes, loading
      # all referenced ErrorLog rows with a single query. A row that no longer
      # exists is skipped instead of raising.
      def build_results(ranked)
        logs_by_id = ErrorLog.where(id: ranked.map(&:first)).each_with_object({}) do |log, index|
          index[log.id] = log
        end

        ranked.each_with_object([]) do |(log_id, data), results|
          log = logs_by_id[log_id]
          next unless log

          results << {
            error: log,
            frequency: data[:count],
            avg_delay_seconds: (data[:delays].sum / data[:delays].size).round(2)
          }
        end
      end
    end
  end
end
|
|
@@ -16,9 +16,21 @@ module RailsErrorDashboard
|
|
|
16
16
|
total_month: ErrorLog.where("occurred_at >= ?", 30.days.ago).count,
|
|
17
17
|
unresolved: ErrorLog.unresolved.count,
|
|
18
18
|
resolved: ErrorLog.resolved.count,
|
|
19
|
-
by_environment: ErrorLog.group(:environment).count,
|
|
20
19
|
by_platform: ErrorLog.group(:platform).count,
|
|
21
|
-
top_errors: top_errors
|
|
20
|
+
top_errors: top_errors,
|
|
21
|
+
# Trend visualizations
|
|
22
|
+
errors_trend_7d: errors_trend_7d,
|
|
23
|
+
errors_by_severity_7d: errors_by_severity_7d,
|
|
24
|
+
spike_detected: spike_detected?,
|
|
25
|
+
spike_info: spike_info,
|
|
26
|
+
# New metrics for Overview dashboard
|
|
27
|
+
error_rate: error_rate,
|
|
28
|
+
affected_users_today: affected_users_today,
|
|
29
|
+
affected_users_yesterday: affected_users_yesterday,
|
|
30
|
+
affected_users_change: affected_users_change,
|
|
31
|
+
trend_percentage: trend_percentage,
|
|
32
|
+
trend_direction: trend_direction,
|
|
33
|
+
top_errors_by_impact: top_errors_by_impact
|
|
22
34
|
}
|
|
23
35
|
end
|
|
24
36
|
|
|
@@ -32,6 +44,234 @@ module RailsErrorDashboard
|
|
|
32
44
|
.first(10)
|
|
33
45
|
.to_h
|
|
34
46
|
end
|
|
47
|
+
|
|
48
|
+
# Daily error counts for errors that occurred since 7 days ago.
# Buckets run from the date 7 days ago through the current date; a day with
# no errors is reported as 0 (default_value).
def errors_trend_7d
  recent_errors = ErrorLog.where("occurred_at >= ?", 7.days.ago)
  day_range = 7.days.ago.to_date..Date.current

  recent_errors.group_by_day(:occurred_at, range: day_range, default_value: 0).count
end
|
|
54
|
+
|
|
55
|
+
# Counts the errors of the last 7 days per severity level.
# The records are loaded and each one's #severity is compared in Ruby.
#
# @return [Hash] counts under the keys :critical, :high, :medium, :low
def errors_by_severity_7d
  tallies = Hash.new(0)
  ErrorLog.where("occurred_at >= ?", 7.days.ago).each do |error|
    tallies[error.severity] += 1
  end

  {
    critical: tallies[:critical],
    high: tallies[:high],
    medium: tallies[:medium],
    low: tallies[:low]
  }
end
|
|
66
|
+
|
|
67
|
+
# Reports whether today's error volume looks like a spike.
#
# A baseline anomaly (baseline_anomaly_detected?) counts as a spike on its
# own. Otherwise today's count must be at least twice the 7-day trend total
# divided by 7.
#
# @return [Boolean]
def spike_detected?
  # Fetch the trend once; it is needed for both the emptiness check and the average.
  trend = errors_trend_7d
  return false if trend.empty?

  today_count = ErrorLog.where("occurred_at >= ?", Time.current.beginning_of_day).count
  return true if baseline_anomaly_detected?(today_count)

  avg_count = trend.values.sum / 7.0
  return false if avg_count.zero?

  today_count >= (avg_count * 2)
end
|
|
85
|
+
|
|
86
|
+
# Describes the current spike, or returns nil when spike_detected? is false.
#
# @return [Hash, nil] :today_count, :avg_count (rounded to one decimal),
#   :multiplier (today / average, rounded to one decimal) and :severity,
#   merged with baseline_anomaly_info when that is present
def spike_info
  return nil unless spike_detected?

  today_count = ErrorLog.where("occurred_at >= ?", Time.current.beginning_of_day).count
  raw_avg = errors_trend_7d.values.sum / 7.0

  # Divide by the unrounded average: rounding first distorts the ratio for
  # small averages (0.14 becomes 0.1). A zero average would yield NaN or
  # Infinity, which cannot be rounded, so the ratio is reported as 0.0.
  ratio = raw_avg.zero? ? 0.0 : today_count / raw_avg

  info = {
    today_count: today_count,
    avg_count: raw_avg.round(1),
    multiplier: ratio.round(1),
    severity: spike_severity(ratio)
  }

  baseline_info = baseline_anomaly_info(today_count)
  info.merge!(baseline_info) if baseline_info.present?

  info
end
|
|
107
|
+
|
|
108
|
+
# Reports whether any distinct (error_type, platform) pair has a count for
# today that its baseline flags as anomalous at sensitivity 2.
# The argument is not used. Returns false when Queries::BaselineStats is
# not defined.
def baseline_anomaly_detected?(_count)
  return false unless defined?(Queries::BaselineStats)

  day_start = Time.current.beginning_of_day
  type_platform_pairs = ErrorLog.distinct.pluck(:error_type, :platform).compact

  type_platform_pairs.any? do |error_type, platform|
    stats = Queries::BaselineStats.new(error_type, platform)
    todays_count = ErrorLog
      .where(error_type: error_type, platform: platform)
      .where("occurred_at >= ?", day_start)
      .count

    stats.check_anomaly(todays_count, sensitivity: 2)[:anomaly]
  end
end
|
|
124
|
+
|
|
125
|
+
# Describes the worst baseline anomaly among all distinct
# (error_type, platform) pairs, judged by std_devs_above (nil counts as 0).
# The argument is not used.
#
# @return [Hash, nil] :baseline_detected, :anomaly_error_type,
#   :anomaly_platform, :anomaly_level and :std_devs_above (rounded to one
#   decimal); nil when Queries::BaselineStats is not defined or nothing is
#   anomalous
def baseline_anomaly_info(_total_count)
  return nil unless defined?(Queries::BaselineStats)

  day_start = Time.current.beginning_of_day
  type_platform_pairs = ErrorLog.distinct.pluck(:error_type, :platform).compact

  anomalies = type_platform_pairs.each_with_object([]) do |(error_type, platform), found|
    stats = Queries::BaselineStats.new(error_type, platform)
    todays_count = ErrorLog
      .where(error_type: error_type, platform: platform)
      .where("occurred_at >= ?", day_start)
      .count

    verdict = stats.check_anomaly(todays_count, sensitivity: 2)
    next unless verdict[:anomaly]

    found << {
      error_type: error_type,
      platform: platform,
      count: todays_count,
      level: verdict[:level],
      std_devs_above: verdict[:std_devs_above]
    }
  end

  return nil if anomalies.empty?

  worst = anomalies.max_by { |anomaly| anomaly[:std_devs_above] || 0 }

  {
    baseline_detected: true,
    anomaly_error_type: worst[:error_type],
    anomaly_platform: worst[:platform],
    anomaly_level: worst[:level],
    std_devs_above: worst[:std_devs_above]&.round(1)
  }
end
|
|
161
|
+
|
|
162
|
+
# Maps a spike multiplier onto a severity label:
# [0, 2) => :normal, [2, 5) => :elevated, [5, 10) => :high.
# Any value outside those bands (10 and above, negatives, NaN) is :critical.
def spike_severity(multiplier)
  bands = {
    (0...2) => :normal,
    (2...5) => :elevated,
    (5...10) => :high
  }

  matched = bands.find { |band, _label| band.cover?(multiplier) }
  matched ? matched.last : :critical
end
|
|
175
|
+
|
|
176
|
+
# Heuristic "error rate" for today.
#
# Total requests are not available here, so the value is today's errors per
# elapsed hour, reported on a 0-100 scale: 1 error/hour reads as 1.0, and
# anything above 100 errors/hour is capped at 100.0.
#
# @return [Float] rounded to one decimal; 0.0 when there are no errors today
def error_rate
  day_start = Time.current.beginning_of_day
  errors_today = ErrorLog.where("occurred_at >= ?", day_start).count
  return 0.0 if errors_today.zero?

  # Treat the first hour of the day as one full hour so that the rate is
  # never divided by zero or by a tiny fraction.
  elapsed_hours = ((Time.current - day_start) / 1.hour).round(1)
  elapsed_hours = [ elapsed_hours, 1.0 ].max

  hourly_rate = errors_today / elapsed_hours
  [ hourly_rate, 100.0 ].min.round(1)
end
|
|
195
|
+
|
|
196
|
+
# Number of distinct non-nil user_ids on errors that occurred since the
# start of the current day.
def affected_users_today
  todays_errors = ErrorLog.where("occurred_at >= ?", Time.current.beginning_of_day)
  with_user = todays_errors.where.not(user_id: nil)

  with_user.distinct.count(:user_id)
end
|
|
203
|
+
|
|
204
|
+
# Number of distinct non-nil user_ids on errors that occurred from the start
# of yesterday up to, but not including, the start of today.
def affected_users_yesterday
  yesterday_start = 1.day.ago.beginning_of_day
  today_start = Time.current.beginning_of_day

  yesterdays_errors = ErrorLog.where("occurred_at >= ? AND occurred_at < ?", yesterday_start, today_start)
  yesterdays_errors.where.not(user_id: nil).distinct.count(:user_id)
end
|
|
213
|
+
|
|
214
|
+
# Difference between the number of users affected today and yesterday
# (positive = more users affected today).
# With nothing yesterday the result is simply today's count, and with
# nothing on either day it is 0 — both fall out of the subtraction.
def affected_users_change
  todays_users = affected_users_today
  yesterdays_users = affected_users_yesterday

  todays_users - yesterdays_users
end
|
|
224
|
+
|
|
225
|
+
# Percentage change in error count, today versus yesterday, rounded to one
# decimal.
# With no errors yesterday the result is 0.0 when today is also empty and
# 100.0 when today has any errors.
def trend_percentage
  today_start = Time.current.beginning_of_day
  yesterday_start = 1.day.ago.beginning_of_day

  todays = ErrorLog.where("occurred_at >= ?", today_start).count
  yesterdays = ErrorLog.where("occurred_at >= ? AND occurred_at < ?", yesterday_start, today_start).count

  if yesterdays.zero?
    return 0.0 if todays.zero?
    return 100.0 if todays.positive?
  end

  change = (todays - yesterdays).to_f / yesterdays * 100
  change.round(1)
end
|
|
237
|
+
|
|
238
|
+
# Classifies trend_percentage: above 10 is :increasing, below -10 is
# :decreasing, and everything in between (bounds included) is :stable.
def trend_direction
  change = trend_percentage

  return :increasing if change > 10
  return :decreasing if change < -10

  :stable
end
|
|
250
|
+
|
|
251
|
+
# Returns up to five errors from the last 7 days, ordered by impact score
# (COUNT(DISTINCT user_id) * occurrence_count), highest first.
#
# NOTE(review): rows are grouped by (error_type, id). If id is the primary
# key, every group is a single row, so COUNT(DISTINCT user_id) can only be
# 0 or 1 — confirm this is the intended definition of "affected users".
#
# @return [Array<Hash>] :id, :error_type, :message (truncated to 80
#   characters), :severity, :occurrence_count, :affected_users,
#   :impact_score, :occurred_at
def top_errors_by_impact
  ranked = ErrorLog.where("occurred_at >= ?", 7.days.ago)
    .group(:error_type, :id)
    .select("error_type, id, occurrence_count,
             COUNT(DISTINCT user_id) as affected_users,
             COUNT(DISTINCT user_id) * occurrence_count as impact_score")
    .order("impact_score DESC")
    .limit(5)
    .to_a

  # The ranked rows only carry the selected columns, so load the complete
  # records with one query instead of one find per row.
  full_by_id = ErrorLog.where(id: ranked.map(&:id)).each_with_object({}) do |record, index|
    index[record.id] = record
  end

  ranked.each_with_object([]) do |row, results|
    full_error = full_by_id[row.id]
    next unless full_error # row removed between the two queries

    results << {
      id: row.id,
      error_type: row.error_type,
      message: full_error.message&.truncate(80),
      severity: full_error.severity,
      occurrence_count: row.occurrence_count,
      affected_users: row.affected_users.to_i,
      impact_score: row.impact_score.to_i,
      occurred_at: full_error.occurred_at
    }
  end
end
|
|
35
275
|
end
|
|
36
276
|
end
|
|
37
277
|
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module RailsErrorDashboard
  module Queries
    # Looks up the stored cascade patterns around one error: the patterns in
    # which it is the child (its parents) and those in which it is the parent
    # (its children).
    #
    # @example
    #   cascades = ErrorCascades.call(error_id: 123)
    #   cascades[:parents]   # entries built from patterns where 123 is the child
    #   cascades[:children]  # entries built from patterns where 123 is the parent
    class ErrorCascades
      # @param error_id [Integer] ID of the error to inspect
      # @param min_probability [Numeric] lowest cascade_probability to include (default: 0.5)
      # @return [Hash] { parents: Array<Hash>, children: Array<Hash> }
      def self.call(error_id:, min_probability: 0.5)
        new(error_id, min_probability: min_probability).find_cascades
      end

      def initialize(error_id, min_probability: 0.5)
        @error_id = error_id
        @min_probability = min_probability.to_f
      end

      # Returns both directions of the cascade. Both lists are empty when
      # CascadePattern is not defined, its table does not exist, or the error
      # cannot be found.
      def find_cascades
        patterns_available = defined?(CascadePattern) && CascadePattern.table_exists?

        unless patterns_available && ErrorLog.find_by(id: @error_id)
          return { parents: [], children: [] }
        end

        {
          parents: find_parent_cascades,
          children: find_child_cascades
        }
      end

      private

      # Patterns in which this error is the child; reports each pattern's parent_error.
      def find_parent_cascades
        cascade_entries(CascadePattern.by_child(@error_id), :parent_error)
      end

      # Patterns in which this error is the parent; reports each pattern's child_error.
      def find_child_cascades
        cascade_entries(CascadePattern.by_parent(@error_id), :child_error)
      end

      # Filters the given patterns by minimum probability, orders them by
      # probability (highest first) and converts each one into a result hash.
      # related_error names the association to preload and report under :error.
      def cascade_entries(patterns, related_error)
        patterns
          .where("cascade_probability >= ?", @min_probability)
          .includes(related_error)
          .order(cascade_probability: :desc)
          .map do |pattern|
            {
              error: pattern.public_send(related_error),
              frequency: pattern.frequency,
              probability: pattern.cascade_probability,
              avg_delay_seconds: pattern.avg_delay_seconds
            }
          end
      end
    end
  end
end
|