rails_error_dashboard 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +305 -703
- data/app/assets/stylesheets/rails_error_dashboard/_catppuccin_mocha.scss +107 -0
- data/app/assets/stylesheets/rails_error_dashboard/_components.scss +625 -0
- data/app/assets/stylesheets/rails_error_dashboard/_layout.scss +257 -0
- data/app/assets/stylesheets/rails_error_dashboard/_theme_variables.scss +203 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.css +926 -15
- data/app/assets/stylesheets/rails_error_dashboard/application.css.map +7 -0
- data/app/assets/stylesheets/rails_error_dashboard/application.scss +61 -0
- data/app/controllers/rails_error_dashboard/application_controller.rb +18 -0
- data/app/controllers/rails_error_dashboard/errors_controller.rb +140 -4
- data/app/helpers/rails_error_dashboard/application_helper.rb +55 -0
- data/app/helpers/rails_error_dashboard/backtrace_helper.rb +91 -0
- data/app/helpers/rails_error_dashboard/overview_helper.rb +78 -0
- data/app/helpers/rails_error_dashboard/user_agent_helper.rb +118 -0
- data/app/jobs/rails_error_dashboard/application_job.rb +19 -0
- data/app/jobs/rails_error_dashboard/async_error_logging_job.rb +48 -0
- data/app/jobs/rails_error_dashboard/baseline_alert_job.rb +263 -0
- data/app/jobs/rails_error_dashboard/discord_error_notification_job.rb +4 -8
- data/app/jobs/rails_error_dashboard/email_error_notification_job.rb +2 -1
- data/app/jobs/rails_error_dashboard/pagerduty_error_notification_job.rb +5 -5
- data/app/jobs/rails_error_dashboard/slack_error_notification_job.rb +10 -6
- data/app/jobs/rails_error_dashboard/webhook_error_notification_job.rb +5 -6
- data/app/mailers/rails_error_dashboard/application_mailer.rb +1 -1
- data/app/mailers/rails_error_dashboard/error_notification_mailer.rb +1 -1
- data/app/models/rails_error_dashboard/cascade_pattern.rb +74 -0
- data/app/models/rails_error_dashboard/error_baseline.rb +100 -0
- data/app/models/rails_error_dashboard/error_comment.rb +27 -0
- data/app/models/rails_error_dashboard/error_log.rb +471 -3
- data/app/models/rails_error_dashboard/error_occurrence.rb +49 -0
- data/app/views/layouts/rails_error_dashboard.html.erb +816 -178
- data/app/views/layouts/rails_error_dashboard_old_backup.html.erb +383 -0
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.html.erb +3 -10
- data/app/views/rails_error_dashboard/error_notification_mailer/error_alert.text.erb +1 -2
- data/app/views/rails_error_dashboard/errors/_error_row.html.erb +78 -0
- data/app/views/rails_error_dashboard/errors/_pattern_insights.html.erb +209 -0
- data/app/views/rails_error_dashboard/errors/_stats.html.erb +34 -0
- data/app/views/rails_error_dashboard/errors/_timeline.html.erb +167 -0
- data/app/views/rails_error_dashboard/errors/analytics.html.erb +152 -56
- data/app/views/rails_error_dashboard/errors/correlation.html.erb +373 -0
- data/app/views/rails_error_dashboard/errors/index.html.erb +294 -138
- data/app/views/rails_error_dashboard/errors/overview.html.erb +253 -0
- data/app/views/rails_error_dashboard/errors/platform_comparison.html.erb +399 -0
- data/app/views/rails_error_dashboard/errors/show.html.erb +781 -65
- data/config/routes.rb +9 -0
- data/db/migrate/20251225071314_add_optimized_indexes_to_error_logs.rb +66 -0
- data/db/migrate/20251225074653_remove_environment_from_error_logs.rb +26 -0
- data/db/migrate/20251225085859_add_enhanced_metrics_to_error_logs.rb +12 -0
- data/db/migrate/20251225093603_add_similarity_tracking_to_error_logs.rb +9 -0
- data/db/migrate/20251225100236_create_error_occurrences.rb +31 -0
- data/db/migrate/20251225101920_create_cascade_patterns.rb +33 -0
- data/db/migrate/20251225102500_create_error_baselines.rb +38 -0
- data/db/migrate/20251226020000_add_workflow_fields_to_error_logs.rb +27 -0
- data/db/migrate/20251226020100_create_error_comments.rb +18 -0
- data/lib/generators/rails_error_dashboard/install/install_generator.rb +276 -1
- data/lib/generators/rails_error_dashboard/install/templates/initializer.rb +272 -37
- data/lib/generators/rails_error_dashboard/solid_queue/solid_queue_generator.rb +36 -0
- data/lib/generators/rails_error_dashboard/solid_queue/templates/queue.yml +55 -0
- data/lib/rails_error_dashboard/commands/batch_delete_errors.rb +1 -1
- data/lib/rails_error_dashboard/commands/batch_resolve_errors.rb +2 -2
- data/lib/rails_error_dashboard/commands/log_error.rb +272 -7
- data/lib/rails_error_dashboard/commands/resolve_error.rb +16 -0
- data/lib/rails_error_dashboard/configuration.rb +90 -5
- data/lib/rails_error_dashboard/error_reporter.rb +15 -7
- data/lib/rails_error_dashboard/logger.rb +105 -0
- data/lib/rails_error_dashboard/middleware/error_catcher.rb +17 -10
- data/lib/rails_error_dashboard/plugin.rb +6 -3
- data/lib/rails_error_dashboard/plugin_registry.rb +2 -2
- data/lib/rails_error_dashboard/plugins/audit_log_plugin.rb +0 -1
- data/lib/rails_error_dashboard/plugins/jira_integration_plugin.rb +3 -4
- data/lib/rails_error_dashboard/plugins/metrics_plugin.rb +1 -3
- data/lib/rails_error_dashboard/queries/analytics_stats.rb +44 -6
- data/lib/rails_error_dashboard/queries/baseline_stats.rb +107 -0
- data/lib/rails_error_dashboard/queries/co_occurring_errors.rb +86 -0
- data/lib/rails_error_dashboard/queries/dashboard_stats.rb +242 -2
- data/lib/rails_error_dashboard/queries/error_cascades.rb +74 -0
- data/lib/rails_error_dashboard/queries/error_correlation.rb +375 -0
- data/lib/rails_error_dashboard/queries/errors_list.rb +106 -10
- data/lib/rails_error_dashboard/queries/filter_options.rb +0 -1
- data/lib/rails_error_dashboard/queries/platform_comparison.rb +254 -0
- data/lib/rails_error_dashboard/queries/similar_errors.rb +93 -0
- data/lib/rails_error_dashboard/services/backtrace_parser.rb +113 -0
- data/lib/rails_error_dashboard/services/baseline_alert_throttler.rb +88 -0
- data/lib/rails_error_dashboard/services/baseline_calculator.rb +269 -0
- data/lib/rails_error_dashboard/services/cascade_detector.rb +95 -0
- data/lib/rails_error_dashboard/services/pattern_detector.rb +268 -0
- data/lib/rails_error_dashboard/services/similarity_calculator.rb +144 -0
- data/lib/rails_error_dashboard/value_objects/error_context.rb +27 -1
- data/lib/rails_error_dashboard/version.rb +1 -1
- data/lib/rails_error_dashboard.rb +57 -7
- metadata +69 -10
- data/app/models/rails_error_dashboard/application_record.rb +0 -5
- data/lib/rails_error_dashboard/queries/developer_insights.rb +0 -277
- data/lib/rails_error_dashboard/queries/errors_list_v2.rb +0 -149
- data/lib/tasks/rails_error_dashboard_tasks.rake +0 -4
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RailsErrorDashboard
  module Services
    # Calculates baseline statistics for error types
    #
    # This service analyzes historical error data to calculate statistical baselines
    # for different time periods (hourly, daily, weekly). These baselines enable
    # anomaly detection by establishing "normal" error behavior.
    #
    # Statistical methods used:
    # - Mean and Standard Deviation (computed after outlier removal)
    # - 95th and 99th Percentiles (linear interpolation)
    # - Outlier removal (values > 3 std devs from the mean)
    #
    # NOTE(review): the hourly and weekly GROUP BY expressions use SQLite's
    # strftime() — confirm the database adapter before reusing elsewhere.
    #
    # @example
    #   BaselineCalculator.calculate_all_baselines
    #   # Calculates baselines for all error types and platforms
    class BaselineCalculator
      # Lookback periods for baseline calculation
      HOURLY_LOOKBACK = 4.weeks
      DAILY_LOOKBACK = 12.weeks
      WEEKLY_LOOKBACK = 1.year

      # Outlier threshold (standard deviations)
      OUTLIER_THRESHOLD = 3

      def self.calculate_all_baselines
        new.calculate_all_baselines
      end

      def self.calculate_for_error_type(error_type, platform)
        new.calculate_for_error_type(error_type, platform)
      end

      def initialize
        # Number of baseline rows written during this run
        @calculated_count = 0
      end

      # Calculate baselines for all error types and platforms
      # @return [Hash] Summary of calculated baselines
      def calculate_all_baselines
        return { calculated: 0, message: "ErrorBaseline table not available" } unless can_calculate?

        # Every distinct (error_type, platform) pair present in the log
        combinations = ErrorLog.distinct.pluck(:error_type, :platform).compact

        combinations.each do |(error_type, platform)|
          calculate_for_error_type(error_type, platform)
        end

        { calculated: @calculated_count }
      end

      # Calculate baselines for a specific error type and platform
      # @param error_type [String] The error type
      # @param platform [String] The platform
      # @return [Hash] Summary with hourly, daily, weekly baseline info
      def calculate_for_error_type(error_type, platform)
        return {} unless can_calculate?

        {
          hourly: calculate_hourly_baseline(error_type, platform),
          daily: calculate_daily_baseline(error_type, platform),
          weekly: calculate_weekly_baseline(error_type, platform)
        }
      end

      private

      def can_calculate?
        defined?(ErrorBaseline) && ErrorBaseline.table_exists?
      end

      # Hourly baseline: last 4 weeks, grouped by hour of day (00-23)
      def calculate_hourly_baseline(error_type, platform)
        upsert_baseline(
          error_type: error_type,
          platform: platform,
          baseline_type: "hourly",
          period_start: HOURLY_LOOKBACK.ago.beginning_of_hour,
          period_end: Time.current.beginning_of_hour,
          group_expression: "strftime('%H', occurred_at)"
        )
      end

      # Daily baseline: last 12 weeks, grouped by calendar date
      def calculate_daily_baseline(error_type, platform)
        upsert_baseline(
          error_type: error_type,
          platform: platform,
          baseline_type: "daily",
          period_start: DAILY_LOOKBACK.ago.beginning_of_day,
          period_end: Time.current.beginning_of_day,
          group_expression: "DATE(occurred_at)"
        )
      end

      # Weekly baseline: last 1 year, grouped by year-week
      def calculate_weekly_baseline(error_type, platform)
        upsert_baseline(
          error_type: error_type,
          platform: platform,
          baseline_type: "weekly",
          period_start: WEEKLY_LOOKBACK.ago.beginning_of_week,
          period_end: Time.current.beginning_of_week,
          group_expression: "strftime('%Y-%W', occurred_at)"
        )
      end

      # Shared implementation for the three baseline granularities
      # (replaces three near-identical method bodies).
      #
      # Queries grouped error counts, computes statistics, then creates or
      # updates the ErrorBaseline row identified by
      # (error_type, platform, baseline_type, period_start).
      #
      # @return [ErrorBaseline, nil] nil when no errors fall in the window
      def upsert_baseline(error_type:, platform:, baseline_type:, period_start:, period_end:, group_expression:)
        grouped_counts = ErrorLog
          .where(error_type: error_type, platform: platform)
          .where("occurred_at >= ?", period_start)
          .group(group_expression)
          .count

        return nil if grouped_counts.empty?

        counts = grouped_counts.values
        stats = calculate_statistics(counts)

        baseline = ErrorBaseline.find_or_initialize_by(
          error_type: error_type,
          platform: platform,
          baseline_type: baseline_type,
          period_start: period_start
        )

        baseline.update!(
          period_end: period_end,
          count: counts.sum,
          mean: stats[:mean],
          std_dev: stats[:std_dev],
          percentile_95: stats[:percentile_95],
          percentile_99: stats[:percentile_99],
          sample_size: counts.size
        )

        @calculated_count += 1
        baseline
      end

      # Calculate statistical metrics from an array of counts
      # Removes outliers (> 3 std devs) before computing the stats
      # @param counts [Array<Integer>] Array of error counts
      # @return [Hash] { mean:, std_dev:, percentile_95:, percentile_99: }
      def calculate_statistics(counts)
        return default_stats if counts.empty?

        clean_counts = remove_outliers(counts)
        return default_stats if clean_counts.empty?

        mean = clean_counts.sum.to_f / clean_counts.size
        variance = clean_counts.map { |c| (c - mean)**2 }.sum / clean_counts.size
        std_dev = Math.sqrt(variance)

        sorted = clean_counts.sort

        {
          mean: mean.round(2),
          std_dev: std_dev.round(2),
          percentile_95: percentile(sorted, 95).round(2),
          percentile_99: percentile(sorted, 99).round(2)
        }
      end

      # Remove outliers from counts (values > OUTLIER_THRESHOLD std devs from mean)
      # @param counts [Array<Integer>] Raw counts
      # @return [Array<Integer>] Counts with outliers removed
      def remove_outliers(counts)
        # Too few samples to estimate a meaningful spread
        return counts if counts.size < 3

        mean = counts.sum.to_f / counts.size
        variance = counts.map { |c| (c - mean)**2 }.sum / counts.size
        std_dev = Math.sqrt(variance)

        # Keep values within OUTLIER_THRESHOLD std devs of the mean
        counts.select { |c| (c - mean).abs <= (OUTLIER_THRESHOLD * std_dev) }
      end

      # Calculate a percentile value using linear interpolation
      # (now consistently returns a Float in every branch)
      # @param sorted_array [Array] Sorted array of numbers
      # @param percentile [Integer] Percentile to calculate (0-100)
      # @return [Float] Percentile value (0.0 for an empty array)
      def percentile(sorted_array, percentile)
        return 0.0 if sorted_array.empty?
        return sorted_array.first.to_f if sorted_array.size == 1

        rank = (percentile / 100.0) * (sorted_array.size - 1)
        lower_index = rank.floor
        upper_index = rank.ceil

        if lower_index == upper_index
          sorted_array[lower_index].to_f
        else
          # Linear interpolation between the two surrounding values
          lower_value = sorted_array[lower_index]
          upper_value = sorted_array[upper_index]
          fraction = rank - lower_index
          lower_value + (upper_value - lower_value) * fraction
        end
      end

      # Zero-valued stats used when there is no usable data
      def default_stats
        {
          mean: 0.0,
          std_dev: 0.0,
          percentile_95: 0.0,
          percentile_99: 0.0
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RailsErrorDashboard
  module Services
    # Detects cascade patterns by analyzing error occurrences
    #
    # Runs periodically to find errors that consistently follow other errors
    # within a short time window, indicating a possible causal relationship.
    class CascadeDetector
      # Time window to look for cascades (errors within this window may be related)
      DETECTION_WINDOW = 60.seconds

      # Minimum times a pattern must occur to be considered a cascade
      MIN_CASCADE_FREQUENCY = 3

      # Minimum probability threshold (% of time parent leads to child)
      # NOTE(review): declared but not enforced in this class — presumably
      # consumed by CascadePattern or readers of the table; confirm.
      MIN_CASCADE_PROBABILITY = 0.7

      def self.call(lookback_hours: 24)
        new(lookback_hours: lookback_hours).detect_cascades
      end

      def initialize(lookback_hours: 24)
        @lookback_hours = lookback_hours
        @detected_count = 0
      end

      # Scan recent occurrences for (parent, child) pairs that repeatedly
      # appear within DETECTION_WINDOW of each other, then persist them as
      # CascadePattern rows.
      # @return [Hash] { detected: Integer, updated: Integer }
      def detect_cascades
        return { detected: 0, updated: 0 } unless can_detect?

        occurrences = recent_occurrences
        persist_patterns(collect_patterns(occurrences))
      end

      private

      # All occurrences in the lookback window, sorted by time, loaded once.
      def recent_occurrences
        start_time = @lookback_hours.hours.ago
        ErrorOccurrence.where("occurred_at >= ?", start_time).order(:occurred_at).to_a
      end

      # Single in-memory sliding window over the time-sorted occurrence list.
      # Replaces the original one-DB-query-per-occurrence (N+1) scan with
      # identical pairing semantics:
      #   - child strictly after parent (equal timestamps excluded)
      #   - child within parent.occurred_at + DETECTION_WINDOW (inclusive)
      #   - child belongs to a different error_log
      # @return [Hash{Array(Integer,Integer) => Hash}] per-pair delays and counts
      def collect_patterns(occurrences)
        patterns = Hash.new { |h, k| h[k] = { delays: [], count: 0 } }

        occurrences.each_with_index do |parent, i|
          deadline = parent.occurred_at + DETECTION_WINDOW

          ((i + 1)...occurrences.size).each do |j|
            child = occurrences[j]
            # Sorted order: once past the window, no later child can qualify
            break if child.occurred_at > deadline
            # Strict ">" mirrors the original SQL; ties are not children
            next if child.occurred_at <= parent.occurred_at
            next if child.error_log_id == parent.error_log_id

            key = [ parent.error_log_id, child.error_log_id ]
            patterns[key][:delays] << (child.occurred_at - parent.occurred_at).to_f
            patterns[key][:count] += 1
          end
        end

        patterns
      end

      # Filter pairs by frequency and create/update CascadePattern records.
      # @return [Hash] { detected: Integer, updated: Integer }
      def persist_patterns(patterns)
        updated_count = 0

        patterns.each do |(parent_id, child_id), data|
          next if data[:count] < MIN_CASCADE_FREQUENCY

          pattern = CascadePattern.find_or_initialize_by(
            parent_error_id: parent_id,
            child_error_id: child_id
          )

          # Delays are Floats, so this is float division
          avg_delay = data[:delays].sum / data[:delays].size

          if pattern.new_record?
            pattern.frequency = data[:count]
            pattern.avg_delay_seconds = avg_delay
            pattern.last_detected_at = Time.current
            pattern.save
            @detected_count += 1
          else
            # Update existing pattern's rolling detection stats
            pattern.increment_detection!(avg_delay)
            updated_count += 1
          end

          # Recompute the parent-leads-to-child probability
          pattern.calculate_probability!
        end

        { detected: @detected_count, updated: updated_count }
      end

      def can_detect?
        defined?(CascadePattern) && CascadePattern.table_exists? &&
          defined?(ErrorOccurrence) && ErrorOccurrence.table_exists?
      end
    end
  end
end
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# frozen_string_literal: true

module RailsErrorDashboard
  module Services
    # Service object for detecting occurrence patterns in errors
    #
    # Provides two main pattern detection capabilities:
    # 1. Cyclical patterns - Daily/weekly rhythms (e.g., business hours pattern)
    # 2. Burst detection - Many errors in short time period
    #
    # @example Cyclical pattern
    #   pattern = PatternDetector.analyze_cyclical_pattern(
    #     error_type: "NoMethodError",
    #     platform: "ios",
    #     days: 30
    #   )
    #   # => {
    #   #   pattern_type: :business_hours,
    #   #   peak_hours: [9, 10, 11, 14, 15],
    #   #   hourly_distribution: { 0 => 5, 1 => 3, ... },
    #   #   pattern_strength: 0.8
    #   # }
    #
    # @example Burst detection
    #   bursts = PatternDetector.detect_bursts(
    #     error_type: "NoMethodError",
    #     platform: "ios",
    #     days: 7
    #   )
    #   # => [{
    #   #   start_time: <Time>,
    #   #   end_time: <Time>,
    #   #   duration_seconds: 300,
    #   #   error_count: 25,
    #   #   burst_intensity: :high
    #   # }]
    class PatternDetector
      # Analyze cyclical patterns in error occurrences
      #
      # Detects:
      # - Business hours pattern (9am-5pm peak)
      # - Night pattern (midnight-6am peak)
      # - Weekend pattern (Sat-Sun peak)
      # - Uniform pattern (no clear pattern)
      #
      # @param error_type [String] The error type to analyze
      # @param platform [String] The platform (iOS, Android, API, etc.)
      # @param days [Integer] Number of days to analyze (default: 30)
      # @return [Hash] Pattern analysis with type, peaks, distribution, and strength
      def self.analyze_cyclical_pattern(error_type:, platform:, days: 30)
        start_date = days.days.ago

        # Load the matching errors once; reusing the array below avoids the
        # extra COUNT query the original issued for :total_errors
        error_records = ErrorLog
          .where(error_type: error_type, platform: platform)
          .where("occurred_at >= ?", start_date)
          .to_a

        return empty_pattern if error_records.empty?

        # Group by hour of day (0-23) and weekday (0 = Sunday, 6 = Saturday)
        hourly_distribution = Hash.new(0)
        weekday_distribution = Hash.new(0)

        error_records.each do |error|
          hourly_distribution[error.occurred_at.hour] += 1
          weekday_distribution[error.occurred_at.wday] += 1
        end

        {
          pattern_type: determine_pattern_type(hourly_distribution, weekday_distribution),
          peak_hours: find_peak_hours(hourly_distribution),
          hourly_distribution: hourly_distribution,
          weekday_distribution: weekday_distribution,
          pattern_strength: calculate_pattern_strength(hourly_distribution),
          total_errors: error_records.size,
          analysis_days: days
        }
      end

      # Detect error bursts (sequences where errors occur rapidly)
      #
      # A burst is defined as a sequence where inter-arrival time < 1 minute
      # Burst intensity:
      # - :high - 20+ errors in burst
      # - :medium - 10-19 errors
      # - :low - 5-9 errors
      #
      # @param error_type [String] The error type to analyze
      # @param platform [String] The platform
      # @param days [Integer] Number of days to analyze (default: 7)
      # @return [Array<Hash>] Array of burst metadata
      def self.detect_bursts(error_type:, platform:, days: 7)
        start_date = days.days.ago

        errors = ErrorLog
          .where(error_type: error_type, platform: platform)
          .where("occurred_at >= ?", start_date)
          .order(:occurred_at)

        return [] if errors.count < 5 # Need at least 5 errors to detect a burst

        # Expand each error into its individual occurrence timestamps
        timestamps = errors.flat_map do |error|
          if error.respond_to?(:error_occurrences) && error.error_occurrences.any?
            error.error_occurrences.pluck(:occurred_at)
          else
            # Fall back to the error's occurred_at repeated by occurrence_count
            Array.new(error.occurrence_count || 1, error.occurred_at)
          end
        end.sort

        return [] if timestamps.size < 5

        # Detect bursts: sequences where inter-arrival < 60 seconds
        bursts = []
        current_burst = nil

        timestamps.each_with_index do |timestamp, i|
          next if i.zero?

          inter_arrival = timestamp - timestamps[i - 1]

          if inter_arrival <= 60 # 60 seconds threshold
            if current_burst.nil?
              # Start a new burst anchored at the previous timestamp
              current_burst = {
                start_time: timestamps[i - 1],
                timestamps: [ timestamps[i - 1], timestamp ]
              }
            else
              current_burst[:timestamps] << timestamp
            end
          else
            # Gap ends the burst; keep it only if it has enough errors
            if current_burst && current_burst[:timestamps].size >= 5
              bursts << finalize_burst(current_burst)
            end
            current_burst = nil
          end
        end

        # Don't forget the trailing burst
        if current_burst && current_burst[:timestamps].size >= 5
          bursts << finalize_burst(current_burst)
        end

        bursts
      end

      # Empty pattern result
      def self.empty_pattern
        {
          pattern_type: :none,
          peak_hours: [],
          hourly_distribution: {},
          weekday_distribution: {},
          pattern_strength: 0.0,
          total_errors: 0,
          analysis_days: 0
        }
      end

      # Determine the pattern type based on hour and weekday distributions
      def self.determine_pattern_type(hourly_dist, weekday_dist)
        return :none if hourly_dist.empty?

        # Peak hours (>2x hourly average) — shared with find_peak_hours
        peak_hours = find_peak_hours(hourly_dist)

        # Business hours pattern: at least 3 peaks between 9am-5pm
        return :business_hours if (peak_hours & (9..17).to_a).size >= 3

        # Night pattern: at least 2 peaks between midnight-6am
        return :night if (peak_hours & (0..6).to_a).size >= 2

        # Weekend pattern: majority of errors on Sat/Sun
        if weekday_dist.any?
          weekend_count = (weekday_dist[0] || 0) + (weekday_dist[6] || 0) # Sun + Sat
          return :weekend if weekend_count > weekday_dist.values.sum * 0.5
        end

        # No clear pattern
        :uniform
      end

      # Find peak hours (hours with >2x the per-hour average)
      def self.find_peak_hours(hourly_dist)
        return [] if hourly_dist.empty?

        avg = hourly_dist.values.sum.to_f / 24
        hourly_dist.select { |_, count| count > avg * 2 }.keys.sort
      end

      # Calculate pattern strength (0.0-1.0)
      # Measures how concentrated the errors are in peak hours
      def self.calculate_pattern_strength(hourly_dist)
        return 0.0 if hourly_dist.empty?

        total = hourly_dist.values.sum
        return 0.0 if total.zero?

        # Coefficient of variation (std dev / mean) over all 24 hours;
        # higher variation = stronger pattern
        values = (0..23).map { |h| hourly_dist[h] || 0 }
        mean = total.to_f / 24
        variance = values.sum { |v| (v - mean)**2 } / 24
        std_dev = Math.sqrt(variance)

        cv = mean > 0 ? std_dev / mean : 0
        [ cv.round(2), 1.0 ].min
      end

      # Finalize burst metadata from the collected timestamps
      def self.finalize_burst(burst_data)
        start_time = burst_data[:start_time]
        end_time = burst_data[:timestamps].last
        duration = end_time - start_time
        count = burst_data[:timestamps].size

        {
          start_time: start_time,
          end_time: end_time,
          duration_seconds: duration.round(1),
          error_count: count,
          burst_intensity: classify_burst_intensity(count)
        }
      end

      # Classify burst intensity based on error count
      def self.classify_burst_intensity(count)
        if count >= 20
          :high
        elsif count >= 10
          :medium
        else
          :low
        end
      end

      # The bare `private` keyword has no effect on methods defined with
      # `def self.`, so the original helpers were unintentionally public.
      # private_class_method enforces the privacy the original code intended.
      private_class_method :empty_pattern,
                           :determine_pattern_type,
                           :find_peak_hours,
                           :calculate_pattern_strength,
                           :finalize_burst,
                           :classify_burst_intensity
    end
  end
end
|