findbug 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +375 -0
- data/Rakefile +12 -0
- data/app/controllers/findbug/application_controller.rb +105 -0
- data/app/controllers/findbug/dashboard_controller.rb +93 -0
- data/app/controllers/findbug/errors_controller.rb +129 -0
- data/app/controllers/findbug/performance_controller.rb +80 -0
- data/app/jobs/findbug/alert_job.rb +40 -0
- data/app/jobs/findbug/cleanup_job.rb +132 -0
- data/app/jobs/findbug/persist_job.rb +158 -0
- data/app/models/findbug/error_event.rb +197 -0
- data/app/models/findbug/performance_event.rb +237 -0
- data/app/views/findbug/dashboard/index.html.erb +199 -0
- data/app/views/findbug/errors/index.html.erb +137 -0
- data/app/views/findbug/errors/show.html.erb +185 -0
- data/app/views/findbug/performance/index.html.erb +168 -0
- data/app/views/findbug/performance/show.html.erb +203 -0
- data/app/views/layouts/findbug/application.html.erb +601 -0
- data/lib/findbug/alerts/channels/base.rb +75 -0
- data/lib/findbug/alerts/channels/discord.rb +155 -0
- data/lib/findbug/alerts/channels/email.rb +179 -0
- data/lib/findbug/alerts/channels/slack.rb +149 -0
- data/lib/findbug/alerts/channels/webhook.rb +143 -0
- data/lib/findbug/alerts/dispatcher.rb +126 -0
- data/lib/findbug/alerts/throttler.rb +110 -0
- data/lib/findbug/background_persister.rb +142 -0
- data/lib/findbug/capture/context.rb +301 -0
- data/lib/findbug/capture/exception_handler.rb +141 -0
- data/lib/findbug/capture/exception_subscriber.rb +228 -0
- data/lib/findbug/capture/message_handler.rb +104 -0
- data/lib/findbug/capture/middleware.rb +247 -0
- data/lib/findbug/configuration.rb +381 -0
- data/lib/findbug/engine.rb +109 -0
- data/lib/findbug/performance/instrumentation.rb +336 -0
- data/lib/findbug/performance/transaction.rb +193 -0
- data/lib/findbug/processing/data_scrubber.rb +163 -0
- data/lib/findbug/rails/controller_methods.rb +152 -0
- data/lib/findbug/railtie.rb +222 -0
- data/lib/findbug/storage/circuit_breaker.rb +223 -0
- data/lib/findbug/storage/connection_pool.rb +134 -0
- data/lib/findbug/storage/redis_buffer.rb +285 -0
- data/lib/findbug/tasks/findbug.rake +167 -0
- data/lib/findbug/version.rb +5 -0
- data/lib/findbug.rb +216 -0
- data/lib/generators/findbug/install_generator.rb +67 -0
- data/lib/generators/findbug/templates/POST_INSTALL +41 -0
- data/lib/generators/findbug/templates/create_findbug_error_events.rb +44 -0
- data/lib/generators/findbug/templates/create_findbug_performance_events.rb +47 -0
- data/lib/generators/findbug/templates/initializer.rb +157 -0
- data/sig/findbug.rbs +4 -0
- metadata +251 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Findbug
  # ErrorsController handles error listing and detail views.
  #
  # The index supports filtering (status, severity, free-text search, time
  # window), sorting and offset pagination. The member actions resolve,
  # ignore and reopen change an error's state and redirect back to the
  # referring page, falling back to the error list.
  #
  class ErrorsController < ApplicationController
    # Number of errors shown per index page.
    PER_PAGE = 25

    before_action :set_error, only: [:show, :resolve, :ignore, :reopen]

    # GET /findbug/errors
    #
    # List errors, filtered and sorted according to the query parameters.
    #
    def index
      @errors = apply_filters(Findbug::ErrorEvent.all)

      # Pagination.
      # A missing, non-numeric, zero or negative page falls back to page 1;
      # otherwise the computed OFFSET below would be negative.
      @page = [params[:page].to_s.to_i, 1].max
      @per_page = PER_PAGE
      @total_count = @errors.count
      @errors = @errors.offset((@page - 1) * @per_page).limit(@per_page)

      render template: "findbug/errors/index", layout: "findbug/application"
    end

    # GET /findbug/errors/:id
    #
    # Show one error together with up to five other errors that share its
    # exception class.
    #
    def show
      @similar_errors = Findbug::ErrorEvent.where(exception_class: @error.exception_class)
                                           .where.not(id: @error.id)
                                           .recent
                                           .limit(5)

      render template: "findbug/errors/show", layout: "findbug/application"
    end

    # POST /findbug/errors/:id/resolve
    #
    # Mark error as resolved.
    #
    def resolve
      @error.resolve!
      flash_success "Error marked as resolved"
      redirect_back(fallback_location: findbug.errors_path)
    end

    # POST /findbug/errors/:id/ignore
    #
    # Mark error as ignored.
    #
    def ignore
      @error.ignore!
      flash_success "Error marked as ignored"
      redirect_back(fallback_location: findbug.errors_path)
    end

    # POST /findbug/errors/:id/reopen
    #
    # Reopen a resolved/ignored error.
    #
    def reopen
      @error.reopen!
      flash_success "Error reopened"
      redirect_back(fallback_location: findbug.errors_path)
    end

    private

    # Loads the error addressed by params[:id] for the member actions.
    # `find` raises ActiveRecord::RecordNotFound when no such row exists.
    def set_error
      @error = Findbug::ErrorEvent.find(params[:id])
    end

    # Narrows and orders +scope+ according to the request parameters.
    #
    # @param scope [ActiveRecord::Relation] relation of error events
    # @return [ActiveRecord::Relation] filtered relation with ordering applied
    #
    def apply_filters(scope)
      # Status filter.
      # - a non-blank value filters by that status
      # - an absent key (initial page load, no filter submitted) defaults to
      #   unresolved errors only
      # - an empty string ("All Statuses" was selected) applies no status filter
      if params[:status].present?
        scope = scope.where(status: params[:status])
      elsif !params.key?(:status)
        scope = scope.unresolved
      end

      # Severity filter
      scope = scope.where(severity: params[:severity]) if params[:severity].present?

      # Search filter.
      # Escape LIKE metacharacters so "%" and "_" typed by the user are
      # matched literally instead of acting as wildcards.
      # NOTE(review): ILIKE is not understood by every database adapter
      # (e.g. MySQL, SQLite) - confirm which adapters must be supported.
      if params[:search].present?
        term = Findbug::ErrorEvent.sanitize_sql_like(params[:search].to_s)
        scope = scope.where(
          "exception_class ILIKE :search OR message ILIKE :search",
          search: "%#{term}%"
        )
      end

      # Date range filter
      if params[:since].present?
        scope = scope.where("last_seen_at >= ?", parse_since(params[:since]))
      end

      # Sort
      case params[:sort]
      when "oldest"
        scope.order(last_seen_at: :asc)
      when "occurrences"
        scope.order(occurrence_count: :desc)
      else
        scope.recent # Default: most recent
      end
    end

    # Translates a time-window token into a timestamp.
    #
    # @param value [String] one of "1h", "24h", "7d", "30d"
    # @return [ActiveSupport::TimeWithZone] start of the window; any other
    #   value falls back to 24 hours ago
    #
    def parse_since(value)
      case value
      when "1h" then 1.hour.ago
      when "24h" then 24.hours.ago
      when "7d" then 7.days.ago
      when "30d" then 30.days.ago
      else 24.hours.ago
      end
    end
  end
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Findbug
  # PerformanceController handles performance metrics views.
  #
  # Both actions accept a +since+ parameter ("1h", "24h", "7d", "30d") that
  # selects the time window; it defaults to the last 24 hours.
  #
  class PerformanceController < ApplicationController
    # GET /findbug/performance
    #
    # Performance overview with slowest endpoints.
    #
    def index
      @since = parse_since(params[:since] || "24h")

      @slowest = Findbug::PerformanceEvent.slowest_transactions(since: @since, limit: 20)
      @n_plus_one = Findbug::PerformanceEvent.n_plus_one_hotspots(since: @since, limit: 10)
      @throughput = Findbug::PerformanceEvent.throughput_over_time(since: @since)
      @stats = calculate_stats(@since)

      render template: "findbug/performance/index", layout: "findbug/application"
    end

    # GET /findbug/performance/:id
    #
    # Show details for a specific transaction type. The :id segment carries
    # the transaction name.
    #
    def show
      @transaction_name = params[:id]
      @since = parse_since(params[:since] || "24h")

      # Get events for this transaction
      @events = Findbug::PerformanceEvent.where(transaction_name: @transaction_name)
                                         .where("captured_at >= ?", @since)
                                         .recent
                                         .limit(100)

      # Calculate aggregates
      @stats = Findbug::PerformanceEvent.aggregate_for(@transaction_name, since: @since)

      # Get slowest individual requests.
      # @events has already passed through `recent`; `order` would only append
      # duration_ms behind whatever ordering that scope applies, making it a
      # tie-breaker. `reorder` replaces the ordering so the rows really are
      # sorted by duration. The later `limit(10)` supersedes the `limit(100)`.
      @slowest_requests = @events.reorder(duration_ms: :desc).limit(10)

      # Get requests with N+1 issues
      @n_plus_one_requests = @events.where(has_n_plus_one: true).limit(10)

      render template: "findbug/performance/show", layout: "findbug/application"
    end

    private

    # Builds the summary numbers for the overview page.
    #
    # @param since [Time] start of the time window
    # @return [Hash] :total_requests, :avg_duration, :max_duration,
    #   :avg_queries and :n_plus_one_percentage; the averages and maximum
    #   fall back to 0 when the aggregate query returns nil
    #
    def calculate_stats(since)
      events = Findbug::PerformanceEvent.where("captured_at >= ?", since)
      # Counted once and shared with the percentage helper.
      total = events.count

      {
        total_requests: total,
        avg_duration: events.average(:duration_ms)&.round(2) || 0,
        max_duration: events.maximum(:duration_ms)&.round(2) || 0,
        avg_queries: events.average(:query_count)&.round(1) || 0,
        n_plus_one_percentage: calculate_n_plus_one_percentage(events, total)
      }
    end

    # Share of events flagged has_n_plus_one, as a percentage.
    #
    # @param events [ActiveRecord::Relation] events to inspect
    # @param total [Integer] number of rows in +events+; counted here when
    #   the caller does not supply it
    # @return [Numeric] percentage rounded to one decimal, 0 when there are
    #   no events
    #
    def calculate_n_plus_one_percentage(events, total = events.count)
      return 0 if total.zero?

      n_plus_one = events.where(has_n_plus_one: true).count
      ((n_plus_one.to_f / total) * 100).round(1)
    end

    # Translates a time-window token into a timestamp.
    #
    # @param value [String] one of "1h", "24h", "7d", "30d"
    # @return [ActiveSupport::TimeWithZone] start of the window; any other
    #   value falls back to 24 hours ago
    #
    def parse_since(value)
      case value
      when "1h" then 1.hour.ago
      when "24h" then 24.hours.ago
      when "7d" then 7.days.ago
      when "30d" then 30.days.ago
      else 24.hours.ago
      end
    end
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Findbug
  # AlertJob sends alerts asynchronously.
  #
  # WHY ASYNC ALERTS?
  # =================
  #
  # Alert sending involves:
  # - HTTP requests to Slack/Discord webhooks
  # - Email delivery (SMTP)
  # - Potential network latency
  #
  # If we did this synchronously during error capture:
  # 1. Slow alerts would slow down error persistence
  # 2. Failed alerts would block other alerts
  # 3. Network issues would impact the persist job
  #
  # By using a separate job:
  # 1. PersistJob stays fast
  # 2. Alerts can retry independently
  # 3. Network issues are isolated
  #
  class AlertJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Retry on network failures
    retry_on StandardError, attempts: 3, wait: :polynomially_longer

    # Dispatches alerts for one stored error event.
    #
    # @param error_event_id [Integer] primary key of the Findbug::ErrorEvent
    #
    def perform(error_event_id)
      # find_by returns nil (it does not raise) when the row is gone, so the
      # "not found" case has to be logged here rather than in the rescue.
      error_event = Findbug::ErrorEvent.find_by(id: error_event_id)
      return log_missing_event(error_event_id) unless error_event

      Findbug::Alerts::Dispatcher.send_alerts(error_event)
    rescue ActiveRecord::RecordNotFound
      # Still handled in case the record disappears while alerts are being
      # dispatched; there is nothing left to alert on, so do not retry.
      log_missing_event(error_event_id)
    end

    private

    # Event was deleted, skip alerting
    def log_missing_event(error_event_id)
      Findbug.logger.debug("[Findbug] Alert skipped: error event #{error_event_id} not found")
    end
  end
end
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Findbug
  # CleanupJob removes old data based on retention policy.
  #
  # WHY CLEANUP?
  # ============
  #
  # Without cleanup, your database would grow forever:
  # - 1000 errors/day x 30 days = 30,000 records
  # - 10000 perf events/day x 30 days = 300,000 records
  #
  # Cleanup enforces retention policy:
  # - Default: 30 days
  # - Configurable via config.retention_days
  #
  # WHAT GETS CLEANED
  # =================
  #
  # 1. Resolved/ignored errors last seen more than retention_days ago
  #
  # 2. Unresolved errors last seen more than 3x retention_days ago
  #    - Unresolved errors are kept longer (you probably want to fix these!)
  #      but truly ancient ones are still removed
  #
  # 3. Performance events captured more than retention_days ago
  #    - All performance data is cleaned (it's meant for trends, not forever)
  #
  # SCHEDULING
  # ==========
  #
  # Run this daily (not too often, not too rare):
  #
  #   findbug_cleanup:
  #     cron: "0 3 * * *"  # 3 AM daily
  #     class: Findbug::CleanupJob
  #
  class CleanupJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Delete in batches to avoid long-running transactions
    BATCH_SIZE = 1000

    # Unresolved errors are kept this many times longer than retention_days
    UNRESOLVED_RETENTION_MULTIPLIER = 3

    # Runs both cleanups. Does nothing when Findbug is disabled. Any error is
    # logged and re-raised.
    def perform
      return unless Findbug.enabled?

      cleanup_errors
      cleanup_performance

      Findbug.logger.info("[Findbug] Cleanup completed")
    rescue StandardError => e
      Findbug.logger.error("[Findbug] CleanupJob failed: #{e.message}")
      raise
    end

    private

    # Deletes error events past their retention window and logs the total
    # when anything was removed.
    def cleanup_errors
      cutoff_date = retention_days.days.ago

      # Delete resolved and ignored errors older than retention
      deleted_count = delete_in_batches(
        Findbug::ErrorEvent.where(status: [Findbug::ErrorEvent::STATUS_RESOLVED, Findbug::ErrorEvent::STATUS_IGNORED])
                           .where("last_seen_at < ?", cutoff_date)
      )

      # Delete very old unresolved errors as well.
      # This prevents truly ancient errors from accumulating
      very_old_cutoff = (retention_days * UNRESOLVED_RETENTION_MULTIPLIER).days.ago
      old_unresolved_count = delete_in_batches(
        Findbug::ErrorEvent.unresolved.where("last_seen_at < ?", very_old_cutoff)
      )

      total = deleted_count + old_unresolved_count
      return unless total.positive?

      Findbug.logger.info("[Findbug] Cleaned up #{total} error events")
    end

    # Deletes all performance events captured before the retention cutoff and
    # logs the count when anything was removed.
    def cleanup_performance
      cutoff_date = retention_days.days.ago

      # Delete all performance events older than retention
      deleted_count = delete_in_batches(
        Findbug::PerformanceEvent.where("captured_at < ?", cutoff_date)
      )

      return unless deleted_count.positive?

      Findbug.logger.info("[Findbug] Cleaned up #{deleted_count} performance events")
    end

    # Delete records in batches to avoid long transactions
    #
    # WHY BATCHING?
    # =============
    #
    # Deleting 100,000 records in one query:
    # 1. Locks the table for a long time
    # 2. Can cause deadlocks with other queries
    # 3. Uses lots of memory for transaction log
    # 4. Might timeout
    #
    # Batching (1000 at a time):
    # 1. Short locks between batches
    # 2. Other queries can interleave
    # 3. Steady memory usage
    # 4. Can be interrupted and resumed
    #
    # @param scope [ActiveRecord::Relation] records to delete
    # @return [Integer] number of rows deleted
    #
    def delete_in_batches(scope)
      total_deleted = 0

      loop do
        # Get IDs of records to delete
        ids = scope.limit(BATCH_SIZE).pluck(:id)
        break if ids.empty?

        # Delete this batch
        total_deleted += scope.where(id: ids).delete_all

        # A short batch means the scope held no further rows when it was
        # read, so skip the pause and the extra (empty) lookup.
        break if ids.size < BATCH_SIZE

        # Give other queries a chance
        sleep(0.01)
      end

      total_deleted
    end

    # Retention period in days, read from the Findbug configuration.
    def retention_days
      Findbug.config.retention_days
    end
  end
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Findbug
  # PersistJob moves data from Redis buffer to the database.
  #
  # THE TWO-PHASE STORAGE PATTERN
  # =============================
  #
  # Phase 1: Real-time capture (Redis)
  #   - Happens in your request thread
  #   - Must be FAST (1-2ms)
  #   - Non-blocking
  #   - Data is temporary (24h TTL)
  #
  # Phase 2: Persistence (Database)
  #   - Happens in background job
  #   - Can be slow (50-100ms per batch)
  #   - Doesn't affect user requests
  #   - Data is permanent
  #
  # WHY THIS PATTERN?
  # =================
  #
  # Direct database writes in the request cycle would:
  # 1. Add 50-100ms latency to every error
  # 2. Risk database connection exhaustion under high error rates
  # 3. Create contention with app's own database traffic
  #
  # By buffering in Redis first:
  # 1. Capture is instant (Redis LPUSH is ~1ms)
  # 2. Database writes are batched (more efficient)
  # 3. Load is smoothed out over time
  #
  # SCHEDULING
  # ==========
  #
  # This job should run periodically (every 30 seconds is a good default).
  # You can set this up with:
  #
  # 1. Sidekiq-scheduler / sidekiq-cron:
  #
  #    findbug_persist:
  #      cron: "*/30 * * * * *"  # Every 30 seconds
  #      class: Findbug::PersistJob
  #
  # 2. Whenever gem (cron):
  #
  #    # cron cannot schedule below one minute, so this runs every minute
  #    every 1.minute do
  #      runner "Findbug::PersistJob.perform_now"
  #    end
  #
  # 3. Solid Queue (Rails 8):
  #
  #    Findbug::PersistJob.set(wait: 30.seconds).perform_later
  #    (then reschedule itself at the end)
  #
  class PersistJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Maximum number of events to process in one job run
    # This prevents the job from running too long
    MAX_EVENTS_PER_RUN = 1000

    # Drains the error buffer, then the performance buffer. Does nothing when
    # Findbug is disabled.
    def perform
      return unless Findbug.enabled?

      persist_errors
      persist_performance
    rescue StandardError => e
      Findbug.logger.error("[Findbug] PersistJob failed: #{e.message}")
      raise # Re-raise to trigger job retry
    end

    # Persist error events from Redis to database
    def persist_errors
      drain_buffer(
        "error",
        pop: ->(size) { Findbug::Storage::RedisBuffer.pop_errors(size) },
        persist: ->(events) { self.class.persist_errors_batch(events) }
      )
    end

    # Persist performance events from Redis to database
    def persist_performance
      drain_buffer(
        "performance",
        pop: ->(size) { Findbug::Storage::RedisBuffer.pop_performance(size) },
        persist: ->(events) { self.class.persist_performance_batch(events) }
      )
    end

    class << self
      # Persist a batch of error events
      #
      # Each event is scrubbed and upserted on its own; a failure is logged
      # and the remaining events are still processed.
      #
      # @param events [Array<Hash>] error event data
      #
      def persist_errors_batch(events)
        persist_each(events, "error") do |scrubbed|
          Findbug::ErrorEvent.upsert_from_event(scrubbed)
        end
      end

      # Persist a batch of performance events
      #
      # Each event is scrubbed and created on its own; a failure is logged
      # and the remaining events are still processed.
      #
      # @param events [Array<Hash>] performance event data
      #
      def persist_performance_batch(events)
        persist_each(events, "performance") do |scrubbed|
          Findbug::PerformanceEvent.create_from_event(scrubbed)
        end
      end

      private

      # Scrubs sensitive data from every event and yields the scrubbed copy
      # to the block, isolating failures per event.
      def persist_each(events, label)
        events.each do |event_data|
          yield Findbug::Processing::DataScrubber.scrub(event_data)
        rescue StandardError => e
          Findbug.logger.error(
            "[Findbug] Failed to persist #{label} event: #{e.message}"
          )
          # Continue with other events
        end
      end
    end

    private

    # Repeatedly pops a batch with +pop+ and hands it to +persist+ until the
    # buffer returns an empty batch or MAX_EVENTS_PER_RUN has been reached,
    # then logs how many events were handled.
    #
    # NOTE: the logged total counts every popped event, including any whose
    # persistence failed and was only logged by the batch method.
    #
    # @param label [String] event kind used in the log message
    # @param pop [#call] receives the batch size, returns an array of events
    # @param persist [#call] receives one popped batch
    #
    def drain_buffer(label, pop:, persist:)
      batch_size = Findbug.config.persist_batch_size
      total_persisted = 0

      loop do
        # Pop a batch from Redis
        events = pop.call(batch_size)
        break if events.empty?

        # Process the batch
        persist.call(events)
        total_persisted += events.size

        # Safety limit to prevent infinite loops
        break if total_persisted >= MAX_EVENTS_PER_RUN

        # Small sleep to avoid hammering the database
        sleep(0.01)
      end

      return unless total_persisted.positive?

      Findbug.logger.info("[Findbug] Persisted #{total_persisted} #{label} events")
    end
  end
end
|