findbug 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +8 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +375 -0
  6. data/Rakefile +12 -0
  7. data/app/controllers/findbug/application_controller.rb +105 -0
  8. data/app/controllers/findbug/dashboard_controller.rb +93 -0
  9. data/app/controllers/findbug/errors_controller.rb +129 -0
  10. data/app/controllers/findbug/performance_controller.rb +80 -0
  11. data/app/jobs/findbug/alert_job.rb +40 -0
  12. data/app/jobs/findbug/cleanup_job.rb +132 -0
  13. data/app/jobs/findbug/persist_job.rb +158 -0
  14. data/app/models/findbug/error_event.rb +197 -0
  15. data/app/models/findbug/performance_event.rb +237 -0
  16. data/app/views/findbug/dashboard/index.html.erb +199 -0
  17. data/app/views/findbug/errors/index.html.erb +137 -0
  18. data/app/views/findbug/errors/show.html.erb +185 -0
  19. data/app/views/findbug/performance/index.html.erb +168 -0
  20. data/app/views/findbug/performance/show.html.erb +203 -0
  21. data/app/views/layouts/findbug/application.html.erb +601 -0
  22. data/lib/findbug/alerts/channels/base.rb +75 -0
  23. data/lib/findbug/alerts/channels/discord.rb +155 -0
  24. data/lib/findbug/alerts/channels/email.rb +179 -0
  25. data/lib/findbug/alerts/channels/slack.rb +149 -0
  26. data/lib/findbug/alerts/channels/webhook.rb +143 -0
  27. data/lib/findbug/alerts/dispatcher.rb +126 -0
  28. data/lib/findbug/alerts/throttler.rb +110 -0
  29. data/lib/findbug/background_persister.rb +142 -0
  30. data/lib/findbug/capture/context.rb +301 -0
  31. data/lib/findbug/capture/exception_handler.rb +141 -0
  32. data/lib/findbug/capture/exception_subscriber.rb +228 -0
  33. data/lib/findbug/capture/message_handler.rb +104 -0
  34. data/lib/findbug/capture/middleware.rb +247 -0
  35. data/lib/findbug/configuration.rb +381 -0
  36. data/lib/findbug/engine.rb +109 -0
  37. data/lib/findbug/performance/instrumentation.rb +336 -0
  38. data/lib/findbug/performance/transaction.rb +193 -0
  39. data/lib/findbug/processing/data_scrubber.rb +163 -0
  40. data/lib/findbug/rails/controller_methods.rb +152 -0
  41. data/lib/findbug/railtie.rb +222 -0
  42. data/lib/findbug/storage/circuit_breaker.rb +223 -0
  43. data/lib/findbug/storage/connection_pool.rb +134 -0
  44. data/lib/findbug/storage/redis_buffer.rb +285 -0
  45. data/lib/findbug/tasks/findbug.rake +167 -0
  46. data/lib/findbug/version.rb +5 -0
  47. data/lib/findbug.rb +216 -0
  48. data/lib/generators/findbug/install_generator.rb +67 -0
  49. data/lib/generators/findbug/templates/POST_INSTALL +41 -0
  50. data/lib/generators/findbug/templates/create_findbug_error_events.rb +44 -0
  51. data/lib/generators/findbug/templates/create_findbug_performance_events.rb +47 -0
  52. data/lib/generators/findbug/templates/initializer.rb +157 -0
  53. data/sig/findbug.rbs +4 -0
  54. metadata +251 -0
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
module Findbug
  # ErrorsController handles error listing and detail views, plus the
  # resolve / ignore / reopen status actions.
  #
  class ErrorsController < ApplicationController
    # Number of errors shown on one page of the list view.
    PER_PAGE = 25

    before_action :set_error, only: [:show, :resolve, :ignore, :reopen]

    # GET /findbug/errors
    #
    # List errors, narrowed by the status / severity / search / since
    # parameters and sorted according to the sort parameter.
    #
    def index
      @errors = apply_filters(Findbug::ErrorEvent.all)

      # Pagination. A missing, zero, negative or non-numeric page falls back
      # to page 1 so the OFFSET computed below can never be negative.
      @page = [params[:page].to_s.to_i, 1].max
      @per_page = PER_PAGE
      @total_count = @errors.count
      @errors = @errors.offset((@page - 1) * @per_page).limit(@per_page)

      render template: "findbug/errors/index", layout: "findbug/application"
    end

    # GET /findbug/errors/:id
    #
    # Show error details together with up to five other events that share
    # the same exception class.
    #
    def show
      @similar_errors = Findbug::ErrorEvent.where(exception_class: @error.exception_class)
                                           .where.not(id: @error.id)
                                           .recent
                                           .limit(5)

      render template: "findbug/errors/show", layout: "findbug/application"
    end

    # POST /findbug/errors/:id/resolve
    #
    # Mark error as resolved.
    #
    def resolve
      @error.resolve!
      flash_success "Error marked as resolved"
      redirect_back(fallback_location: findbug.errors_path)
    end

    # POST /findbug/errors/:id/ignore
    #
    # Mark error as ignored.
    #
    def ignore
      @error.ignore!
      flash_success "Error marked as ignored"
      redirect_back(fallback_location: findbug.errors_path)
    end

    # POST /findbug/errors/:id/reopen
    #
    # Reopen a resolved/ignored error.
    #
    def reopen
      @error.reopen!
      flash_success "Error reopened"
      redirect_back(fallback_location: findbug.errors_path)
    end

    private

    # Loads the error addressed by params[:id]. `find` raises
    # ActiveRecord::RecordNotFound when no such record exists.
    def set_error
      @error = Findbug::ErrorEvent.find(params[:id])
    end

    # Applies the request's filter and sort parameters to +scope+.
    #
    # @param scope [ActiveRecord::Relation] relation of error events
    # @return [ActiveRecord::Relation] filtered and ordered relation
    #
    def apply_filters(scope)
      # Status filter.
      # - a value is present    -> filter by that status
      # - the key is absent     -> initial page load, show unresolved only
      # - the key is "" (blank) -> "All Statuses" was selected, no filter
      if params[:status].present?
        scope = scope.where(status: params[:status])
      elsif !params.key?(:status)
        scope = scope.unresolved
      end

      scope = scope.where(severity: params[:severity]) if params[:severity].present?
      scope = apply_search(scope, params[:search]) if params[:search].present?
      scope = scope.where("last_seen_at >= ?", parse_since(params[:since])) if params[:since].present?

      case params[:sort]
      when "oldest"
        scope.order(last_seen_at: :asc)
      when "occurrences"
        scope.order(occurrence_count: :desc)
      else
        scope.recent # Default: most recent
      end
    end

    # Case-insensitive substring search over exception_class and message.
    #
    # LIKE wildcards typed by the user (% and _) are escaped so they match
    # literally. The predicate is built with Arel's `matches`, which emits
    # ILIKE on PostgreSQL and LIKE on other adapters, instead of hard-coding
    # the PostgreSQL-only ILIKE operator.
    #
    # @param scope [ActiveRecord::Relation]
    # @param term [String] raw search text from the request
    # @return [ActiveRecord::Relation]
    #
    def apply_search(scope, term)
      pattern = "%#{Findbug::ErrorEvent.sanitize_sql_like(term.to_s)}%"
      table = Findbug::ErrorEvent.arel_table

      scope.where(
        table[:exception_class].matches(pattern, "\\")
          .or(table[:message].matches(pattern, "\\"))
      )
    end

    # Translates a time-window token into a timestamp.
    #
    # @param value [String] one of "1h", "24h", "7d", "30d"
    # @return [ActiveSupport::TimeWithZone] start of the window; any
    #   unrecognised value falls back to 24 hours ago
    #
    def parse_since(value)
      case value
      when "1h" then 1.hour.ago
      when "24h" then 24.hours.ago
      when "7d" then 7.days.ago
      when "30d" then 30.days.ago
      else 24.hours.ago
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
module Findbug
  # PerformanceController handles performance metrics views.
  #
  class PerformanceController < ApplicationController
    # GET /findbug/performance
    #
    # Performance overview with slowest endpoints, N+1 hotspots, throughput
    # and summary statistics for the selected time window (default "24h").
    #
    def index
      @since = parse_since(params[:since] || "24h")

      @slowest = Findbug::PerformanceEvent.slowest_transactions(since: @since, limit: 20)
      @n_plus_one = Findbug::PerformanceEvent.n_plus_one_hotspots(since: @since, limit: 10)
      @throughput = Findbug::PerformanceEvent.throughput_over_time(since: @since)
      @stats = calculate_stats(@since)

      render template: "findbug/performance/index", layout: "findbug/application"
    end

    # GET /findbug/performance/:id
    #
    # Show details for a specific transaction type. params[:id] carries the
    # transaction name.
    #
    def show
      @transaction_name = params[:id]
      @since = parse_since(params[:since] || "24h")

      # Get events for this transaction
      @events = Findbug::PerformanceEvent.where(transaction_name: @transaction_name)
                                         .where("captured_at >= ?", @since)
                                         .recent
                                         .limit(100)

      # Calculate aggregates
      @stats = Findbug::PerformanceEvent.aggregate_for(@transaction_name, since: @since)

      # Get slowest individual requests.
      # `reorder` replaces whatever ordering `recent` put on @events; a plain
      # `order` would only append duration as a secondary sort key, so the
      # result would not actually be sorted by duration.
      @slowest_requests = @events.reorder(duration_ms: :desc).limit(10)

      # Get requests with N+1 issues
      @n_plus_one_requests = @events.where(has_n_plus_one: true).limit(10)

      render template: "findbug/performance/show", layout: "findbug/application"
    end

    private

    # Builds the summary numbers shown on the overview page.
    #
    # @param since [Time] start of the time window
    # @return [Hash] :total_requests, :avg_duration, :max_duration,
    #   :avg_queries and :n_plus_one_percentage; aggregates default to 0
    #   when the window contains no events
    #
    def calculate_stats(since)
      events = Findbug::PerformanceEvent.where("captured_at >= ?", since)
      total = events.count

      {
        total_requests: total,
        avg_duration: events.average(:duration_ms)&.round(2) || 0,
        max_duration: events.maximum(:duration_ms)&.round(2) || 0,
        avg_queries: events.average(:query_count)&.round(1) || 0,
        n_plus_one_percentage: calculate_n_plus_one_percentage(events, total)
      }
    end

    # Percentage of events flagged with has_n_plus_one.
    #
    # @param events [ActiveRecord::Relation] events to inspect
    # @param total [Integer] size of +events+; pass it when already known to
    #   avoid a second COUNT query
    # @return [Numeric] percentage rounded to one decimal, 0 when there are
    #   no events
    #
    def calculate_n_plus_one_percentage(events, total = events.count)
      return 0 if total.zero?

      n_plus_one = events.where(has_n_plus_one: true).count
      ((n_plus_one.to_f / total) * 100).round(1)
    end

    # Translates a time-window token into a timestamp.
    #
    # @param value [String] one of "1h", "24h", "7d", "30d"
    # @return [ActiveSupport::TimeWithZone] start of the window; any
    #   unrecognised value falls back to 24 hours ago
    #
    def parse_since(value)
      case value
      when "1h" then 1.hour.ago
      when "24h" then 24.hours.ago
      when "7d" then 7.days.ago
      when "30d" then 30.days.ago
      else 24.hours.ago
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Findbug
  # AlertJob sends alerts asynchronously.
  #
  # WHY ASYNC ALERTS?
  # =================
  #
  # Alert sending involves:
  #   - HTTP requests to Slack/Discord webhooks
  #   - Email delivery (SMTP)
  #   - Potential network latency
  #
  # If we did this synchronously during error capture:
  #   1. Slow alerts would slow down error persistence
  #   2. Failed alerts would block other alerts
  #   3. Network issues would impact the persist job
  #
  # By using a separate job:
  #   1. PersistJob stays fast
  #   2. Alerts can retry independently
  #   3. Network issues are isolated
  #
  class AlertJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Retry on network failures
    retry_on StandardError, attempts: 3, wait: :polynomially_longer

    # Dispatches alerts for one stored error event.
    #
    # @param error_event_id [Integer] primary key of the Findbug::ErrorEvent
    # @return [void]
    #
    def perform(error_event_id)
      error_event = Findbug::ErrorEvent.find_by(id: error_event_id)

      # find_by returns nil (it does not raise) when the row is gone, so the
      # skip has to be logged here rather than in the rescue below.
      return log_skipped(error_event_id) unless error_event

      Findbug::Alerts::Dispatcher.send_alerts(error_event)
    rescue ActiveRecord::RecordNotFound
      # Raised only if something inside the dispatcher looks the record up
      # again after it was deleted; skip alerting instead of retrying.
      log_skipped(error_event_id)
    end

    private

    # Records that no alert was sent because the event no longer exists.
    def log_skipped(error_event_id)
      Findbug.logger.debug("[Findbug] Alert skipped: error event #{error_event_id} not found")
    end
  end
end
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
module Findbug
  # CleanupJob enforces the retention policy by deleting old rows.
  #
  # WHY CLEANUP?
  # ============
  #
  # Without cleanup the tables grow without bound, e.g.:
  #   - 1000 errors/day x 30 days = 30,000 records
  #   - 10000 perf events/day x 30 days = 300,000 records
  #
  # The retention window comes from config.retention_days.
  #
  # WHAT GETS CLEANED
  # =================
  #
  # 1. Resolved and ignored error events last seen more than
  #    retention_days ago.
  #
  # 2. Unresolved error events last seen more than 3 x retention_days ago,
  #    so truly ancient errors do not accumulate.
  #
  # 3. Performance events captured more than retention_days ago.
  #
  # SCHEDULING
  # ==========
  #
  # Run this daily, for example:
  #
  #   findbug_cleanup:
  #     cron: "0 3 * * *"  # 3 AM daily
  #     class: Findbug::CleanupJob
  #
  class CleanupJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Rows removed per DELETE statement; keeps each statement short.
    BATCH_SIZE = 1000

    # Runs both cleanups. Does nothing when Findbug is disabled. Any error is
    # logged and then re-raised.
    def perform
      return unless Findbug.enabled?

      cleanup_errors
      cleanup_performance

      Findbug.logger.info("[Findbug] Cleanup completed")
    rescue StandardError => e
      Findbug.logger.error("[Findbug] CleanupJob failed: #{e.message}")
      raise
    end

    private

    # Removes closed (resolved/ignored) errors past the retention window and
    # unresolved errors past three times the retention window.
    def cleanup_errors
      closed_statuses = [Findbug::ErrorEvent::STATUS_RESOLVED, Findbug::ErrorEvent::STATUS_IGNORED]
      expired_closed = Findbug::ErrorEvent.where(status: closed_statuses)
                                          .where("last_seen_at < ?", retention_days.days.ago)
      removed = delete_in_batches(expired_closed)

      stale_open = Findbug::ErrorEvent.unresolved
                                      .where("last_seen_at < ?", (retention_days * 3).days.ago)
      removed += delete_in_batches(stale_open)

      return unless removed.positive?

      Findbug.logger.info("[Findbug] Cleaned up #{removed} error events")
    end

    # Removes every performance event captured before the retention cutoff.
    def cleanup_performance
      expired = Findbug::PerformanceEvent.where("captured_at < ?", retention_days.days.ago)
      removed = delete_in_batches(expired)

      return unless removed.positive?

      Findbug.logger.info("[Findbug] Cleaned up #{removed} performance events")
    end

    # Deletes all rows matching +scope+, BATCH_SIZE ids at a time.
    #
    # WHY BATCHING?
    # =============
    #
    # One huge DELETE can hold locks for a long time, contend with other
    # queries and time out. Small batches keep each statement short and let
    # other queries interleave between them.
    #
    # @param scope [ActiveRecord::Relation] rows to delete
    # @return [Integer] number of rows deleted
    #
    def delete_in_batches(scope)
      removed = 0

      loop do
        batch_ids = scope.limit(BATCH_SIZE).pluck(:id)
        break if batch_ids.empty?

        removed += scope.where(id: batch_ids).delete_all

        # Brief pause so other queries get a turn between batches
        sleep(0.01)
      end

      removed
    end

    # Retention window in days, read from the Findbug configuration.
    def retention_days
      Findbug.config.retention_days
    end
  end
end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
module Findbug
  # PersistJob drains the Redis buffer into the database.
  #
  # THE TWO-PHASE STORAGE PATTERN
  # =============================
  #
  # Phase 1: Real-time capture (Redis)
  #   - Happens in the request thread, so it has to be fast
  #   - Buffered data is temporary
  #
  # Phase 2: Persistence (Database)
  #   - Happens in this background job
  #   - May be slow without affecting user requests
  #   - Data is permanent
  #
  # WHY THIS PATTERN?
  # =================
  #
  # Writing to the database inside the request cycle would add latency to
  # every captured event, risk exhausting database connections under high
  # error rates, and compete with the application's own database traffic.
  # Buffering first keeps capture cheap and lets the database writes happen
  # in batches, spread out over time.
  #
  # SCHEDULING
  # ==========
  #
  # Run this job periodically (every 30 seconds is a good default), e.g.:
  #
  # 1. Sidekiq-scheduler / sidekiq-cron:
  #
  #    findbug_persist:
  #      cron: "*/30 * * * * *"  # Every 30 seconds
  #      class: Findbug::PersistJob
  #
  # 2. Whenever gem (cron):
  #
  #    every 30.seconds do
  #      runner "Findbug::PersistJob.perform_now"
  #    end
  #
  # 3. Solid Queue (Rails 8):
  #
  #    Findbug::PersistJob.set(wait: 30.seconds).perform_later
  #    (then reschedule itself at the end)
  #
  class PersistJob < ActiveJob::Base
    queue_as { Findbug.config.queue_name }

    # Upper bound on events handled per buffer in one run, so a single run
    # cannot go on indefinitely.
    MAX_EVENTS_PER_RUN = 1000

    # Drains the error buffer, then the performance buffer. Does nothing
    # when Findbug is disabled. Failures are logged and re-raised so the
    # job backend can retry.
    def perform
      return unless Findbug.enabled?

      persist_errors
      persist_performance
    rescue StandardError => e
      Findbug.logger.error("[Findbug] PersistJob failed: #{e.message}")
      raise # Re-raise to trigger job retry
    end

    # Persist error events from Redis to database
    def persist_errors
      handled = drain_buffer(
        ->(size) { Findbug::Storage::RedisBuffer.pop_errors(size) },
        ->(batch) { self.class.persist_errors_batch(batch) }
      )

      Findbug.logger.info("[Findbug] Persisted #{handled} error events") if handled.positive?
    end

    # Persist performance events from Redis to database
    def persist_performance
      handled = drain_buffer(
        ->(size) { Findbug::Storage::RedisBuffer.pop_performance(size) },
        ->(batch) { self.class.persist_performance_batch(batch) }
      )

      Findbug.logger.info("[Findbug] Persisted #{handled} performance events") if handled.positive?
    end

    class << self
      # Persist a batch of error events
      #
      # Each event is scrubbed and upserted on its own; a failure is logged
      # and the remaining events are still processed.
      #
      # @param events [Array<Hash>] error event data
      #
      def persist_errors_batch(events)
        events.each do |payload|
          begin
            clean = Findbug::Processing::DataScrubber.scrub(payload)
            Findbug::ErrorEvent.upsert_from_event(clean)
          rescue StandardError => e
            Findbug.logger.error(
              "[Findbug] Failed to persist error event: #{e.message}"
            )
          end
        end
      end

      # Persist a batch of performance events
      #
      # Each event is scrubbed and stored on its own; a failure is logged
      # and the remaining events are still processed.
      #
      # @param events [Array<Hash>] performance event data
      #
      def persist_performance_batch(events)
        events.each do |payload|
          begin
            clean = Findbug::Processing::DataScrubber.scrub(payload)
            Findbug::PerformanceEvent.create_from_event(clean)
          rescue StandardError => e
            Findbug.logger.error(
              "[Findbug] Failed to persist performance event: #{e.message}"
            )
          end
        end
      end
    end

    private

    # Repeatedly pops a batch and hands it to the store step until the
    # buffer is empty or MAX_EVENTS_PER_RUN events have been handled.
    #
    # @param pop [#call] receives the batch size, returns an array of events
    # @param store [#call] receives one popped batch
    # @return [Integer] number of events popped and handed to +store+
    #
    def drain_buffer(pop, store)
      batch_size = Findbug.config.persist_batch_size
      handled = 0

      loop do
        batch = pop.call(batch_size)
        break if batch.empty?

        store.call(batch)
        handled += batch.size

        # Safety limit to prevent infinite loops
        break if handled >= MAX_EVENTS_PER_RUN

        # Small sleep to avoid hammering the database
        sleep(0.01)
      end

      handled
    end
  end
end