RubyGems - dead_bro - Versions diffs - 0.2.8 → 0.2.9 - Mend

dead_bro 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

checksums.yaml +4 -4
data/README.md +42 -43
data/lib/dead_bro/circuit_breaker.rb +58 -38
data/lib/dead_bro/client.rb +112 -143
data/lib/dead_bro/configuration.rb +76 -40
data/lib/dead_bro/dispatcher.rb +130 -0
data/lib/dead_bro/error_middleware.rb +1 -1
data/lib/dead_bro/job_subscriber.rb +35 -12
data/lib/dead_bro/lightweight_memory_tracker.rb +5 -7
data/lib/dead_bro/logger.rb +30 -11
data/lib/dead_bro/memory_details.rb +71 -0
data/lib/dead_bro/memory_helpers.rb +62 -0
data/lib/dead_bro/memory_leak_detector.rb +178 -158
data/lib/dead_bro/memory_tracking_subscriber.rb +7 -31
data/lib/dead_bro/monitor.rb +18 -5
data/lib/dead_bro/railtie.rb +6 -6
data/lib/dead_bro/sql_subscriber.rb +103 -70
data/lib/dead_bro/subscriber.rb +36 -14
data/lib/dead_bro/version.rb +1 -1
data/lib/dead_bro.rb +85 -88
metadata +3 -1

data/lib/dead_bro/memory_leak_detector.rb CHANGED

@@ -1,196 +1,216 @@
 # frozen_string_literal: true
 module DeadBro
+  # Process-wide memory leak detector. Previously stored samples in
+  # `Thread.current[...]`, which meant each Puma worker thread saw only the
+  # handful of requests it served — far too few samples, and reset whenever a
+  # thread was recycled. History is now shared across all threads in the
+  # process behind a mutex, with a hard cap on the number of retained samples.
   class MemoryLeakDetector
-    # Track memory patterns over time to detect leaks
-    MEMORY_HISTORY_KEY = :dead_bro_memory_history
-    LEAK_DETECTION_WINDOW = 300 # 5 minutes
-    MEMORY_GROWTH_THRESHOLD = 50 # 50MB growth threshold
+    LEAK_DETECTION_WINDOW = 300 # seconds (5 minutes)
+    MEMORY_GROWTH_THRESHOLD = 50 # MB growth over the window
     MIN_SAMPLES_FOR_LEAK_DETECTION = 10
+    MAX_SAMPLES = 500 # hard cap so a long-running process can't grow unbounded
+    MAX_LEAK_ALERTS = 10
+    @mutex = Mutex.new
+    @history = {
+      samples: [],
+      last_cleanup: Time.now.utc.to_i,
+      leak_alerts: []
+    }
+    class << self
+      def record_memory_sample(sample_data)
+        sample = {
+          timestamp: Time.now.utc.to_i,
+          memory_usage: sample_data[:memory_usage] || 0,
+          gc_count: sample_data[:gc_count] || 0,
+          heap_pages: sample_data[:heap_pages] || 0,
+          object_count: sample_data[:object_count] || 0,
+          request_id: sample_data[:request_id],
+          controller: sample_data[:controller],
+          action: sample_data[:action]
+        }
+        samples_snapshot = @mutex.synchronize do
+          @history[:samples] << sample
+          cleanup_old_samples_unlocked
+          @history[:samples].dup
+        end
-    def self.initialize_history
-      Thread.current[MEMORY_HISTORY_KEY] = {
-        samples: [],
-        last_cleanup: Time.now.utc.to_i,
-        leak_alerts: []
-      }
-    end
+        check_for_memory_leaks(samples_snapshot)
-    def self.record_memory_sample(sample_data)
-      history = Thread.current[MEMORY_HISTORY_KEY] || initialize_history
+        nil
+      end
-      sample = {
-        timestamp: Time.now.utc.to_i,
-        memory_usage: sample_data[:memory_usage] || 0,
-        gc_count: sample_data[:gc_count] || 0,
-        heap_pages: sample_data[:heap_pages] || 0,
-        object_count: sample_data[:object_count] || 0,
-        request_id: sample_data[:request_id],
-        controller: sample_data[:controller],
-        action: sample_data[:action]
-      }
+      def get_memory_analysis
+        samples_snapshot, leak_alerts_snapshot = @mutex.synchronize do
+          [@history[:samples].dup, @history[:leak_alerts].dup]
+        end
-      history[:samples] << sample
+        if samples_snapshot.length < 5
+          return {status: "insufficient_data", sample_count: samples_snapshot.length}
+        end
-      # Clean up old samples
-      cleanup_old_samples(history)
+        memory_values = samples_snapshot.map { |s| s[:memory_usage] }
+        gc_counts = samples_snapshot.map { |s| s[:gc_count] }
+        object_counts = samples_snapshot.map { |s| s[:object_count] }
+        memory_stats = calculate_stats(memory_values)
+        gc_stats = calculate_stats(gc_counts)
+        object_stats = calculate_stats(object_counts)
+        memory_trend = calculate_memory_trend(memory_values, samples_snapshot.map { |s| s[:timestamp] })
+        recent_samples = samples_snapshot.last(10)
+        recent_controllers = recent_samples.map { |s| "#{s[:controller]}##{s[:action]}" }.tally
+        {
+          status: "analyzed",
+          sample_count: samples_snapshot.length,
+          time_window_seconds: samples_snapshot.last[:timestamp] - samples_snapshot.first[:timestamp],
+          memory_stats: memory_stats,
+          gc_stats: gc_stats,
+          object_stats: object_stats,
+          memory_trend: memory_trend,
+          recent_controllers: recent_controllers,
+          leak_alerts: leak_alerts_snapshot.last(5),
+          memory_efficiency: calculate_memory_efficiency(samples_snapshot)
+        }
+      end
-      # Check for memory leaks
-      check_for_memory_leaks(history)
+      def clear_history
+        @mutex.synchronize do
+          @history = {
+            samples: [],
+            last_cleanup: Time.now.utc.to_i,
+            leak_alerts: []
+          }
+        end
+      end
-      history
-    end
+      # Kept for backwards compatibility — history is now initialized at
+      # class-load time, so Railtie callers don't need to do anything.
+      def initialize_history
+        # no-op
+      end
-    def self.cleanup_old_samples(history)
-      cutoff_time = Time.now.utc.to_i - LEAK_DETECTION_WINDOW
-      history[:samples] = history[:samples].select { |sample| sample[:timestamp] > cutoff_time }
-    end
+      def calculate_memory_trend(memory_values, timestamps)
+        return {slope: 0, r_squared: 0} if memory_values.length < 2
-    def self.check_for_memory_leaks(history)
-      samples = history[:samples]
-      return if samples.length < MIN_SAMPLES_FOR_LEAK_DETECTION
+        n = memory_values.length
+        sum_x = timestamps.sum
+        sum_y = memory_values.sum
+        sum_xy = timestamps.zip(memory_values).sum { |x, y| x * y }
+        sum_x2 = timestamps.sum { |x| x * x }
-      # Calculate memory growth trend
-      memory_values = samples.map { |s| s[:memory_usage] }
-      timestamps = samples.map { |s| s[:timestamp] }
+        denominator = (n * sum_x2 - sum_x * sum_x)
+        return {slope: 0, r_squared: 0} if denominator.zero?
-      # Use linear regression to detect upward trend
-      trend = calculate_memory_trend(memory_values, timestamps)
+        slope = (n * sum_xy - sum_x * sum_y).to_f / denominator
+        intercept = (sum_y - slope * sum_x).to_f / n
-      # Check if memory is growing consistently
-      if trend[:slope] > 0.1 && trend[:r_squared] > 0.7 # Growing with good correlation
-        memory_growth = memory_values.last - memory_values.first
+        y_mean = sum_y.to_f / n
+        ss_tot = memory_values.sum { |y| (y - y_mean)**2 }
+        ss_res = memory_values.zip(timestamps).sum { |y, x| (y - (slope * x + intercept))**2 }
+        r_squared = (ss_tot > 0) ? 1 - (ss_res / ss_tot) : 0
-        if memory_growth > MEMORY_GROWTH_THRESHOLD
-          leak_alert = {
-            detected_at: Time.now.utc.to_i,
-            memory_growth_mb: memory_growth.round(2),
-            growth_rate_mb_per_second: trend[:slope],
-            confidence: trend[:r_squared],
-            sample_count: samples.length,
-            time_window_seconds: timestamps.last - timestamps.first,
-            recent_controllers: samples.last(5).map { |s| "#{s[:controller]}##{s[:action]}" }.uniq
-          }
+        {slope: slope, intercept: intercept, r_squared: r_squared}
+      end
-          history[:leak_alerts] << leak_alert
+      def calculate_stats(values)
+        return {} if values.empty?
-          # Only keep recent leak alerts
-          history[:leak_alerts] = history[:leak_alerts].last(10)
-        end
+        {
+          min: values.min,
+          max: values.max,
+          mean: (values.sum.to_f / values.length).round(2),
+          median: values.sort[values.length / 2],
+          std_dev: calculate_standard_deviation(values)
+        }
       end
-    end
-    def self.calculate_memory_trend(memory_values, timestamps)
-      return {slope: 0, r_squared: 0} if memory_values.length < 2
-      n = memory_values.length
-      sum_x = timestamps.sum
-      sum_y = memory_values.sum
-      sum_xy = timestamps.zip(memory_values).sum { |x, y| x * y }
-      sum_x2 = timestamps.sum { |x| x * x }
-      memory_values.sum { |y| y * y }
-      # Calculate slope (m) and intercept (b) for y = mx + b
-      slope = (n * sum_xy - sum_x * sum_y).to_f / (n * sum_x2 - sum_x * sum_x)
-      intercept = (sum_y - slope * sum_x).to_f / n
-      # Calculate R-squared (coefficient of determination)
-      y_mean = sum_y.to_f / n
-      ss_tot = memory_values.sum { |y| (y - y_mean)**2 }
-      ss_res = memory_values.zip(timestamps).sum { |y, x| (y - (slope * x + intercept))**2 }
-      r_squared = (ss_tot > 0) ? 1 - (ss_res / ss_tot) : 0
-      {
-        slope: slope,
-        intercept: intercept,
-        r_squared: r_squared
-      }
-    end
+      def calculate_standard_deviation(values)
+        return 0 if values.length < 2
-    def self.get_memory_analysis
-      history = Thread.current[MEMORY_HISTORY_KEY] || initialize_history
-      samples = history[:samples]
-      return {status: "insufficient_data", sample_count: samples.length} if samples.length < 5
-      memory_values = samples.map { |s| s[:memory_usage] }
-      gc_counts = samples.map { |s| s[:gc_count] }
-      object_counts = samples.map { |s| s[:object_count] }
-      # Calculate basic statistics
-      memory_stats = calculate_stats(memory_values)
-      gc_stats = calculate_stats(gc_counts)
-      object_stats = calculate_stats(object_counts)
-      # Detect patterns
-      memory_trend = calculate_memory_trend(memory_values, samples.map { |s| s[:timestamp] })
-      # Analyze recent activity
-      recent_samples = samples.last(10)
-      recent_controllers = recent_samples.map { |s| "#{s[:controller]}##{s[:action]}" }.tally
-      {
-        status: "analyzed",
-        sample_count: samples.length,
-        time_window_seconds: samples.last[:timestamp] - samples.first[:timestamp],
-        memory_stats: memory_stats,
-        gc_stats: gc_stats,
-        object_stats: object_stats,
-        memory_trend: memory_trend,
-        recent_controllers: recent_controllers,
-        leak_alerts: history[:leak_alerts].last(5),
-        memory_efficiency: calculate_memory_efficiency(samples)
-      }
-    end
+        mean = values.sum.to_f / values.length
+        variance = values.sum { |v| (v - mean)**2 } / (values.length - 1)
+        Math.sqrt(variance).round(2)
+      end
-    def self.calculate_stats(values)
-      return {} if values.empty?
+      def calculate_memory_efficiency(samples)
+        return {} if samples.length < 2
-      {
-        min: values.min,
-        max: values.max,
-        mean: (values.sum.to_f / values.length).round(2),
-        median: values.sort[values.length / 2],
-        std_dev: calculate_standard_deviation(values)
-      }
-    end
+        memory_per_object = samples.map do |sample|
+          (sample[:object_count] > 0) ? sample[:memory_usage] / sample[:object_count] : 0
+        end
-    def self.calculate_standard_deviation(values)
-      return 0 if values.length < 2
+        gc_efficiency = []
+        (1...samples.length).each do |i|
+          gc_delta = samples[i][:gc_count] - samples[i - 1][:gc_count]
+          memory_delta = samples[i][:memory_usage] - samples[i - 1][:memory_usage]
-      mean = values.sum.to_f / values.length
-      variance = values.sum { |v| (v - mean)**2 } / (values.length - 1)
-      Math.sqrt(variance).round(2)
-    end
-    def self.calculate_memory_efficiency(samples)
-      return {} if samples.length < 2
+          if gc_delta > 0 && memory_delta < 0
+            gc_efficiency << (-memory_delta / gc_delta).round(2)
+          end
+        end
-      # Calculate memory per object ratio
-      memory_per_object = samples.map do |sample|
-        (sample[:object_count] > 0) ? sample[:memory_usage] / sample[:object_count] : 0
+        {
+          average_memory_per_object_kb: (memory_per_object.sum / memory_per_object.length).round(2),
+          gc_efficiency_mb_per_cycle: gc_efficiency.any? ? (gc_efficiency.sum / gc_efficiency.length).round(2) : 0,
+          memory_volatility: calculate_standard_deviation(samples.map { |s| s[:memory_usage] })
+        }
       end
-      # Calculate GC efficiency (objects collected per GC cycle)
-      gc_efficiency = []
-      (1...samples.length).each do |i|
-        gc_delta = samples[i][:gc_count] - samples[i - 1][:gc_count]
-        memory_delta = samples[i][:memory_usage] - samples[i - 1][:memory_usage]
+      private
-        if gc_delta > 0 && memory_delta < 0
-          gc_efficiency << (-memory_delta / gc_delta).round(2)
+      def cleanup_old_samples_unlocked
+        cutoff_time = Time.now.utc.to_i - LEAK_DETECTION_WINDOW
+        samples = @history[:samples]
+        # Drop stale samples by time window.
+        if samples.any? && samples.first[:timestamp] <= cutoff_time
+          @history[:samples] = samples.select { |s| s[:timestamp] > cutoff_time }
+          samples = @history[:samples]
         end
+        # Enforce hard cap so a burst of traffic can't grow the buffer forever.
+        if samples.length > MAX_SAMPLES
+          @history[:samples] = samples.last(MAX_SAMPLES)
+        end
+        @history[:last_cleanup] = Time.now.utc.to_i
       end
-      {
-        average_memory_per_object_kb: (memory_per_object.sum / memory_per_object.length).round(2),
-        gc_efficiency_mb_per_cycle: gc_efficiency.any? ? (gc_efficiency.sum / gc_efficiency.length).round(2) : 0,
-        memory_volatility: calculate_standard_deviation(samples.map { |s| s[:memory_usage] })
-      }
-    end
+      # Runs the O(N) regression on a pre-copied snapshot so the mutex is not
+      # held during the computation. The alert is appended inside a short lock.
+      def check_for_memory_leaks(samples)
+        return if samples.length < MIN_SAMPLES_FOR_LEAK_DETECTION
+        memory_values = samples.map { |s| s[:memory_usage] }
+        timestamps    = samples.map { |s| s[:timestamp] }
-    def self.clear_history
-      Thread.current[MEMORY_HISTORY_KEY] = nil
+        trend = calculate_memory_trend(memory_values, timestamps)
+        return unless trend[:slope] > 0.1 && trend[:r_squared] > 0.7
+        memory_growth = memory_values.last - memory_values.first
+        return unless memory_growth > MEMORY_GROWTH_THRESHOLD
+        leak_alert = {
+          detected_at: Time.now.utc.to_i,
+          memory_growth_mb: memory_growth.round(2),
+          growth_rate_mb_per_second: trend[:slope],
+          confidence: trend[:r_squared],
+          sample_count: samples.length,
+          time_window_seconds: timestamps.last - timestamps.first,
+          recent_controllers: samples.last(5).map { |s| "#{s[:controller]}##{s[:action]}" }.uniq
+        }
+        @mutex.synchronize do
+          @history[:leak_alerts] << leak_alert
+          @history[:leak_alerts] = @history[:leak_alerts].last(MAX_LEAK_ALERTS)
+        end
+      end
     end
   end
 end

data/lib/dead_bro/memory_tracking_subscriber.rb CHANGED

@@ -4,8 +4,10 @@ require "active_support/notifications"
 module DeadBro
   class MemoryTrackingSubscriber
-    # Object allocation events
-    ALLOCATION_EVENT = "object_allocations.active_support"
+    # Allocation counts come from the process_action event (Rails instruments
+    # allocations there via ActiveSupport::Notifications). The old
+    # "object_allocations.active_support" constant was never emitted by Rails,
+    # so that subscription was dead code — removed.
     PROCESS_ACTION_EVENT = "process_action.action_controller"
     THREAD_LOCAL_KEY = :dead_bro_memory_events
@@ -13,7 +15,6 @@ module DeadBro
     LARGE_OBJECT_THRESHOLD = 1_000_000 # 1MB threshold for large objects
     # Performance optimization settings
-    ALLOCATION_SAMPLING_RATE = 1 # Track all when enabled (adjust in production)
     MAX_ALLOCATIONS_PER_REQUEST = 1000 # Limit allocations tracked per request
     LARGE_OBJECT_SAMPLE_RATE = 0.01 # Sample 1% of live objects to estimate large ones
     MAX_LARGE_OBJECTS = 50 # Cap number of large objects captured per request
@@ -23,13 +24,6 @@ module DeadBro
       return unless DeadBro.configuration.allocation_tracking_enabled
       if defined?(ActiveSupport::Notifications) && ActiveSupport::Notifications.notifier.respond_to?(:subscribe)
         begin
-          # Subscribe to object allocation events with sampling
-          ActiveSupport::Notifications.subscribe(ALLOCATION_EVENT) do |name, started, finished, _unique_id, data|
-            # Sample allocations to reduce overhead
-            next unless rand < ALLOCATION_SAMPLING_RATE
-            track_allocation(data, started, finished)
-          end
           # Subscribe to process_action to capture request-level allocation counters
           ActiveSupport::Notifications.subscribe(PROCESS_ACTION_EVENT) do |*args|
             event = if args.length == 1 && args.first.is_a?(ActiveSupport::Notifications::Event)
@@ -345,27 +339,9 @@ module DeadBro
     end
     def self.memory_usage_mb
-      # Use cached memory calculation to avoid expensive system calls
-      @memory_cache ||= {}
-      cache_key = Process.pid
-      # Cache memory usage for 1 second to avoid repeated system calls
-      if @memory_cache[cache_key] && (Time.now - @memory_cache[cache_key][:timestamp]) < 1
-        return @memory_cache[cache_key][:memory]
-      end
-      memory = if defined?(GC) && GC.respond_to?(:stat)
-        # Use GC stats as a proxy for memory usage (much faster than ps)
-        gc_stats = GC.stat
-        # Estimate memory usage from heap pages (rough approximation)
-        heap_pages = gc_stats[:heap_allocated_pages] || 0
-        (heap_pages * 4 * 1024) / (1024 * 1024) # 4KB per page, convert to MB
-      else
-        0
-      end
-      @memory_cache[cache_key] = {memory: memory, timestamp: Time.now}
-      memory
+      # MemoryHelpers.rss_mb reads /proc/self/status on Linux and caches for
+      # ~1 second across threads, so this is safe to call per-request.
+      DeadBro::MemoryHelpers.rss_mb
     rescue
       0
     end

data/lib/dead_bro/monitor.rb CHANGED

@@ -2,15 +2,23 @@
 module DeadBro
   class Monitor
+    SLEEP_INTERVAL_SECONDS = 60
     def initialize(client: DeadBro.client)
       @client = client
       @thread = nil
       @running = false
+      @stop_mutex = Mutex.new
+      @stop_cv = ConditionVariable.new
     end
     def start
-      return if @running
-      return unless DeadBro.configuration.job_queue_monitoring_enabled
+      # Live thread already running — nothing to do.
+      return if @running && @thread&.alive?
+      # Reset: handles post-fork where @running=true but the thread is dead.
+      @running = false
       return unless DeadBro.configuration.enabled
       @running = true
@@ -25,8 +33,12 @@ module DeadBro
             log_error("Error collecting stats: #{e.message}")
           end
-          # Sleep for 60 seconds (1 minute)
-          sleep(60)
+          # Interruptible sleep — stop() signals the CV so shutdown doesn't
+          # block up to a full minute. Still naps the full interval during
+          # normal operation.
+          @stop_mutex.synchronize do
+            @stop_cv.wait(@stop_mutex, SLEEP_INTERVAL_SECONDS) if @running
+          end
         end
       end
@@ -35,7 +47,8 @@ module DeadBro
     def stop
       @running = false
-      @thread&.join(5) # Wait up to 5 seconds for thread to finish
+      @stop_mutex.synchronize { @stop_cv.broadcast }
+      @thread&.join(5) # Safety timeout in case the thread is mid-flight
       @thread = nil
     end

data/lib/dead_bro/railtie.rb CHANGED

@@ -55,12 +55,12 @@ if defined?(Rails) && defined?(Rails::Railtie)
             DeadBro::JobSubscriber.subscribe!(client: shared_client)
           end
-          # Start job queue monitoring if enabled
-          if DeadBro.configuration.job_queue_monitoring_enabled
-            require "dead_bro/monitor"
-            DeadBro.monitor = DeadBro::Monitor.new(client: shared_client)
-            DeadBro.monitor.start
-          end
+          # Always start the monitor thread. The thread runs every 60s but
+          # post_monitor_stats skips the HTTP POST when job_queue_monitoring_enabled
+          # is false, so the backend can toggle monitoring on/off mid-process.
+          require "dead_bro/monitor"
+          DeadBro.monitor = DeadBro::Monitor.new(client: shared_client)
+          DeadBro.monitor.start
         rescue
           # Never raise in Railtie init
         end