RubyGems - semian - Versions diffs - 0.27.0 → 0.28.0 - Mend

semian 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/README.md +75 -0
data/lib/semian/adapter.rb +1 -1
data/lib/semian/adaptive_circuit_breaker.rb +136 -0
data/lib/semian/circuit_breaker.rb +44 -25
data/lib/semian/circuit_breaker_behaviour.rb +64 -0
data/lib/semian/configuration_validator.rb +52 -0
data/lib/semian/dual_circuit_breaker.rb +165 -0
data/lib/semian/mysql2.rb +2 -2
data/lib/semian/net_http.rb +3 -3
data/lib/semian/pid_controller.rb +217 -0
data/lib/semian/pid_controller_thread.rb +72 -0
data/lib/semian/protected_resource.rb +1 -1
data/lib/semian/simple_exponential_smoother.rb +137 -0
data/lib/semian/unprotected_resource.rb +3 -3
data/lib/semian/version.rb +1 -1
data/lib/semian.rb +78 -3
metadata +8 -2

data/lib/semian/dual_circuit_breaker.rb ADDED Viewed

@@ -0,0 +1,165 @@
+# frozen_string_literal: true
+module Semian
+  # DualCircuitBreaker wraps both classic and adaptive circuit breakers,
+  # allowing runtime switching between them via a callable that determines which to use.
+  class DualCircuitBreaker
+    include CircuitBreakerBehaviour
+    # Module to synchronize mark_success and mark_failed calls between sibling circuit breakers
+    # and reduce code duplication
+    module SiblingSync
+      attr_writer :sibling
+      def mark_success(scope: nil, adapter: nil)
+        super
+        @sibling.method(:mark_success).super_method.call(scope:, adapter:)
+      end
+      def mark_failed(error, scope: nil, adapter: nil)
+        super
+        @sibling.method(:mark_failed).super_method.call(error, scope:, adapter:)
+      end
+    end
+    class ChildClassicCircuitBreaker < CircuitBreaker
+      include SiblingSync
+    end
+    class ChildAdaptiveCircuitBreaker < AdaptiveCircuitBreaker
+      include SiblingSync
+    end
+    attr_reader :classic_circuit_breaker, :adaptive_circuit_breaker, :active_circuit_breaker
+    # use_adaptive should be a callable (Proc/lambda) that returns true/false
+    # to determine which circuit breaker to use. If it returns true, use adaptive.
+    def initialize(name:, classic_circuit_breaker:, adaptive_circuit_breaker:)
+      initialize_behaviour(name: name)
+      @classic_circuit_breaker = classic_circuit_breaker
+      @adaptive_circuit_breaker = adaptive_circuit_breaker
+      @classic_circuit_breaker.sibling = @adaptive_circuit_breaker
+      @adaptive_circuit_breaker.sibling = @classic_circuit_breaker
+      @active_circuit_breaker = @classic_circuit_breaker
+    end
+    def self.adaptive_circuit_breaker_selector(selector) # rubocop:disable Style/ClassMethodsDefinitions
+      @@adaptive_circuit_breaker_selector = selector # rubocop:disable Style/ClassVars
+    end
+    def active_breaker_type
+      @active_circuit_breaker.is_a?(Semian::AdaptiveCircuitBreaker) ? :adaptive : :classic
+    end
+    def acquire(resource = nil, scope: nil, adapter: nil, &block)
+      # NOTE: This assignment is not thread-safe, but this is acceptable for now:
+      # - Each request gets its own decision based on the selector at that moment
+      # - The worst case is a brief inconsistency where a thread reads a stale value,
+      #    which just means it uses the previous circuit breaker type for that one request
+      old_type = active_breaker_type
+      @active_circuit_breaker = get_active_circuit_breaker(resource)
+      if old_type != active_breaker_type
+        Semian.notify(:circuit_breaker_mode_change, self, nil, nil, old_mode: old_type, new_mode: active_breaker_type)
+      end
+      @active_circuit_breaker.acquire(resource, scope:, adapter:, &block)
+    end
+    def open?
+      @active_circuit_breaker.open?
+    end
+    def closed?
+      @active_circuit_breaker.closed?
+    end
+    def half_open?
+      @active_circuit_breaker.half_open?
+    end
+    def request_allowed?
+      @active_circuit_breaker.request_allowed?
+    end
+    def mark_failed(error, scope: nil, adapter: nil)
+      @active_circuit_breaker&.mark_failed(error, scope: nil, adapter: nil)
+    end
+    def mark_success(scope: nil, adapter: nil)
+      @active_circuit_breaker&.mark_success(scope: nil, adapter: nil)
+    end
+    def stop
+      @adaptive_circuit_breaker&.stop
+    end
+    def reset(scope: nil, adapter: nil)
+      @classic_circuit_breaker&.reset(scope:, adapter:)
+      @adaptive_circuit_breaker&.reset(scope:, adapter:)
+    end
+    def destroy
+      @classic_circuit_breaker&.destroy
+      @adaptive_circuit_breaker&.destroy
+    end
+    def in_use?
+      @classic_circuit_breaker&.in_use? || @adaptive_circuit_breaker&.in_use?
+    end
+    def last_error
+      @active_circuit_breaker.last_error
+    end
+    def metrics
+      {
+        active: active_breaker_type,
+        classic: classic_metrics,
+        adaptive: adaptive_metrics,
+      }
+    end
+    private
+    def classic_metrics
+      return {} unless @classic_circuit_breaker
+      {
+        state: @classic_circuit_breaker.state&.value,
+        open: @classic_circuit_breaker.open?,
+        closed: @classic_circuit_breaker.closed?,
+        half_open: @classic_circuit_breaker.half_open?,
+      }
+    end
+    def adaptive_metrics
+      return {} unless @adaptive_circuit_breaker
+      @adaptive_circuit_breaker.metrics.merge(
+        open: @adaptive_circuit_breaker.open?,
+        closed: @adaptive_circuit_breaker.closed?,
+        half_open: @adaptive_circuit_breaker.half_open?,
+      )
+    end
+    def get_active_circuit_breaker(resource)
+      if use_adaptive?(resource)
+        @adaptive_circuit_breaker
+      else
+        @classic_circuit_breaker
+      end
+    end
+    def use_adaptive?(resource = nil)
+      return false unless defined?(@@adaptive_circuit_breaker_selector)
+      @@adaptive_circuit_breaker_selector.call(resource)
+    rescue => e
+      Semian.logger&.warn("[#{@name}] use_adaptive check failed: #{e.message}. Defaulting to classic circuit breaker.")
+      false
+    end
+  end
+end

data/lib/semian/mysql2.rb CHANGED Viewed

@@ -126,11 +126,11 @@ module Semian
       acquire_semian_resource(adapter: :mysql, scope: :connection) { raw_connect(*args) }
     end
-    def acquire_semian_resource(**)
+    def acquire_semian_resource(adapter: nil, scope: nil, **)
       super
     rescue ::Mysql2::Error => error
       if error.is_a?(PingFailure) || (!error.is_a?(::Mysql2::SemianError) && error.message.match?(CONNECTION_ERROR))
-        semian_resource.mark_failed(error)
+        semian_resource.mark_failed(error, scope: scope, adapter: adapter)
         error.semian_identifier = semian_identifier
       end
       raise

data/lib/semian/net_http.rb CHANGED Viewed

@@ -106,7 +106,7 @@ module Semian
         return super if disabled?
         acquire_semian_resource(adapter: :http, scope: :query) do
-          handle_error_responses(super)
+          handle_error_responses(super, adapter: :http, scope: :query)
         end
       end
     end
@@ -126,9 +126,9 @@ module Semian
     private
-    def handle_error_responses(result)
+    def handle_error_responses(result, scope:, adapter:)
       if raw_semian_options.fetch(:open_circuit_server_errors, false)
-        semian_resource.mark_failed(result) if result.is_a?(::Net::HTTPServerError)
+        semian_resource.mark_failed(result, scope: scope, adapter: adapter) if result.is_a?(::Net::HTTPServerError)
       end
       result
     end

data/lib/semian/pid_controller.rb ADDED Viewed

@@ -0,0 +1,217 @@
+# frozen_string_literal: true
+require "thread"
+require_relative "simple_exponential_smoother"
+module Semian
+  module Simple
+    # PID Controller for adaptive circuit breaking
+    # Based on the error function:
+    # P = (error_rate - ideal_error_rate) - (1 - (error_rate - ideal_error_rate)) * rejection_rate
+    # Note: P increases when error_rate increases
+    #       P decreases when rejection_rate increases (providing feedback)
+    #
+    # Request outcomes are timestamped into sliding windows by #record_request. Each
+    # #update call derives the windowed error rate, feeds it to an exponential smoother
+    # that supplies the "ideal" error rate, and moves the rejection rate by the PID output.
+    # This class performs no locking of its own.
+    class PIDController
+      attr_reader :rejection_rate
+      # @param kp gain applied to the current P value
+      # @param ki gain applied to the accumulated integral of P
+      # @param kd gain applied to the change in P between two updates
+      # @param window_size length of the observation window; compared against
+      #   CLOCK_MONOTONIC timestamps, so it is expressed in seconds
+      # @param sliding_interval used as dt in #update and to derive the smoother's
+      #   observations_per_minute (60 / sliding_interval) -- presumably the number of
+      #   seconds between #update calls; confirm against the caller
+      # @param implementation namespace that provides a SlidingWindow class
+      # @param initial_error_rate starting value of the ideal error rate estimate
+      # @param dead_zone_ratio fraction of the ideal error rate below which positive
+      #   deviations are ignored
+      # @param ideal_error_rate_estimator_cap_value passed to the smoother as cap_value
+      # @param integral_upper_cap upper clamp for the integral accumulator
+      # @param integral_lower_cap lower clamp for the integral accumulator
+      def initialize(kp:, ki:, kd:, window_size:, sliding_interval:, implementation:, initial_error_rate:,
+        dead_zone_ratio:, ideal_error_rate_estimator_cap_value:, integral_upper_cap:, integral_lower_cap:)
+        @kp = kp
+        @ki = ki
+        @kd = kd
+        @dead_zone_ratio = dead_zone_ratio
+        @integral_upper_cap = integral_upper_cap
+        @integral_lower_cap = integral_lower_cap
+        @rejection_rate = 0.0
+        @integral = 0.0
+        @derivative = 0.0
+        @previous_p_value = 0.0
+        @last_ideal_error_rate = initial_error_rate
+        @window_size = window_size
+        @sliding_interval = sliding_interval
+        @smoother = SimpleExponentialSmoother.new(
+          cap_value: ideal_error_rate_estimator_cap_value,
+          initial_value: initial_error_rate,
+          observations_per_minute: 60 / sliding_interval,
+        )
+        # NOTE(review): each window is capped at 200 entries per unit of window_size;
+        # the rationale for 200 is not visible in this file.
+        @errors = implementation::SlidingWindow.new(max_size: 200 * window_size)
+        @successes = implementation::SlidingWindow.new(max_size: 200 * window_size)
+        @rejections = implementation::SlidingWindow.new(max_size: 200 * window_size)
+        @last_error_rate = 0.0
+        @last_p_value = 0.0
+      end
+      # Timestamps one request outcome into the matching sliding window.
+      # Outcomes other than :error, :success and :rejected are ignored.
+      def record_request(outcome)
+        case outcome
+        when :error
+          @errors.push(current_time)
+        when :success
+          @successes.push(current_time)
+        when :rejected
+          @rejections.push(current_time)
+        end
+      end
+      # Runs one controller step and returns the new rejection rate (within 0.0..1.0).
+      def update
+        # Store the last window's P value so that we can serve it up in the metrics snapshots
+        @previous_p_value = @last_p_value
+        @last_error_rate = calculate_error_rate
+        # The smoother is fed before calculate_p_value reads its forecast, so the ideal
+        # error rate used below already includes this window's observation.
+        store_error_rate(@last_error_rate)
+        dt = @sliding_interval
+        @last_p_value = calculate_p_value(@last_error_rate)
+        proportional = @kp * @last_p_value
+        @integral += @last_p_value * dt
+        integral = @ki * @integral
+        @derivative = @kd * (@last_p_value - @previous_p_value) / dt
+        # Calculate the control signal (change in rejection rate)
+        control_signal = proportional + integral + @derivative
+        # Calculate what the new rejection rate would be
+        new_rejection_rate = @rejection_rate + control_signal
+        # Update rejection rate (clamped between 0 and 1)
+        @rejection_rate = new_rejection_rate.clamp(0.0, 1.0)
+        # NOTE(review): the accumulator is clamped only after this step's integral term
+        # has been computed, so a single step can use an out-of-range value -- confirm
+        # this ordering is intended.
+        @integral = @integral.clamp(@integral_lower_cap, @integral_upper_cap)
+        @rejection_rate
+      end
+      # Should we reject this request based on current rejection rate?
+      # Draws a random number, so repeated calls can give different answers.
+      def should_reject?
+        rand < @rejection_rate
+      end
+      # Reset the controller state: zeroes the PID terms, empties all three windows and
+      # resets the smoother, whose forecast becomes the ideal error rate again.
+      def reset
+        @rejection_rate = 0.0
+        @integral = 0.0
+        @previous_p_value = 0.0
+        @derivative = 0.0
+        @last_p_value = 0.0
+        @errors.clear
+        @successes.clear
+        @rejections.clear
+        @last_error_rate = 0.0
+        @smoother.reset
+        @last_ideal_error_rate = @smoother.forecast
+      end
+      # Get current metrics for monitoring/debugging
+      # @param full when true, also includes the smoother state and the current size of
+      #   each sliding window
+      # @return [Hash] snapshot of the values stored by the most recent #update
+      def metrics(full: true)
+        result = {
+          rejection_rate: @rejection_rate,
+          error_rate: @last_error_rate,
+          ideal_error_rate: @last_ideal_error_rate,
+          dead_zone_ratio: @dead_zone_ratio,
+          p_value: @last_p_value,
+          previous_p_value: @previous_p_value,
+          integral: @integral,
+          derivative: @derivative,
+        }
+        if full
+          result[:smoother_state] = @smoother.state
+          result[:current_window_requests] = {
+            success: @successes.size,
+            error: @errors.size,
+            rejected: @rejections.size,
+          }
+        end
+        result
+      end
+      private
+      # Calculate the current P value with dead-zone noise suppression.
+      # The dead zone prevents the controller from reacting to small, noisy
+      # deviations from the ideal error rate. Only deviations exceeding
+      # ideal_error_rate * dead_zone_ratio trigger a response.
+      # Side effect: refreshes @last_ideal_error_rate from the smoother's forecast.
+      def calculate_p_value(current_error_rate)
+        @last_ideal_error_rate = calculate_ideal_error_rate
+        raw_delta = current_error_rate - @last_ideal_error_rate
+        dead_zone = @last_ideal_error_rate * @dead_zone_ratio
+        # The first and last branches both pass raw_delta through unchanged; only a
+        # positive delta that stays within the dead zone is replaced by 0.0.
+        delta_error = if raw_delta <= 0
+          # Below or at ideal: pass through for recovery
+          raw_delta
+        elsif raw_delta <= dead_zone
+          # Within dead zone: suppress noise
+          0.0
+        else
+          # Above dead zone: full signal, dead zone only silences noise
+          raw_delta
+        end
+        delta_error - (1 - delta_error) * @rejection_rate
+      end
+      # Prunes timestamps older than window_size from all three windows, then returns
+      # errors / (successes + errors), or 0.0 when the window holds neither. Rejections
+      # are pruned here but do not enter the rate.
+      def calculate_error_rate
+        # Clean up old observations
+        current_timestamp = current_time
+        cutoff_time = current_timestamp - @window_size
+        @errors.reject! { |timestamp| timestamp < cutoff_time }
+        @successes.reject! { |timestamp| timestamp < cutoff_time }
+        @rejections.reject! { |timestamp| timestamp < cutoff_time }
+        total_requests = @successes.size + @errors.size
+        return 0.0 if total_requests == 0
+        @errors.size.to_f / total_requests
+      end
+      # Feeds one windowed error rate into the smoother.
+      def store_error_rate(error_rate)
+        @smoother.add_observation(error_rate)
+      end
+      # The smoother's current forecast serves as the ideal (baseline) error rate.
+      def calculate_ideal_error_rate
+        @smoother.forecast
+      end
+      # Monotonic clock reading, so window arithmetic is unaffected by wall-clock changes.
+      def current_time
+        Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      end
+    end
+  end
+  module ThreadSafe
+    # Thread-safe version of PIDController
+    class PIDController < Simple::PIDController
+      def initialize(**kwargs)
+        super(**kwargs)
+        @lock = Mutex.new
+      end
+      def record_request(outcome)
+        @lock.synchronize { super }
+      end
+      def update
+        @lock.synchronize { super }
+      end
+      def should_reject?
+        @lock.synchronize { super }
+      end
+      def reset
+        @lock.synchronize { super }
+      end
+      # NOTE: metrics, calculate_error_rate are not overridden
+      # to avoid deadlock. calculate_error_rate is private method
+      # only called internally from update (synchronized) and metrics (not synchronized).
+    end
+  end
+end

data/lib/semian/pid_controller_thread.rb ADDED Viewed

@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+require "singleton"
+require_relative "pid_controller"
+module Semian
+  class PIDControllerThread
+    include Singleton
+    def initialize
+      @stopped = true
+      @update_thread = nil
+      @circuit_breakers = Concurrent::Map.new
+      @sliding_interval = ENV.fetch("SEMIAN_ADAPTIVE_CIRCUIT_BREAKER_SLIDING_INTERVAL", 1).to_i
+    end
+    # As per the singleton pattern, this is called only once
+    def start
+      @stopped = false
+      update_proc = proc do
+        loop do
+          break if @stopped
+          wait_for_window
+          # Update PID controller state for each registered circuit breaker
+          @circuit_breakers.each do |_, circuit_breaker|
+            circuit_breaker.pid_controller_update
+          end
+        rescue => e
+          Semian.logger&.warn("[#{@name}] PID controller update thread error: #{e.message}")
+        end
+      end
+      @update_thread = Thread.new(&update_proc)
+    end
+    def stop
+      @stopped = true
+      @update_thread&.kill
+      @update_thread = nil
+    end
+    def register_resource(circuit_breaker)
+      # Track every registered circuit breaker in a Concurrent::Map
+      # Start the thread if it's not already running
+      if @circuit_breakers.empty? && @stopped
+        start
+      end
+      # Add the circuit breaker to the map
+      @circuit_breakers[circuit_breaker.name] = circuit_breaker
+      self
+    end
+    def unregister_resource(circuit_breaker)
+      # Remove the circuit breaker from the map
+      @circuit_breakers.delete(circuit_breaker.name)
+      # Stop the thread if there are no more circuit breakers
+      if @circuit_breakers.empty?
+        stop
+      end
+    end
+    def wait_for_window
+      Kernel.sleep(@sliding_interval)
+    end
+  end
+end

data/lib/semian/protected_resource.rb CHANGED Viewed

@@ -48,7 +48,7 @@ module Semian
       if @circuit_breaker.nil?
         yield self
       else
-        @circuit_breaker.acquire(resource) do
+        @circuit_breaker.acquire(resource, scope: scope, adapter: adapter) do
           yield self
         end
       end

data/lib/semian/simple_exponential_smoother.rb ADDED Viewed

@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+module Semian
+  # SimpleExponentialSmoother implements Simple Exponential Smoothing (SES) for forecasting
+  # a stable baseline error rate in adaptive circuit breakers.
+  #
+  # SES focuses on the level component only (no trend or seasonality), using the formula:
+  #   smoothed = alpha * value + (1 - alpha) * previous_smoothed
+  #
+  # Key characteristics:
+  # - Drops extreme values above cap to prevent outliers from distorting the forecast
+  # - Runs in two periods: low confidence (first 30 minutes) and high confidence (after 30 minutes)
+  # - During the low confidence period, we converge faster towards observed value than during the high confidence period
+  # - The choice of alphas follows the following criteria:
+  # - During low confidence:
+  #   - If we are observing 2x our current estimate, we need to converge towards it in 30 minutes
+  #   - If we are observing 0.5x our current estimate, we need to converge towards it in 5 minutes
+  # - During high confidence:
+  #   - If we are observing 2x our current estimate, we need to converge towards it in 1 hour
+  #   - If we are observing 0.5x our current estimate, we need to converge towards it in 10 minutes
+  # The following code snippet can be used to calculate the alphas:
+  # def find_alpha(name, start_point, multiplier, convergence_duration)
+  #   target = start_point * multiplier
+  #   desired_distance = 0.003
+  #   alpha_ceil = 0.5
+  #   alpha_floor = 0.0
+  #   alpha = 0.25
+  #   while true
+  #      smoothed_value = start_point
+  #      step_size = convergence_duration / 10
+  #      converged_too_fast = false
+  #      10.times do |step|
+  #          step_size.times do
+  #             smoothed_value = alpha * target + (1 - alpha) * smoothed_value
+  #          end
+  #          if step < 9 and (smoothed_value - target).abs < desired_distance
+  #             converged_too_fast = true
+  #          end
+  #      end
+  #
+  #      if converged_too_fast
+  #         alpha_ceil = alpha
+  #         alpha = (alpha + alpha_floor) / 2
+  #         next
+  #      end
+  #
+  #      if (smoothed_value - target).abs > desired_distance
+  #         alpha_floor = alpha
+  #         alpha =  (alpha + alpha_ceil) / 2
+  #         next
+  #      end
+  #
+  #      break
+  #   end
+  #
+  #   print "#{name} is #{alpha}\n"
+  # end
+  #
+  # initial_error_rate = 0.05
+  #
+  # find_alpha("low confidence upward convergence alpha", initial_error_rate, 2, 1800)
+  # find_alpha("low confidence downward convergence alpha", initial_error_rate, 0.5, 300)
+  # find_alpha("high confidence upward convergence alpha", initial_error_rate, 2, 3600)
+  # find_alpha("high confidence downward convergence alpha", initial_error_rate, 0.5, 600)
+  class SimpleExponentialSmoother
+    LOW_CONFIDENCE_ALPHA_UP = 0.0017
+    LOW_CONFIDENCE_ALPHA_DOWN = 0.078
+    HIGH_CONFIDENCE_ALPHA_UP = 0.0009
+    HIGH_CONFIDENCE_ALPHA_DOWN = 0.039
+    LOW_CONFIDENCE_THRESHOLD_MINUTES = 30
+    # Validate all alpha constants at class load time
+    [
+      LOW_CONFIDENCE_ALPHA_UP,
+      LOW_CONFIDENCE_ALPHA_DOWN,
+      HIGH_CONFIDENCE_ALPHA_UP,
+      HIGH_CONFIDENCE_ALPHA_DOWN,
+    ].each do |alpha|
+      if alpha <= 0 || alpha >= 0.5
+        raise ArgumentError, "alpha constant must be in range (0, 0.5), got: #{alpha}"
+      end
+    end
+    attr_reader :alpha, :cap_value, :initial_value, :smoothed_value, :observations_per_minute
+    def initialize(cap_value:, initial_value:, observations_per_minute:)
+      @alpha = LOW_CONFIDENCE_ALPHA_DOWN # Start with low confidence, converging down
+      @cap_value = cap_value
+      @initial_value = initial_value
+      @observations_per_minute = observations_per_minute
+      @smoothed_value = initial_value
+      @observation_count = 0
+    end
+    def add_observation(value)
+      raise ArgumentError, "value must be non-negative, got: #{value}" if value < 0
+      return @smoothed_value if value > cap_value
+      @observation_count += 1
+      low_confidence = @observation_count < (@observations_per_minute * LOW_CONFIDENCE_THRESHOLD_MINUTES)
+      converging_up = value > @smoothed_value
+      @alpha = if low_confidence
+        converging_up ? LOW_CONFIDENCE_ALPHA_UP : LOW_CONFIDENCE_ALPHA_DOWN
+      else
+        converging_up ? HIGH_CONFIDENCE_ALPHA_UP : HIGH_CONFIDENCE_ALPHA_DOWN
+      end
+      @smoothed_value = (@alpha * value) + ((1.0 - @alpha) * @smoothed_value)
+      @smoothed_value
+    end
+    def forecast
+      @smoothed_value
+    end
+    def state
+      {
+        smoothed_value: @smoothed_value,
+        alpha: @alpha,
+        cap_value: @cap_value,
+        initial_value: @initial_value,
+        observations_per_minute: @observations_per_minute,
+        observation_count: @observation_count,
+      }
+    end
+    def reset
+      @smoothed_value = initial_value
+      @observation_count = 0
+      @alpha = LOW_CONFIDENCE_ALPHA_DOWN
+      self
+    end
+  end
+end

data/lib/semian/unprotected_resource.rb CHANGED Viewed

@@ -35,7 +35,7 @@ module Semian
       0
     end
-    def reset
+    def reset(**)
     end
     def open?
@@ -54,10 +54,10 @@ module Semian
       true
     end
-    def mark_failed(_error)
+    def mark_failed(_error, **)
     end
-    def mark_success
+    def mark_success(**)
     end
     def bulkhead

data/lib/semian/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Semian
-  VERSION = "0.27.0"
+  VERSION = "0.28.0"
 end