semian 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +75 -0
- data/lib/semian/adapter.rb +1 -1
- data/lib/semian/adaptive_circuit_breaker.rb +136 -0
- data/lib/semian/circuit_breaker.rb +44 -25
- data/lib/semian/circuit_breaker_behaviour.rb +64 -0
- data/lib/semian/configuration_validator.rb +52 -0
- data/lib/semian/dual_circuit_breaker.rb +165 -0
- data/lib/semian/mysql2.rb +2 -2
- data/lib/semian/net_http.rb +3 -3
- data/lib/semian/pid_controller.rb +217 -0
- data/lib/semian/pid_controller_thread.rb +72 -0
- data/lib/semian/protected_resource.rb +1 -1
- data/lib/semian/simple_exponential_smoother.rb +137 -0
- data/lib/semian/unprotected_resource.rb +3 -3
- data/lib/semian/version.rb +1 -1
- data/lib/semian.rb +78 -3
- metadata +8 -2
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Semian
|
|
4
|
+
# DualCircuitBreaker wraps a classic and an adaptive circuit breaker and picks
# one of them on every #acquire call. The choice is made by a class-level
# selector callable (see .adaptive_circuit_breaker_selector); when no selector
# is configured, or the selector raises, the classic breaker is used.
class DualCircuitBreaker
  include CircuitBreakerBehaviour

  # Mixin for the two child breaker classes below. Each override first runs the
  # including class's inherited implementation (via +super+) and then invokes
  # the sibling's inherited implementation, so an outcome recorded on one
  # breaker is also recorded on the other. Going through +super_method+ skips
  # the sibling's own SiblingSync override, so the call is not mirrored back
  # to this breaker a second time.
  module SiblingSync
    attr_writer :sibling

    def mark_success(scope: nil, adapter: nil)
      super
      @sibling.method(:mark_success).super_method.call(scope:, adapter:)
    end

    def mark_failed(error, scope: nil, adapter: nil)
      super
      @sibling.method(:mark_failed).super_method.call(error, scope:, adapter:)
    end
  end

  # Classic breaker that mirrors its outcomes onto its sibling.
  class ChildClassicCircuitBreaker < CircuitBreaker
    include SiblingSync
  end

  # Adaptive breaker that mirrors its outcomes onto its sibling.
  class ChildAdaptiveCircuitBreaker < AdaptiveCircuitBreaker
    include SiblingSync
  end

  attr_reader :classic_circuit_breaker, :adaptive_circuit_breaker, :active_circuit_breaker

  # name:                     passed to initialize_behaviour.
  # classic_circuit_breaker:  breaker used when the selector is absent or falsy.
  # adaptive_circuit_breaker: breaker used when the selector returns truthy.
  #
  # Both breakers must respond to +sibling=+ (as the Child* classes above do);
  # they are cross-linked here. The classic breaker starts out as the active one.
  def initialize(name:, classic_circuit_breaker:, adaptive_circuit_breaker:)
    initialize_behaviour(name: name)

    @classic_circuit_breaker = classic_circuit_breaker
    @adaptive_circuit_breaker = adaptive_circuit_breaker

    @classic_circuit_breaker.sibling = @adaptive_circuit_breaker
    @adaptive_circuit_breaker.sibling = @classic_circuit_breaker

    @active_circuit_breaker = @classic_circuit_breaker
  end

  # Registers the callable that decides which breaker to use. It is called with
  # the resource given to #acquire; a truthy result selects the adaptive
  # breaker. The selector is stored in a class variable, so it is shared by
  # every DualCircuitBreaker instance.
  def self.adaptive_circuit_breaker_selector(selector) # rubocop:disable Style/ClassMethodsDefinitions
    @@adaptive_circuit_breaker_selector = selector # rubocop:disable Style/ClassVars
  end

  # Returns :adaptive or :classic depending on the currently active breaker.
  def active_breaker_type
    @active_circuit_breaker.is_a?(Semian::AdaptiveCircuitBreaker) ? :adaptive : :classic
  end

  # Re-evaluates the selector, switches the active breaker if needed (emitting a
  # :circuit_breaker_mode_change notification on a switch) and delegates the
  # acquire call to the active breaker.
  def acquire(resource = nil, scope: nil, adapter: nil, &block)
    # NOTE: This assignment is not thread-safe, but this is acceptable for now:
    # - Each request gets its own decision based on the selector at that moment
    # - The worst case is a brief inconsistency where a thread reads a stale value,
    #   which just means it uses the previous circuit breaker type for that one request
    old_type = active_breaker_type
    @active_circuit_breaker = get_active_circuit_breaker(resource)
    if old_type != active_breaker_type
      Semian.notify(:circuit_breaker_mode_change, self, nil, nil, old_mode: old_type, new_mode: active_breaker_type)
    end

    @active_circuit_breaker.acquire(resource, scope:, adapter:, &block)
  end

  def open?
    @active_circuit_breaker.open?
  end

  def closed?
    @active_circuit_breaker.closed?
  end

  def half_open?
    @active_circuit_breaker.half_open?
  end

  def request_allowed?
    @active_circuit_breaker.request_allowed?
  end

  # Records a failure on the active breaker, forwarding the caller's scope and
  # adapter (previously these were replaced by nil before delegation).
  def mark_failed(error, scope: nil, adapter: nil)
    @active_circuit_breaker&.mark_failed(error, scope:, adapter:)
  end

  # Records a success on the active breaker, forwarding the caller's scope and
  # adapter (previously these were replaced by nil before delegation).
  def mark_success(scope: nil, adapter: nil)
    @active_circuit_breaker&.mark_success(scope:, adapter:)
  end

  # Only the adaptive breaker is stopped here; the classic breaker is not sent #stop.
  def stop
    @adaptive_circuit_breaker&.stop
  end

  # Resets both breakers, regardless of which one is active.
  def reset(scope: nil, adapter: nil)
    @classic_circuit_breaker&.reset(scope:, adapter:)
    @adaptive_circuit_breaker&.reset(scope:, adapter:)
  end

  # Destroys both breakers, regardless of which one is active.
  def destroy
    @classic_circuit_breaker&.destroy
    @adaptive_circuit_breaker&.destroy
  end

  # Truthy when either breaker reports being in use.
  def in_use?
    @classic_circuit_breaker&.in_use? || @adaptive_circuit_breaker&.in_use?
  end

  def last_error
    @active_circuit_breaker.last_error
  end

  # Snapshot of which breaker is active plus the state of both breakers.
  def metrics
    {
      active: active_breaker_type,
      classic: classic_metrics,
      adaptive: adaptive_metrics,
    }
  end

  private

  # State flags of the classic breaker, or {} when there is none.
  def classic_metrics
    return {} unless @classic_circuit_breaker

    {
      state: @classic_circuit_breaker.state&.value,
      open: @classic_circuit_breaker.open?,
      closed: @classic_circuit_breaker.closed?,
      half_open: @classic_circuit_breaker.half_open?,
    }
  end

  # The adaptive breaker's own metrics merged with its state flags, or {} when
  # there is none.
  def adaptive_metrics
    return {} unless @adaptive_circuit_breaker

    @adaptive_circuit_breaker.metrics.merge(
      open: @adaptive_circuit_breaker.open?,
      closed: @adaptive_circuit_breaker.closed?,
      half_open: @adaptive_circuit_breaker.half_open?,
    )
  end

  def get_active_circuit_breaker(resource)
    if use_adaptive?(resource)
      @adaptive_circuit_breaker
    else
      @classic_circuit_breaker
    end
  end

  # Asks the class-level selector whether the adaptive breaker should be used.
  # Returns false when no selector has been registered. Any StandardError from
  # the selector is logged and treated as "use classic".
  def use_adaptive?(resource = nil)
    return false unless defined?(@@adaptive_circuit_breaker_selector)

    @@adaptive_circuit_breaker_selector.call(resource)
  rescue => e
    Semian.logger&.warn("[#{@name}] use_adaptive check failed: #{e.message}. Defaulting to classic circuit breaker.")
    false
  end
end
|
|
165
|
+
end
|
data/lib/semian/mysql2.rb
CHANGED
|
@@ -126,11 +126,11 @@ module Semian
|
|
|
126
126
|
acquire_semian_resource(adapter: :mysql, scope: :connection) { raw_connect(*args) }
|
|
127
127
|
end
|
|
128
128
|
|
|
129
|
-
def acquire_semian_resource(**)
|
|
129
|
+
def acquire_semian_resource(adapter: nil, scope: nil, **)
|
|
130
130
|
super
|
|
131
131
|
rescue ::Mysql2::Error => error
|
|
132
132
|
if error.is_a?(PingFailure) || (!error.is_a?(::Mysql2::SemianError) && error.message.match?(CONNECTION_ERROR))
|
|
133
|
-
semian_resource.mark_failed(error)
|
|
133
|
+
semian_resource.mark_failed(error, scope: scope, adapter: adapter)
|
|
134
134
|
error.semian_identifier = semian_identifier
|
|
135
135
|
end
|
|
136
136
|
raise
|
data/lib/semian/net_http.rb
CHANGED
|
@@ -106,7 +106,7 @@ module Semian
|
|
|
106
106
|
return super if disabled?
|
|
107
107
|
|
|
108
108
|
acquire_semian_resource(adapter: :http, scope: :query) do
|
|
109
|
-
handle_error_responses(super)
|
|
109
|
+
handle_error_responses(super, adapter: :http, scope: :query)
|
|
110
110
|
end
|
|
111
111
|
end
|
|
112
112
|
end
|
|
@@ -126,9 +126,9 @@ module Semian
|
|
|
126
126
|
|
|
127
127
|
private
|
|
128
128
|
|
|
129
|
-
def handle_error_responses(result)
|
|
129
|
+
def handle_error_responses(result, scope:, adapter:)
|
|
130
130
|
if raw_semian_options.fetch(:open_circuit_server_errors, false)
|
|
131
|
-
semian_resource.mark_failed(result) if result.is_a?(::Net::HTTPServerError)
|
|
131
|
+
semian_resource.mark_failed(result, scope: scope, adapter: adapter) if result.is_a?(::Net::HTTPServerError)
|
|
132
132
|
end
|
|
133
133
|
result
|
|
134
134
|
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "thread"
|
|
4
|
+
require_relative "simple_exponential_smoother"
|
|
5
|
+
|
|
6
|
+
module Semian
|
|
7
|
+
module Simple
|
|
8
|
+
# PID controller that drives the rejection rate used for adaptive circuit breaking.
#
# The value fed into the controller is
#   P = delta - (1 - delta) * rejection_rate
# where delta = error_rate - ideal_error_rate (after dead-zone suppression).
# P rises when the error rate rises and falls when the rejection rate rises,
# which is what provides the feedback.
class PIDController
  attr_reader :rejection_rate

  # kp:, ki:, kd:         proportional / integral / derivative gains.
  # window_size:          age limit for recorded timestamps; compared against
  #                       monotonic-clock readings, so presumably seconds.
  # sliding_interval:     used as dt in #update and to derive the smoother's
  #                       observations per minute (60 / sliding_interval).
  # implementation:       namespace that provides a SlidingWindow class.
  # initial_error_rate:   starting value for the ideal error rate estimate.
  # dead_zone_ratio:      fraction of the ideal error rate treated as noise.
  # ideal_error_rate_estimator_cap_value: cap handed to the smoother.
  # integral_upper_cap:, integral_lower_cap: bounds applied to the integral term.
  def initialize(kp:, ki:, kd:, window_size:, sliding_interval:, implementation:, initial_error_rate:,
    dead_zone_ratio:, ideal_error_rate_estimator_cap_value:, integral_upper_cap:, integral_lower_cap:)
    @kp = kp
    @ki = ki
    @kd = kd
    @dead_zone_ratio = dead_zone_ratio
    @integral_upper_cap = integral_upper_cap
    @integral_lower_cap = integral_lower_cap

    @rejection_rate = 0.0
    @integral = 0.0
    @derivative = 0.0
    @previous_p_value = 0.0
    @last_ideal_error_rate = initial_error_rate

    @window_size = window_size
    @sliding_interval = sliding_interval
    @smoother = SimpleExponentialSmoother.new(
      cap_value: ideal_error_rate_estimator_cap_value,
      initial_value: initial_error_rate,
      observations_per_minute: 60 / sliding_interval,
    )

    # One timestamp window per outcome, built in the order errors, successes, rejections.
    @errors, @successes, @rejections = Array.new(3) do
      implementation::SlidingWindow.new(max_size: 200 * window_size)
    end

    @last_error_rate = 0.0
    @last_p_value = 0.0
  end

  # Stamps the current monotonic time into the window matching +outcome+
  # (:error, :success or :rejected). Any other outcome is ignored.
  def record_request(outcome)
    window =
      case outcome
      when :error then @errors
      when :success then @successes
      when :rejected then @rejections
      end

    window&.push(current_time)
  end

  # Runs one controller step and returns the new rejection rate.
  def update
    # Keep the previous step's P value around; it feeds the derivative term
    # and is exposed through #metrics.
    @previous_p_value = @last_p_value

    @last_error_rate = calculate_error_rate
    store_error_rate(@last_error_rate)

    interval = @sliding_interval
    @last_p_value = calculate_p_value(@last_error_rate)

    @integral += @last_p_value * interval
    @derivative = @kd * (@last_p_value - @previous_p_value) / interval

    p_term = @kp * @last_p_value
    i_term = @ki * @integral

    # The control signal is the change to apply to the rejection rate,
    # which is kept within [0, 1].
    adjustment = p_term + i_term + @derivative
    @rejection_rate = (@rejection_rate + adjustment).clamp(0.0, 1.0)

    # The accumulated integral is bounded only after it has been used above.
    @integral = @integral.clamp(@integral_lower_cap, @integral_upper_cap)

    @rejection_rate
  end

  # Randomly decides whether to reject, with probability equal to the current rejection rate.
  def should_reject?
    rand < @rejection_rate
  end

  # Returns the controller, its windows and its smoother to their initial state.
  def reset
    @errors.clear
    @successes.clear
    @rejections.clear

    @rejection_rate = 0.0
    @integral = 0.0
    @derivative = 0.0
    @previous_p_value = 0.0
    @last_p_value = 0.0
    @last_error_rate = 0.0

    @smoother.reset
    @last_ideal_error_rate = @smoother.forecast
  end

  # Snapshot of the controller for monitoring/debugging. With +full+ truthy the
  # smoother state and the per-outcome window sizes are included as well.
  def metrics(full: true)
    snapshot = {
      rejection_rate: @rejection_rate,
      error_rate: @last_error_rate,
      ideal_error_rate: @last_ideal_error_rate,
      dead_zone_ratio: @dead_zone_ratio,
      p_value: @last_p_value,
      previous_p_value: @previous_p_value,
      integral: @integral,
      derivative: @derivative,
    }
    return snapshot unless full

    snapshot[:smoother_state] = @smoother.state
    snapshot[:current_window_requests] = {
      success: @successes.size,
      error: @errors.size,
      rejected: @rejections.size,
    }
    snapshot
  end

  private

  # Computes P for the given error rate, refreshing the ideal error rate first.
  # Positive deviations no larger than ideal_error_rate * dead_zone_ratio are
  # treated as noise and zeroed; deviations at or below zero, and deviations
  # beyond the dead zone, pass through unchanged.
  def calculate_p_value(current_error_rate)
    @last_ideal_error_rate = calculate_ideal_error_rate

    raw_delta = current_error_rate - @last_ideal_error_rate
    dead_zone = @last_ideal_error_rate * @dead_zone_ratio

    within_dead_zone = raw_delta > 0 && raw_delta <= dead_zone
    delta_error = within_dead_zone ? 0.0 : raw_delta

    delta_error - (1 - delta_error) * @rejection_rate
  end

  # Drops timestamps older than the window from all three windows, then returns
  # errors / (errors + successes), or 0.0 when nothing was recorded.
  # Rejections do not count towards the total.
  def calculate_error_rate
    cutoff_time = current_time - @window_size
    [@errors, @successes, @rejections].each do |window|
      window.reject! { |timestamp| timestamp < cutoff_time }
    end

    total_requests = @successes.size + @errors.size
    if total_requests == 0
      0.0
    else
      @errors.size.to_f / total_requests
    end
  end

  # Feeds the observed error rate into the smoother.
  def store_error_rate(error_rate)
    @smoother.add_observation(error_rate)
  end

  # The smoother's forecast serves as the ideal error rate.
  def calculate_ideal_error_rate
    @smoother.forecast
  end

  def current_time
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
module ThreadSafe
|
|
189
|
+
# Variant of Simple::PIDController whose mutating entry points run under a
# single Mutex.
class PIDController < Simple::PIDController
  def initialize(**kwargs)
    super
    @lock = Mutex.new
  end

  def record_request(outcome)
    @lock.synchronize do
      super(outcome)
    end
  end

  def update
    @lock.synchronize do
      super()
    end
  end

  def should_reject?
    @lock.synchronize do
      super()
    end
  end

  def reset
    @lock.synchronize do
      super()
    end
  end

  # NOTE: metrics is deliberately not overridden and therefore runs without
  # the lock. The private calculate_error_rate is not overridden either: it
  # is reached through update, which already holds the lock, and taking the
  # same Mutex again from the same thread would raise.
end
|
|
216
|
+
end
|
|
217
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "singleton"
|
|
4
|
+
require_relative "pid_controller"
|
|
5
|
+
|
|
6
|
+
module Semian
|
|
7
|
+
# Singleton that owns one background thread which periodically calls
# +pid_controller_update+ on every registered circuit breaker. The period
# comes from SEMIAN_ADAPTIVE_CIRCUIT_BREAKER_SLIDING_INTERVAL (default 1),
# converted with +to_i+ and passed to Kernel.sleep.
class PIDControllerThread
  include Singleton

  def initialize
    @stopped = true
    @update_thread = nil
    @circuit_breakers = Concurrent::Map.new
    @sliding_interval = ENV.fetch("SEMIAN_ADAPTIVE_CIRCUIT_BREAKER_SLIDING_INTERVAL", 1).to_i
  end

  # Spawns the update thread. register_resource calls this whenever the
  # registry is empty and the thread is stopped, so it can run again after a
  # stop. It does not check for an already running thread.
  def start
    @stopped = false

    update_proc = proc do
      loop do
        break if @stopped

        wait_for_window

        # Update PID controller state for each registered circuit breaker
        @circuit_breakers.each do |_, circuit_breaker|
          circuit_breaker.pid_controller_update
        end
      rescue => e
        # A failing update must not end the loop: log it and carry on with the
        # next iteration. The log line is tagged with the class name, since
        # this object has no name of its own.
        Semian.logger&.warn("[#{self.class.name}] PID controller update thread error: #{e.message}")
      end
    end

    @update_thread = Thread.new(&update_proc)
  end

  # Flags the loop as stopped and kills the update thread, if any.
  def stop
    @stopped = true
    @update_thread&.kill
    @update_thread = nil
  end

  # Adds +circuit_breaker+ to the registry under its name, starting the update
  # thread first when the registry is empty and the thread is stopped.
  # Returns self.
  def register_resource(circuit_breaker)
    # Start the thread if it's not already running
    if @circuit_breakers.empty? && @stopped
      start
    end

    # Add the circuit breaker to the map
    @circuit_breakers[circuit_breaker.name] = circuit_breaker
    self
  end

  # Removes +circuit_breaker+ from the registry and stops the update thread
  # once no breakers remain.
  def unregister_resource(circuit_breaker)
    # Remove the circuit breaker from the map
    @circuit_breakers.delete(circuit_breaker.name)

    # Stop the thread if there are no more circuit breakers
    if @circuit_breakers.empty?
      stop
    end
  end

  # Sleeps for one sliding interval between update rounds.
  def wait_for_window
    Kernel.sleep(@sliding_interval)
  end
end
|
|
72
|
+
end
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Semian
|
|
4
|
+
# SimpleExponentialSmoother implements Simple Exponential Smoothing (SES) for forecasting
|
|
5
|
+
# a stable baseline error rate in adaptive circuit breakers.
|
|
6
|
+
#
|
|
7
|
+
# SES focuses on the level component only (no trend or seasonality), using the formula:
|
|
8
|
+
# smoothed = alpha * value + (1 - alpha) * previous_smoothed
|
|
9
|
+
#
|
|
10
|
+
# Key characteristics:
|
|
11
|
+
# - Drops extreme values above cap to prevent outliers from distorting the forecast
|
|
12
|
+
# - Runs in two periods: low confidence (first 30 minutes) and high confidence (after 30 minutes)
|
|
13
|
+
# - During the low confidence period, we converge faster towards observed value than during the high confidence period
|
|
14
|
+
# - The choice of alphas follows the following criteria:
|
|
15
|
+
# - During low confidence:
|
|
16
|
+
# - If we are observing 2x our current estimate, we need to converge towards it in 30 minutes
|
|
17
|
+
# - If we are observing 0.5x our current estimate, we need to converge towards it in 5 minutes
|
|
18
|
+
# - During high confidence:
|
|
19
|
+
# - If we are observing 2x our current estimate, we need to converge towards it in 1 hour
|
|
20
|
+
# - If we are observing 0.5x our current estimate, we need to converge towards it in 10 minutes
|
|
21
|
+
# The following code snippet can be used to calculate the alphas:
|
|
22
|
+
# def find_alpha(name, start_point, multiplier, convergence_duration)
|
|
23
|
+
# target = start_point * multiplier
|
|
24
|
+
# desired_distance = 0.003
|
|
25
|
+
# alpha_ceil = 0.5
|
|
26
|
+
# alpha_floor = 0.0
|
|
27
|
+
# alpha = 0.25
|
|
28
|
+
# while true
|
|
29
|
+
# smoothed_value = start_point
|
|
30
|
+
# step_size = convergence_duration / 10
|
|
31
|
+
# converged_too_fast = false
|
|
32
|
+
# 10.times do |step|
|
|
33
|
+
# step_size.times do
|
|
34
|
+
# smoothed_value = alpha * target + (1 - alpha) * smoothed_value
|
|
35
|
+
# end
|
|
36
|
+
# if step < 9 and (smoothed_value - target).abs < desired_distance
|
|
37
|
+
# converged_too_fast = true
|
|
38
|
+
# end
|
|
39
|
+
# end
|
|
40
|
+
#
|
|
41
|
+
# if converged_too_fast
|
|
42
|
+
# alpha_ceil = alpha
|
|
43
|
+
# alpha = (alpha + alpha_floor) / 2
|
|
44
|
+
# next
|
|
45
|
+
# end
|
|
46
|
+
#
|
|
47
|
+
# if (smoothed_value - target).abs > desired_distance
|
|
48
|
+
# alpha_floor = alpha
|
|
49
|
+
# alpha = (alpha + alpha_ceil) / 2
|
|
50
|
+
# next
|
|
51
|
+
# end
|
|
52
|
+
#
|
|
53
|
+
# break
|
|
54
|
+
# end
|
|
55
|
+
#
|
|
56
|
+
# print "#{name} is #{alpha}\n"
|
|
57
|
+
# end
|
|
58
|
+
#
|
|
59
|
+
# initial_error_rate = 0.05
|
|
60
|
+
#
|
|
61
|
+
# find_alpha("low confidence upward convergence alpha", initial_error_rate, 2, 1800)
|
|
62
|
+
# find_alpha("low confidence downward convergence alpha", initial_error_rate, 0.5, 300)
|
|
63
|
+
# find_alpha("high confidence upward convergence alpha", initial_error_rate, 2, 3600)
|
|
64
|
+
# find_alpha("high confidence downward convergence alpha", initial_error_rate, 0.5, 600)
|
|
65
|
+
class SimpleExponentialSmoother
  LOW_CONFIDENCE_ALPHA_UP = 0.0017
  LOW_CONFIDENCE_ALPHA_DOWN = 0.078
  HIGH_CONFIDENCE_ALPHA_UP = 0.0009
  HIGH_CONFIDENCE_ALPHA_DOWN = 0.039
  LOW_CONFIDENCE_THRESHOLD_MINUTES = 30

  # Fail at load time if any alpha constant falls outside the open interval (0, 0.5).
  [
    LOW_CONFIDENCE_ALPHA_UP,
    LOW_CONFIDENCE_ALPHA_DOWN,
    HIGH_CONFIDENCE_ALPHA_UP,
    HIGH_CONFIDENCE_ALPHA_DOWN,
  ].each do |candidate|
    next if candidate > 0 && candidate < 0.5

    raise ArgumentError, "alpha constant must be in range (0, 0.5), got: #{candidate}"
  end

  attr_reader :alpha, :cap_value, :initial_value, :smoothed_value, :observations_per_minute

  # cap_value:               observations above this value are dropped.
  # initial_value:           starting (and post-reset) smoothed value.
  # observations_per_minute: used to turn the 30-minute low-confidence period
  #                          into a number of observations.
  def initialize(cap_value:, initial_value:, observations_per_minute:)
    @cap_value = cap_value
    @initial_value = initial_value
    @observations_per_minute = observations_per_minute
    @smoothed_value = initial_value
    @observation_count = 0
    # Until the first observation arrives, report the low-confidence downward alpha.
    @alpha = LOW_CONFIDENCE_ALPHA_DOWN
  end

  # Folds +value+ into the smoothed estimate and returns the new estimate.
  # Raises ArgumentError for negative values. Values above the cap are dropped:
  # the estimate is returned unchanged and the observation is not counted.
  def add_observation(value)
    raise ArgumentError, "value must be non-negative, got: #{value}" if value < 0

    return @smoothed_value if value > cap_value

    @observation_count += 1

    still_low_confidence = @observation_count < (@observations_per_minute * LOW_CONFIDENCE_THRESHOLD_MINUTES)
    rising = value > @smoothed_value

    # The alpha depends on the direction of travel and on how many
    # observations have been accepted so far.
    @alpha =
      if rising
        still_low_confidence ? LOW_CONFIDENCE_ALPHA_UP : HIGH_CONFIDENCE_ALPHA_UP
      else
        still_low_confidence ? LOW_CONFIDENCE_ALPHA_DOWN : HIGH_CONFIDENCE_ALPHA_DOWN
      end

    @smoothed_value = (@alpha * value) + ((1.0 - @alpha) * @smoothed_value)
  end

  # The current smoothed value.
  def forecast
    @smoothed_value
  end

  # Hash snapshot of the smoother's configuration and running state.
  def state
    {
      smoothed_value: @smoothed_value,
      alpha: @alpha,
      cap_value: @cap_value,
      initial_value: @initial_value,
      observations_per_minute: @observations_per_minute,
      observation_count: @observation_count,
    }
  end

  # Restores the initial value, observation count and alpha. Returns self.
  def reset
    @alpha = LOW_CONFIDENCE_ALPHA_DOWN
    @observation_count = 0
    @smoothed_value = @initial_value
    self
  end
end
|
|
137
|
+
end
|
|
@@ -35,7 +35,7 @@ module Semian
|
|
|
35
35
|
0
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
-
def reset
|
|
38
|
+
def reset(**)
|
|
39
39
|
end
|
|
40
40
|
|
|
41
41
|
def open?
|
|
@@ -54,10 +54,10 @@ module Semian
|
|
|
54
54
|
true
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
-
def mark_failed(_error)
|
|
57
|
+
def mark_failed(_error, **)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
-
def mark_success
|
|
60
|
+
def mark_success(**)
|
|
61
61
|
end
|
|
62
62
|
|
|
63
63
|
def bulkhead
|
data/lib/semian/version.rb
CHANGED