stoplight 5.5.0 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/stoplight/admin/actions/remove.rb +23 -0
- data/lib/stoplight/admin/dependencies.rb +6 -1
- data/lib/stoplight/admin/helpers.rb +10 -5
- data/lib/stoplight/admin/lights_repository.rb +26 -14
- data/lib/stoplight/admin/views/_card.erb +13 -1
- data/lib/stoplight/admin.rb +9 -0
- data/lib/stoplight/common/deprecations.rb +11 -0
- data/lib/stoplight/domain/config.rb +5 -1
- data/lib/stoplight/domain/data_store.rb +58 -6
- data/lib/stoplight/domain/failure.rb +2 -0
- data/lib/stoplight/domain/light/configuration_builder_interface.rb +120 -16
- data/lib/stoplight/domain/light.rb +34 -24
- data/lib/stoplight/domain/metrics.rb +64 -0
- data/lib/stoplight/domain/recovery_lock_token.rb +15 -0
- data/lib/stoplight/domain/{metadata.rb → state_snapshot.rb} +29 -37
- data/lib/stoplight/domain/storage/metrics.rb +42 -0
- data/lib/stoplight/domain/storage/recovery_lock.rb +56 -0
- data/lib/stoplight/domain/storage/state.rb +87 -0
- data/lib/stoplight/domain/strategies/green_run_strategy.rb +2 -2
- data/lib/stoplight/domain/strategies/red_run_strategy.rb +3 -3
- data/lib/stoplight/domain/strategies/run_strategy.rb +2 -7
- data/lib/stoplight/domain/strategies/yellow_run_strategy.rb +63 -36
- data/lib/stoplight/domain/tracker/base.rb +0 -29
- data/lib/stoplight/domain/tracker/recovery_probe.rb +26 -22
- data/lib/stoplight/domain/tracker/request.rb +26 -21
- data/lib/stoplight/domain/traffic_control/base.rb +5 -5
- data/lib/stoplight/domain/traffic_control/consecutive_errors.rb +3 -7
- data/lib/stoplight/domain/traffic_control/error_rate.rb +3 -3
- data/lib/stoplight/domain/traffic_recovery/base.rb +5 -5
- data/lib/stoplight/domain/traffic_recovery/consecutive_successes.rb +4 -8
- data/lib/stoplight/domain/traffic_recovery.rb +0 -1
- data/lib/stoplight/infrastructure/data_store/fail_safe.rb +164 -0
- data/lib/stoplight/infrastructure/data_store/memory/metrics.rb +27 -0
- data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_store.rb +54 -0
- data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_token.rb +20 -0
- data/lib/stoplight/infrastructure/data_store/memory/state.rb +21 -0
- data/lib/stoplight/infrastructure/data_store/memory.rb +163 -132
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/get_metrics.lua +26 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_failure.lua +27 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_success.lua +23 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/release_lock.lua +6 -0
- data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_store.rb +73 -0
- data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_token.rb +35 -0
- data/lib/stoplight/infrastructure/data_store/redis/scripting.rb +71 -0
- data/lib/stoplight/infrastructure/data_store/redis.rb +211 -165
- data/lib/stoplight/infrastructure/notifier/fail_safe.rb +62 -0
- data/lib/stoplight/infrastructure/storage/compatibility_metrics.rb +48 -0
- data/lib/stoplight/infrastructure/storage/compatibility_recovery_lock.rb +36 -0
- data/lib/stoplight/infrastructure/storage/compatibility_recovery_metrics.rb +55 -0
- data/lib/stoplight/infrastructure/storage/compatibility_state.rb +55 -0
- data/lib/stoplight/version.rb +1 -1
- data/lib/stoplight/wiring/data_store/base.rb +11 -0
- data/lib/stoplight/wiring/data_store/memory.rb +10 -0
- data/lib/stoplight/wiring/data_store/redis.rb +25 -0
- data/lib/stoplight/wiring/default.rb +1 -1
- data/lib/stoplight/wiring/default_configuration.rb +1 -1
- data/lib/stoplight/wiring/default_factory_builder.rb +1 -1
- data/lib/stoplight/wiring/light_builder.rb +185 -0
- data/lib/stoplight/wiring/light_factory/compatibility_validator.rb +55 -0
- data/lib/stoplight/wiring/light_factory/config_normalizer.rb +71 -0
- data/lib/stoplight/wiring/light_factory/configuration_pipeline.rb +72 -0
- data/lib/stoplight/wiring/light_factory/traffic_control_dsl.rb +26 -0
- data/lib/stoplight/wiring/light_factory/traffic_recovery_dsl.rb +21 -0
- data/lib/stoplight/wiring/light_factory.rb +45 -132
- data/lib/stoplight/wiring/notifier_factory.rb +26 -0
- data/lib/stoplight/wiring/public_api.rb +3 -2
- data/lib/stoplight.rb +18 -3
- metadata +55 -16
- data/lib/stoplight/infrastructure/data_store/redis/get_metadata.lua +0 -38
- data/lib/stoplight/infrastructure/data_store/redis/lua.rb +0 -25
- data/lib/stoplight/infrastructure/dependency_injection/container.rb +0 -249
- data/lib/stoplight/infrastructure/dependency_injection/unresolved_dependency_error.rb +0 -13
- data/lib/stoplight/wiring/container.rb +0 -80
- data/lib/stoplight/wiring/fail_safe_data_store.rb +0 -123
- data/lib/stoplight/wiring/fail_safe_notifier.rb +0 -79
- data/lib/stoplight/wiring/system_container.rb +0 -9
- data/lib/stoplight/wiring/system_light_factory.rb +0 -17
- /data/lib/stoplight/infrastructure/data_store/redis/{record_failure.lua → lua_scripts/record_failure.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{record_success.lua → lua_scripts/record_success.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_green.lua → lua_scripts/transition_to_green.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_red.lua → lua_scripts/transition_to_red.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_yellow.lua → lua_scripts/transition_to_yellow.lua} +0 -0
|
@@ -8,6 +8,7 @@ module Stoplight
|
|
|
8
8
|
# @api private use +Stoplight()+ method instead
|
|
9
9
|
class Light
|
|
10
10
|
extend Forwardable
|
|
11
|
+
include Common::Deprecations
|
|
11
12
|
include ConfigurationBuilderInterface
|
|
12
13
|
|
|
13
14
|
# @!attribute [r] config
|
|
@@ -32,22 +33,22 @@ module Stoplight
|
|
|
32
33
|
# @return [Stoplight::Domain::Strategies::RedRunStrategy]
|
|
33
34
|
protected attr_reader :red_run_strategy
|
|
34
35
|
|
|
35
|
-
# @!attribute [r] data_store
|
|
36
|
-
# @return [Stoplight::Light::Base]
|
|
37
|
-
protected attr_reader :data_store
|
|
38
|
-
|
|
39
36
|
# @!attribute [r] factory
|
|
40
37
|
# @return [Stoplight::Domain::LightFactory]
|
|
41
38
|
protected attr_reader :factory
|
|
42
39
|
|
|
40
|
+
# @!attribute state_store
|
|
41
|
+
# @return [Stoplight::Domain::Storage::State]
|
|
42
|
+
protected attr_reader :state_store
|
|
43
|
+
|
|
43
44
|
# @param config [Stoplight::Domain::Config]
|
|
44
|
-
def initialize(config, green_run_strategy:, yellow_run_strategy:, red_run_strategy:,
|
|
45
|
+
def initialize(config, green_run_strategy:, yellow_run_strategy:, red_run_strategy:, factory:, state_store:)
|
|
45
46
|
@config = config
|
|
46
|
-
@data_store = data_store
|
|
47
47
|
@green_run_strategy = green_run_strategy
|
|
48
48
|
@yellow_run_strategy = yellow_run_strategy
|
|
49
49
|
@red_run_strategy = red_run_strategy
|
|
50
50
|
@factory = factory
|
|
51
|
+
@state_store = state_store
|
|
51
52
|
end
|
|
52
53
|
|
|
53
54
|
# Returns the current state of the light:
|
|
@@ -56,9 +57,7 @@ module Stoplight
|
|
|
56
57
|
# * +Stoplight::State::UNLOCKED+ -- light is not locked and follows the configured rules
|
|
57
58
|
#
|
|
58
59
|
# @return [String]
|
|
59
|
-
def state
|
|
60
|
-
metadata.locked_state
|
|
61
|
-
end
|
|
60
|
+
def state = state_snapshot.locked_state
|
|
62
61
|
|
|
63
62
|
# Returns current color:
|
|
64
63
|
# * +Stoplight::Color::GREEN+ -- circuit breaker is closed
|
|
@@ -70,9 +69,7 @@ module Stoplight
|
|
|
70
69
|
# light.color #=> Color::GREEN
|
|
71
70
|
#
|
|
72
71
|
# @return [String] returns current light color
|
|
73
|
-
def color
|
|
74
|
-
metadata.color
|
|
75
|
-
end
|
|
72
|
+
def color = state_snapshot.color
|
|
76
73
|
|
|
77
74
|
# Runs the given block of code with this circuit breaker
|
|
78
75
|
#
|
|
@@ -91,9 +88,9 @@ module Stoplight
|
|
|
91
88
|
def run(fallback = nil, &code)
|
|
92
89
|
raise ArgumentError, "nothing to run. Please, pass a block into `Light#run`" unless block_given?
|
|
93
90
|
|
|
94
|
-
|
|
95
|
-
strategy = state_strategy_factory(
|
|
96
|
-
strategy.execute(fallback,
|
|
91
|
+
state_snapshot.then do |state_snapshot|
|
|
92
|
+
strategy = state_strategy_factory(state_snapshot.color)
|
|
93
|
+
strategy.execute(fallback, state_snapshot:, &code)
|
|
97
94
|
end
|
|
98
95
|
end
|
|
99
96
|
|
|
@@ -112,7 +109,7 @@ module Stoplight
|
|
|
112
109
|
else raise Error::IncorrectColor
|
|
113
110
|
end
|
|
114
111
|
|
|
115
|
-
|
|
112
|
+
state_store.set_state(state)
|
|
116
113
|
|
|
117
114
|
self
|
|
118
115
|
end
|
|
@@ -126,7 +123,7 @@ module Stoplight
|
|
|
126
123
|
#
|
|
127
124
|
# @return [Stoplight::Light] returns unlocked light (circuit breaker)
|
|
128
125
|
def unlock
|
|
129
|
-
|
|
126
|
+
state_store.set_state(State::UNLOCKED)
|
|
130
127
|
|
|
131
128
|
self
|
|
132
129
|
end
|
|
@@ -136,9 +133,7 @@ module Stoplight
|
|
|
136
133
|
# @param other [any]
|
|
137
134
|
# @return [Boolean]
|
|
138
135
|
def ==(other)
|
|
139
|
-
other.is_a?(self.class) &&
|
|
140
|
-
green_run_strategy == other.green_run_strategy && yellow_run_strategy == other.yellow_run_strategy &&
|
|
141
|
-
red_run_strategy == other.red_run_strategy && factory == other.factory
|
|
136
|
+
other.is_a?(self.class) && factory == other.factory
|
|
142
137
|
end
|
|
143
138
|
|
|
144
139
|
# Reconfigures the light with updated settings and returns a new instance.
|
|
@@ -171,8 +166,26 @@ module Stoplight
|
|
|
171
166
|
# # Run the lights with their respective configurations
|
|
172
167
|
# invoices_light.run(->(error) { [] }) { call_invoices_api }
|
|
173
168
|
# payment_light.run(->(error) { nil }) { call_payment_api }
|
|
169
|
+
# @deprecated
|
|
174
170
|
# @see +Stoplight()+
|
|
175
171
|
def with(**settings)
|
|
172
|
+
deprecate(<<~MSG)
|
|
173
|
+
Light#with is deprecated and will be removed in v6.0.0.
|
|
174
|
+
|
|
175
|
+
Circuit breakers should be configured once at creation, not cloned with
|
|
176
|
+
modifications.
|
|
177
|
+
|
|
178
|
+
Instead of:
|
|
179
|
+
light = Stoplight('api-call', threshold: 5)
|
|
180
|
+
modified = light.with(threshold: 10)
|
|
181
|
+
|
|
182
|
+
Configure correctly from the start:
|
|
183
|
+
Stoplight('api-call', threshold: 10)
|
|
184
|
+
MSG
|
|
185
|
+
with_without_warning(**settings)
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
private def with_without_warning(**settings)
|
|
176
189
|
factory.build_with(**settings)
|
|
177
190
|
end
|
|
178
191
|
|
|
@@ -189,10 +202,7 @@ module Stoplight
|
|
|
189
202
|
end
|
|
190
203
|
end
|
|
191
204
|
|
|
192
|
-
|
|
193
|
-
def metadata
|
|
194
|
-
data_store.get_metadata(config)
|
|
195
|
-
end
|
|
205
|
+
def state_snapshot = state_store.state_snapshot
|
|
196
206
|
end
|
|
197
207
|
end
|
|
198
208
|
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Domain
|
|
5
|
+
# Request metrics over a given window.
|
|
6
|
+
#
|
|
7
|
+
# @!attribute successes
|
|
8
|
+
# A number of successes within requested window. Zero for non-windowed metrics
|
|
9
|
+
# @return [Integer]
|
|
10
|
+
#
|
|
11
|
+
# @!attribute errors
|
|
12
|
+
# A number of errors within requested window. Zero for non-windowed metrics
|
|
13
|
+
# @return [Integer]
|
|
14
|
+
#
|
|
15
|
+
# @!attribute consecutive_errors
|
|
16
|
+
# A number of consecutive errors
|
|
17
|
+
# @return [Integer]
|
|
18
|
+
#
|
|
19
|
+
# @!attribute consecutive_successes
|
|
20
|
+
# A number of consecutive successes
|
|
21
|
+
# @return [Integer]
|
|
22
|
+
#
|
|
23
|
+
# @!attribute last_error
|
|
24
|
+
# @return [Stoplight::Domain::Failure, nil]
|
|
25
|
+
#
|
|
26
|
+
# @!attribute last_success_at
|
|
27
|
+
# @return [Time, nil]
|
|
28
|
+
#
|
|
29
|
+
# @api private
|
|
30
|
+
Metrics = Data.define(
|
|
31
|
+
:successes,
|
|
32
|
+
:errors,
|
|
33
|
+
:consecutive_errors,
|
|
34
|
+
:consecutive_successes,
|
|
35
|
+
:last_error,
|
|
36
|
+
:last_success_at
|
|
37
|
+
) do
|
|
38
|
+
# Calculates the error rate based on the number of successes and errors.
|
|
39
|
+
#
|
|
40
|
+
# @return [Float, nil] +nil+ when +requests+ is +nil+
|
|
41
|
+
def error_rate
|
|
42
|
+
return unless requests # we effectively check if this is windowed metrics
|
|
43
|
+
|
|
44
|
+
if (successes + errors).zero?
|
|
45
|
+
0.0
|
|
46
|
+
else
|
|
47
|
+
errors.fdiv(successes + errors)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# @return [Integer, nil] +nil+ when +successes+ or +errors+ is +nil+
|
|
52
|
+
def requests
|
|
53
|
+
if successes && errors # we effectively check if this is windowed metrics
|
|
54
|
+
successes + errors
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# @return [Time, nil]
|
|
59
|
+
def last_error_at
|
|
60
|
+
last_error&.time
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Domain
|
|
5
|
+
# Token representing an acquired recovery lock.
|
|
6
|
+
#
|
|
7
|
+
# Returned by +DataStore#acquire_recovery_lock+ and passed to
|
|
8
|
+
# +DataStore#release_recovery_lock+ to identify which lock to release.
|
|
9
|
+
#
|
|
10
|
+
# The actual locking mechanism lives in DataStore implementations,
|
|
11
|
+
# not in these tokens.
|
|
12
|
+
class RecoveryLockToken
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -2,41 +2,39 @@
|
|
|
2
2
|
|
|
3
3
|
module Stoplight
|
|
4
4
|
module Domain
|
|
5
|
-
#
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
5
|
+
# @!attribute breached_at
|
|
6
|
+
# The time when the light became red (breached threshold)
|
|
7
|
+
# @return [Time, nil]
|
|
8
|
+
#
|
|
9
|
+
# @!attribute locked_state
|
|
10
|
+
# @return [State::UNLOCKED | State::LOCKED_GREEN | State::LOCKED_RED]
|
|
11
|
+
#
|
|
12
|
+
# @!attribute recovery_scheduled_after
|
|
13
|
+
# When Light transitions to RED, it schedules recovery after the Cool Off Time.
|
|
14
|
+
# @return [Time, nil]
|
|
15
|
+
#
|
|
16
|
+
# @!attribute recovery_started_at
|
|
17
|
+
# When in YELLOW state, this time indicates the time of transitioning to YELLOW
|
|
18
|
+
# @return [Time, nil]
|
|
19
|
+
#
|
|
20
|
+
# @!attribute time
|
|
21
|
+
# The time when the snapshot was taken
|
|
22
|
+
# @return [Time]
|
|
23
|
+
#
|
|
24
|
+
StateSnapshot = Data.define(
|
|
16
25
|
:breached_at,
|
|
17
26
|
:locked_state,
|
|
18
27
|
:recovery_scheduled_after,
|
|
19
28
|
:recovery_started_at,
|
|
20
|
-
:
|
|
21
|
-
:current_time
|
|
29
|
+
:time
|
|
22
30
|
) do
|
|
23
|
-
# YELLOW color could be entered implicitly through a timeout
|
|
24
|
-
# and explicitly through a transition.
|
|
25
|
-
#
|
|
26
|
-
# This method indicates whether the recovery has already started explicitly
|
|
27
|
-
#
|
|
28
|
-
# @return [Boolean]
|
|
29
|
-
def recovery_started?
|
|
30
|
-
recovery_started_at && recovery_started_at <= current_time
|
|
31
|
-
end
|
|
32
|
-
|
|
33
31
|
# @return [String] one of +Color::GREEN+, +Color::RED+, or +Color::YELLOW+
|
|
34
32
|
def color
|
|
35
33
|
if locked_state == State::LOCKED_GREEN
|
|
36
34
|
Color::GREEN
|
|
37
35
|
elsif locked_state == State::LOCKED_RED
|
|
38
36
|
Color::RED
|
|
39
|
-
elsif (recovery_scheduled_after && recovery_scheduled_after <
|
|
37
|
+
elsif (recovery_scheduled_after && recovery_scheduled_after < time) || recovery_started_at
|
|
40
38
|
Color::YELLOW
|
|
41
39
|
elsif breached_at
|
|
42
40
|
Color::RED
|
|
@@ -45,20 +43,14 @@ module Stoplight
|
|
|
45
43
|
end
|
|
46
44
|
end
|
|
47
45
|
|
|
48
|
-
#
|
|
46
|
+
# YELLOW color could be entered implicitly through a timeout
|
|
47
|
+
# and explicitly through a transition.
|
|
49
48
|
#
|
|
50
|
-
#
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
errors.fdiv(successes + errors)
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# @return [Integer]
|
|
60
|
-
def requests
|
|
61
|
-
successes + errors
|
|
49
|
+
# This method indicates whether the recovery has already started explicitly
|
|
50
|
+
#
|
|
51
|
+
# @return [Boolean]
|
|
52
|
+
def recovery_started?
|
|
53
|
+
recovery_started_at && recovery_started_at <= time
|
|
62
54
|
end
|
|
63
55
|
end
|
|
64
56
|
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Domain
|
|
5
|
+
module Storage
|
|
6
|
+
# Encapsulates metrics storage for circuit breaker execution tracking.
|
|
7
|
+
#
|
|
8
|
+
# This abstraction isolates metrics collection and retrieval from the
|
|
9
|
+
# broader data store concerns, enabling:
|
|
10
|
+
# - Purpose-built implementations optimized for time-series data
|
|
11
|
+
# - Independent scaling and optimization of metrics vs. state storage
|
|
12
|
+
# - Clearer separation between "what happened" (metrics) and "what to do" (state)
|
|
13
|
+
#
|
|
14
|
+
# Lifecycle: A Metrics instance is scoped to a single circuit breaker
|
|
15
|
+
# configuration. Each circuit gets its own metrics store instance,
|
|
16
|
+
# allowing different circuits to use different storage strategies.
|
|
17
|
+
#
|
|
18
|
+
# @abstract
|
|
19
|
+
class Metrics
|
|
20
|
+
# Retrieves a snapshot of current metrics for decision-making.
|
|
21
|
+
#
|
|
22
|
+
# @return [Stoplight::Domain::Metrics]
|
|
23
|
+
def metrics_snapshot = raise NotImplementedError
|
|
24
|
+
|
|
25
|
+
# Records a successful circuit breaker execution
|
|
26
|
+
#
|
|
27
|
+
# @return [void]
|
|
28
|
+
def record_success = raise NotImplementedError
|
|
29
|
+
|
|
30
|
+
# Records a failed circuit breaker execution
|
|
31
|
+
#
|
|
32
|
+
# @param error [StandardError]
|
|
33
|
+
# @return [void]
|
|
34
|
+
def record_failure(error) = raise NotImplementedError
|
|
35
|
+
|
|
36
|
+
# Clears all metrics for this circuit
|
|
37
|
+
# @return [void]
|
|
38
|
+
def clear = raise NotImplementedError
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Domain
|
|
5
|
+
module Storage
|
|
6
|
+
# Encapsulates recovery lock management for coordinating recovery probes.
|
|
7
|
+
#
|
|
8
|
+
# When a circuit enters YELLOW state (half-open), it begins sending
|
|
9
|
+
# "recovery probes" - test requests to check if the protected service
|
|
10
|
+
# has recovered. In distributed deployments with multiple instances,
|
|
11
|
+
# recovery locks ensure only ONE instance sends probes at a time.
|
|
12
|
+
#
|
|
13
|
+
# Without coordination, all instances would simultaneously:
|
|
14
|
+
# 1. Detect the circuit is YELLOW
|
|
15
|
+
# 2. Send recovery probes to the struggling service
|
|
16
|
+
# 3. Potentially overwhelm it with "test" traffic
|
|
17
|
+
#
|
|
18
|
+
# Lock Lifecycle:
|
|
19
|
+
#
|
|
20
|
+
# Instance A: acquire_lock -> probe -> release_lock
|
|
21
|
+
# Instance B: acquire_lock -> nil (already held) -> skip probe
|
|
22
|
+
# Instance C: acquire_lock -> nil (already held) -> skip probe
|
|
23
|
+
#
|
|
24
|
+
# Lock Semantics:
|
|
25
|
+
# - Returns +nil+ if lock is already held. Never blocks waiting for lock availability
|
|
26
|
+
# - Locks must automatically expire when persisted storage is used
|
|
27
|
+
# - Failed releases are acceptable (timeout provides safety)
|
|
28
|
+
#
|
|
29
|
+
# @abstract
|
|
30
|
+
# @see Stoplight::Domain::Strategies::YellowRunStrategy
|
|
31
|
+
class RecoveryLock
|
|
32
|
+
# Attempts to acquire recovery lock for exclusive probe execution.
|
|
33
|
+
#
|
|
34
|
+
# This method tries to acquire a lock that serializes recovery probe
|
|
35
|
+
# execution across multiple instances. If the lock is already held by
|
|
36
|
+
# another instance, returns +nil+ immediately without blocking.
|
|
37
|
+
#
|
|
38
|
+
# @return [Stoplight::Domain::RecoveryLockToken, nil]
|
|
39
|
+
# - +RecoveryLockToken+: Lock acquired, caller should send probe
|
|
40
|
+
# - +nil+: Lock unavailable, another instance is probing
|
|
41
|
+
#
|
|
42
|
+
def acquire_lock = raise NotImplementedError
|
|
43
|
+
|
|
44
|
+
# Releases a previously acquired lock.
|
|
45
|
+
#
|
|
46
|
+
# This method releases the lock token returned by +#acquire_lock+,
|
|
47
|
+
# allowing other instances to acquire it. Release should be called
|
|
48
|
+
# in an ensure block to guarantee cleanup even if probe fails.
|
|
49
|
+
#
|
|
50
|
+
# @param lock [Stoplight::Domain::RecoveryLockToken] The token returned by +#acquire_lock+
|
|
51
|
+
# @return [void]
|
|
52
|
+
def release_lock(lock) = raise NotImplementedError
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Domain
|
|
5
|
+
module Storage
|
|
6
|
+
# Encapsulates circuit breaker state storage.
|
|
7
|
+
#
|
|
8
|
+
# State management handles the current operational mode of a circuit breaker:
|
|
9
|
+
# - Color (GREEN/YELLOW/RED) - whether the circuit is open or closed
|
|
10
|
+
# - Lock state (LOCKED_GREEN/LOCKED_RED/UNLOCKED) - manual overrides
|
|
11
|
+
# - State transitions - tracking color changes for notifications
|
|
12
|
+
#
|
|
13
|
+
# State requires stronger consistency than metrics because:
|
|
14
|
+
# - Multiple instances must agree on circuit color
|
|
15
|
+
# - Race conditions during transitions must be handled
|
|
16
|
+
# - Lock states must be immediately visible across instances
|
|
17
|
+
#
|
|
18
|
+
# @abstract
|
|
19
|
+
# @see Stoplight::Domain::Storage::Metrics
|
|
20
|
+
class State
|
|
21
|
+
# Retrieves current state snapshot for decision-making.
|
|
22
|
+
#
|
|
23
|
+
# The snapshot is an immutable view of the circuit's current state,
|
|
24
|
+
# including its color and lock status. This method is called on every
|
|
25
|
+
# circuit breaker invocation to determine whether to allow traffic.
|
|
26
|
+
#
|
|
27
|
+
# This is called on every request, so implementations should be fast.
|
|
28
|
+
#
|
|
29
|
+
# @return [Stoplight::Domain::StateSnapshot]
|
|
30
|
+
def state_snapshot = raise NotImplementedError
|
|
31
|
+
|
|
32
|
+
# Sets the lock state of the circuit.
|
|
33
|
+
#
|
|
34
|
+
# Locks allow manual override of circuit behavior:
|
|
35
|
+
# - LOCKED_GREEN: Force circuit closed (allow all traffic)
|
|
36
|
+
# - LOCKED_RED: Force circuit open (block all traffic)
|
|
37
|
+
# - UNLOCKED: Follow normal circuit breaker rules
|
|
38
|
+
#
|
|
39
|
+
# Lock states take precedence over color states. A locked circuit
|
|
40
|
+
# ignores failure thresholds and stays in the locked state until
|
|
41
|
+
# explicitly unlocked.
|
|
42
|
+
#
|
|
43
|
+
# Use Cases:
|
|
44
|
+
# - Emergency traffic control during incidents
|
|
45
|
+
# - Maintenance windows (lock RED to prevent traffic)
|
|
46
|
+
# - Gradual rollout (lock GREEN during testing)
|
|
47
|
+
#
|
|
48
|
+
# @param state [String] The new state to set.
|
|
49
|
+
# @return [String] The state that was set.
|
|
50
|
+
def set_state(state) = raise NotImplementedError
|
|
51
|
+
|
|
52
|
+
# Transitions the Stoplight to the specified color.
|
|
53
|
+
#
|
|
54
|
+
# This method performs a color transition operation that works across distributed instances
|
|
55
|
+
# of the light. It ensures that in a multi-instance environment, only one instance
|
|
56
|
+
# is considered the "first" to perform the transition (and therefore responsible for
|
|
57
|
+
# triggering notifications).
|
|
58
|
+
#
|
|
59
|
+
# @param color [String] The target color/state to transition to.
|
|
60
|
+
# Should be one of Stoplight::Color::GREEN, Stoplight::Color::YELLOW, or Stoplight::Color::RED.
|
|
61
|
+
#
|
|
62
|
+
# @return [Boolean] Returns +true+ if this instance was the first to perform this specific transition
|
|
63
|
+
# (and should therefore trigger notifications). Returns +false+ if another instance already
|
|
64
|
+
# initiated this transition.
|
|
65
|
+
#
|
|
66
|
+
# @note In distributed environments with multiple instances, race conditions can occur when instances
|
|
67
|
+
# attempt conflicting transitions simultaneously (e.g., one instance tries to transition from
|
|
68
|
+
# YELLOW to GREEN while another tries YELLOW to RED). The implementation handles this, but
|
|
69
|
+
# be aware that the last operation may determine the final color of the light.
|
|
70
|
+
#
|
|
71
|
+
def transition_to_color(color) = raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
# Clears all state data for this circuit.
|
|
74
|
+
#
|
|
75
|
+
# This removes the circuit from storage entirely, resetting it to
|
|
76
|
+
# default (unlocked, green) state. The next invocation will start
|
|
77
|
+
# with fresh state.
|
|
78
|
+
#
|
|
79
|
+
# @note This does NOT clear metrics. If you want to fully
|
|
80
|
+
# reset a circuit, clear both state and metrics stores.
|
|
81
|
+
#
|
|
82
|
+
# @return [void]
|
|
83
|
+
def clear = raise NotImplementedError
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -28,11 +28,11 @@ module Stoplight
|
|
|
28
28
|
# Executes the provided code block when the light is in the green state.
|
|
29
29
|
#
|
|
30
30
|
# @param fallback [Proc, nil] A fallback proc to execute in case of an error.
|
|
31
|
-
# @param
|
|
31
|
+
# @param state_snapshot [Stoplight::Domain::StateSnapshot]
|
|
32
32
|
# @yield The code block to execute.
|
|
33
33
|
# @return [Object] The result of the code block if successful.
|
|
34
34
|
# @raise [Exception] Re-raises the error if it is not tracked or no fallback is provided.
|
|
35
|
-
def execute(fallback,
|
|
35
|
+
def execute(fallback, state_snapshot:, &code)
|
|
36
36
|
# TODO: Consider implementing sampling rate to limit the memory footprint
|
|
37
37
|
code.call.tap { record_success }
|
|
38
38
|
rescue => error
|
|
@@ -21,17 +21,17 @@ module Stoplight
|
|
|
21
21
|
# Executes the fallback proc when the light is in the red state.
|
|
22
22
|
#
|
|
23
23
|
# @param fallback [Proc, nil] A fallback proc to execute instead of the code block.
|
|
24
|
-
# @param
|
|
24
|
+
# @param state_snapshot [Stoplight::Domain::StateSnapshot]
|
|
25
25
|
# @return [Object, nil] The result of the fallback proc if provided.
|
|
26
26
|
# @raise [Stoplight::Error::RedLight] Raises an error if no fallback is provided.
|
|
27
|
-
def execute(fallback,
|
|
27
|
+
def execute(fallback, state_snapshot:)
|
|
28
28
|
if fallback
|
|
29
29
|
fallback.call(nil)
|
|
30
30
|
else
|
|
31
31
|
raise Error::RedLight.new(
|
|
32
32
|
config.name,
|
|
33
33
|
cool_off_time: config.cool_off_time,
|
|
34
|
-
retry_after:
|
|
34
|
+
retry_after: state_snapshot.recovery_scheduled_after
|
|
35
35
|
)
|
|
36
36
|
end
|
|
37
37
|
end
|
|
@@ -10,17 +10,12 @@ module Stoplight
|
|
|
10
10
|
# @abstract
|
|
11
11
|
class RunStrategy
|
|
12
12
|
# @param fallback [Proc, nil] A fallback proc to execute in case of an error.
|
|
13
|
-
# @param
|
|
13
|
+
# @param state_snapshot [Stoplight::Domain::StateSnapshot]
|
|
14
14
|
# :nocov:
|
|
15
|
-
def execute(fallback,
|
|
15
|
+
def execute(fallback, state_snapshot:, &code)
|
|
16
16
|
raise NotImplementedError, "Subclasses must implement the execute method"
|
|
17
17
|
end
|
|
18
18
|
# :nocov:
|
|
19
|
-
|
|
20
|
-
# @return [Boolean]
|
|
21
|
-
def ==(other)
|
|
22
|
-
other.is_a?(self.class)
|
|
23
|
-
end
|
|
24
19
|
end
|
|
25
20
|
end
|
|
26
21
|
end
|