stoplight 5.5.0 → 5.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/stoplight/admin/actions/remove.rb +23 -0
- data/lib/stoplight/admin/dependencies.rb +6 -1
- data/lib/stoplight/admin/helpers.rb +10 -5
- data/lib/stoplight/admin/lights_repository.rb +26 -14
- data/lib/stoplight/admin/views/_card.erb +13 -1
- data/lib/stoplight/admin.rb +9 -0
- data/lib/stoplight/common/deprecations.rb +11 -0
- data/lib/stoplight/domain/config.rb +5 -1
- data/lib/stoplight/domain/data_store.rb +58 -6
- data/lib/stoplight/domain/failure.rb +2 -0
- data/lib/stoplight/domain/light/configuration_builder_interface.rb +120 -16
- data/lib/stoplight/domain/light.rb +34 -24
- data/lib/stoplight/domain/metrics.rb +64 -0
- data/lib/stoplight/domain/recovery_lock_token.rb +15 -0
- data/lib/stoplight/domain/{metadata.rb → state_snapshot.rb} +29 -37
- data/lib/stoplight/domain/storage/metrics.rb +42 -0
- data/lib/stoplight/domain/storage/recovery_lock.rb +56 -0
- data/lib/stoplight/domain/storage/state.rb +87 -0
- data/lib/stoplight/domain/strategies/green_run_strategy.rb +2 -2
- data/lib/stoplight/domain/strategies/red_run_strategy.rb +3 -3
- data/lib/stoplight/domain/strategies/run_strategy.rb +2 -7
- data/lib/stoplight/domain/strategies/yellow_run_strategy.rb +63 -36
- data/lib/stoplight/domain/tracker/base.rb +0 -29
- data/lib/stoplight/domain/tracker/recovery_probe.rb +26 -22
- data/lib/stoplight/domain/tracker/request.rb +26 -21
- data/lib/stoplight/domain/traffic_control/base.rb +5 -5
- data/lib/stoplight/domain/traffic_control/consecutive_errors.rb +3 -7
- data/lib/stoplight/domain/traffic_control/error_rate.rb +3 -3
- data/lib/stoplight/domain/traffic_recovery/base.rb +5 -5
- data/lib/stoplight/domain/traffic_recovery/consecutive_successes.rb +4 -8
- data/lib/stoplight/domain/traffic_recovery.rb +0 -1
- data/lib/stoplight/infrastructure/data_store/fail_safe.rb +164 -0
- data/lib/stoplight/infrastructure/data_store/memory/metrics.rb +27 -0
- data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_store.rb +54 -0
- data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_token.rb +20 -0
- data/lib/stoplight/infrastructure/data_store/memory/state.rb +21 -0
- data/lib/stoplight/infrastructure/data_store/memory.rb +163 -132
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/get_metrics.lua +26 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_failure.lua +27 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_success.lua +23 -0
- data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/release_lock.lua +6 -0
- data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_store.rb +73 -0
- data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_token.rb +35 -0
- data/lib/stoplight/infrastructure/data_store/redis/scripting.rb +71 -0
- data/lib/stoplight/infrastructure/data_store/redis.rb +211 -165
- data/lib/stoplight/infrastructure/notifier/fail_safe.rb +62 -0
- data/lib/stoplight/infrastructure/storage/compatibility_metrics.rb +48 -0
- data/lib/stoplight/infrastructure/storage/compatibility_recovery_lock.rb +36 -0
- data/lib/stoplight/infrastructure/storage/compatibility_recovery_metrics.rb +55 -0
- data/lib/stoplight/infrastructure/storage/compatibility_state.rb +55 -0
- data/lib/stoplight/version.rb +1 -1
- data/lib/stoplight/wiring/data_store/base.rb +11 -0
- data/lib/stoplight/wiring/data_store/memory.rb +10 -0
- data/lib/stoplight/wiring/data_store/redis.rb +25 -0
- data/lib/stoplight/wiring/default.rb +1 -1
- data/lib/stoplight/wiring/default_configuration.rb +1 -1
- data/lib/stoplight/wiring/default_factory_builder.rb +1 -1
- data/lib/stoplight/wiring/light_builder.rb +185 -0
- data/lib/stoplight/wiring/light_factory/compatibility_validator.rb +55 -0
- data/lib/stoplight/wiring/light_factory/config_normalizer.rb +71 -0
- data/lib/stoplight/wiring/light_factory/configuration_pipeline.rb +72 -0
- data/lib/stoplight/wiring/light_factory/traffic_control_dsl.rb +26 -0
- data/lib/stoplight/wiring/light_factory/traffic_recovery_dsl.rb +21 -0
- data/lib/stoplight/wiring/light_factory.rb +45 -132
- data/lib/stoplight/wiring/notifier_factory.rb +26 -0
- data/lib/stoplight/wiring/public_api.rb +3 -2
- data/lib/stoplight.rb +18 -3
- metadata +55 -16
- data/lib/stoplight/infrastructure/data_store/redis/get_metadata.lua +0 -38
- data/lib/stoplight/infrastructure/data_store/redis/lua.rb +0 -25
- data/lib/stoplight/infrastructure/dependency_injection/container.rb +0 -249
- data/lib/stoplight/infrastructure/dependency_injection/unresolved_dependency_error.rb +0 -13
- data/lib/stoplight/wiring/container.rb +0 -80
- data/lib/stoplight/wiring/fail_safe_data_store.rb +0 -123
- data/lib/stoplight/wiring/fail_safe_notifier.rb +0 -79
- data/lib/stoplight/wiring/system_container.rb +0 -9
- data/lib/stoplight/wiring/system_light_factory.rb +0 -17
- /data/lib/stoplight/infrastructure/data_store/redis/{record_failure.lua → lua_scripts/record_failure.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{record_success.lua → lua_scripts/record_success.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_green.lua → lua_scripts/transition_to_green.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_red.lua → lua_scripts/transition_to_red.lua} +0 -0
- /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_yellow.lua → lua_scripts/transition_to_yellow.lua} +0 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent/map"
|
|
4
|
+
|
|
5
|
+
module Stoplight
|
|
6
|
+
module Infrastructure
|
|
7
|
+
module DataStore
|
|
8
|
+
class Memory
|
|
9
|
+
# Process-local recovery lock using Ruby's Thread::Mutex.
|
|
10
|
+
#
|
|
11
|
+
# This only serializes recovery within a single Ruby process.
|
|
12
|
+
# Multiple processes/servers will NOT coordinate - each process
|
|
13
|
+
# can send probes independently.
|
|
14
|
+
#
|
|
15
|
+
# Mutex Lifecycle:
|
|
16
|
+
# - One mutex created per unique light_name (lazily)
|
|
17
|
+
# - Mutexes persist for process lifetime (never GC'd)
|
|
18
|
+
#
|
|
19
|
+
class RecoveryLockStore
|
|
20
|
+
# @!attribute locks
|
|
21
|
+
# Stores one mutex per unique light_name for the lifetime of the process.
|
|
22
|
+
# Mutexes are never garbage collected.
|
|
23
|
+
# @return [Concurrent::Map<Thread::Mutex>]
|
|
24
|
+
private attr_reader :locks
|
|
25
|
+
|
|
26
|
+
def initialize
|
|
27
|
+
@locks = Concurrent::Map.new
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# @param light_name [String]
|
|
31
|
+
# @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken, nil]
|
|
32
|
+
def acquire_lock(light_name)
|
|
33
|
+
lock = lock_for(light_name)
|
|
34
|
+
RecoveryLockToken.new(light_name:) if lock.try_lock
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# @param recovery_lock_token [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken]
|
|
38
|
+
# @return [void]
|
|
39
|
+
def release_lock(recovery_lock_token)
|
|
40
|
+
lock_for(recovery_lock_token.light_name).unlock
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @param light_name [String]
|
|
44
|
+
# @return [Thread::Mutex]
|
|
45
|
+
private def lock_for(light_name)
|
|
46
|
+
locks.compute_if_absent(light_name) do
|
|
47
|
+
Thread::Mutex.new
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Infrastructure
|
|
5
|
+
module DataStore
|
|
6
|
+
class Memory
|
|
7
|
+
class RecoveryLockToken < Domain::RecoveryLockToken
|
|
8
|
+
# @!attribute light_name
|
|
9
|
+
# @return [String]
|
|
10
|
+
attr_reader :light_name
|
|
11
|
+
|
|
12
|
+
# @param light_name [String]
|
|
13
|
+
def initialize(light_name:)
|
|
14
|
+
@light_name = light_name
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Stoplight
|
|
4
|
+
module Infrastructure
|
|
5
|
+
module DataStore
|
|
6
|
+
class Memory
|
|
7
|
+
class State
|
|
8
|
+
attr_accessor :recovered_at
|
|
9
|
+
attr_accessor :locked_state
|
|
10
|
+
attr_accessor :recovery_scheduled_after
|
|
11
|
+
attr_accessor :recovery_started_at
|
|
12
|
+
attr_accessor :breached_at
|
|
13
|
+
|
|
14
|
+
def initialize
|
|
15
|
+
@locked_state = Domain::State::UNLOCKED
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -11,68 +11,96 @@ module Stoplight
|
|
|
11
11
|
|
|
12
12
|
KEY_SEPARATOR = ":"
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
# @!attribute recovery_lock_store
|
|
15
|
+
# @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockStore]
|
|
16
|
+
# @api private
|
|
17
|
+
private attr_reader :recovery_lock_store
|
|
18
|
+
|
|
19
|
+
# @param recovery_lock_store [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockStore]
|
|
20
|
+
def initialize(recovery_lock_store:)
|
|
21
|
+
@recovery_lock_store = recovery_lock_store
|
|
15
22
|
@errors = Hash.new { |errors, light_name| errors[light_name] = SlidingWindow.new }
|
|
16
23
|
@successes = Hash.new { |successes, light_name| successes[light_name] = SlidingWindow.new }
|
|
24
|
+
@metrics = Hash.new { |metrics, light_name| metrics[light_name] = Metrics.new }
|
|
17
25
|
|
|
18
|
-
@
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
current_time: Time.now,
|
|
24
|
-
successes: 0,
|
|
25
|
-
errors: 0,
|
|
26
|
-
recovery_probe_successes: 0,
|
|
27
|
-
recovery_probe_errors: 0,
|
|
28
|
-
last_error: nil,
|
|
29
|
-
last_error_at: nil,
|
|
30
|
-
last_success_at: nil,
|
|
31
|
-
consecutive_errors: 0,
|
|
32
|
-
consecutive_successes: 0,
|
|
33
|
-
breached_at: nil,
|
|
34
|
-
locked_state: Domain::State::UNLOCKED,
|
|
35
|
-
recovery_scheduled_after: nil,
|
|
36
|
-
recovery_started_at: nil,
|
|
37
|
-
recovered_at: nil
|
|
38
|
-
)
|
|
39
|
-
end
|
|
40
|
-
super # MonitorMixin
|
|
26
|
+
@recovery_metrics = Hash.new { |metrics, light_name| metrics[light_name] = Metrics.new }
|
|
27
|
+
|
|
28
|
+
@states = Hash.new { |states, light_name| states[light_name] = State.new }
|
|
29
|
+
|
|
30
|
+
super() # MonitorMixin
|
|
41
31
|
end
|
|
42
32
|
|
|
43
33
|
# @return [Array<String>]
|
|
44
34
|
def names
|
|
45
|
-
synchronize { @
|
|
35
|
+
synchronize { @metrics.keys | @states.keys | @recovery_metrics.keys }
|
|
46
36
|
end
|
|
47
37
|
|
|
48
38
|
# @param config [Stoplight::Domain::Config]
|
|
49
|
-
# @return [Stoplight::Domain::
|
|
50
|
-
def
|
|
39
|
+
# @return [Stoplight::Domain::Metrics]
|
|
40
|
+
def get_metrics(config)
|
|
51
41
|
light_name = config.name
|
|
52
42
|
|
|
53
43
|
synchronize do
|
|
54
44
|
current_time = self.current_time
|
|
55
|
-
recovery_window_start = (current_time - config.cool_off_time)
|
|
56
|
-
recovered_at = @metadata[light_name].recovered_at
|
|
57
45
|
window_start = if config.window_size
|
|
58
|
-
|
|
46
|
+
(current_time - config.window_size)
|
|
59
47
|
else
|
|
60
48
|
current_time
|
|
61
49
|
end
|
|
62
50
|
|
|
63
|
-
@
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
51
|
+
metrics = @metrics[light_name]
|
|
52
|
+
|
|
53
|
+
errors = @errors[light_name].sum_in_window(window_start) if config.window_size
|
|
54
|
+
successes = @successes[light_name].sum_in_window(window_start) if config.window_size
|
|
55
|
+
consecutive_errors = config.window_size ? [metrics.consecutive_errors, errors].min : metrics.consecutive_errors
|
|
56
|
+
consecutive_successes = config.window_size ? [metrics.consecutive_successes.to_i, successes].min : metrics.consecutive_successes.to_i
|
|
57
|
+
|
|
58
|
+
Domain::Metrics.new(
|
|
59
|
+
errors:,
|
|
60
|
+
successes:,
|
|
61
|
+
consecutive_errors:,
|
|
62
|
+
consecutive_successes:,
|
|
63
|
+
last_error: metrics.last_error,
|
|
64
|
+
last_success_at: metrics.last_success_at
|
|
65
|
+
)
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# @return [Stoplight::Domain::Metrics]
|
|
70
|
+
def get_recovery_metrics(config)
|
|
71
|
+
light_name = config.name
|
|
72
|
+
|
|
73
|
+
synchronize do
|
|
74
|
+
metrics = @recovery_metrics[light_name]
|
|
75
|
+
|
|
76
|
+
Domain::Metrics.new(
|
|
77
|
+
errors: nil, successes: nil,
|
|
78
|
+
consecutive_errors: metrics.consecutive_errors,
|
|
79
|
+
consecutive_successes: metrics.consecutive_successes,
|
|
80
|
+
last_error: metrics.last_error,
|
|
81
|
+
last_success_at: metrics.last_success_at
|
|
69
82
|
)
|
|
70
83
|
end
|
|
71
84
|
end
|
|
72
85
|
|
|
86
|
+
# @return [Stoplight::Domain::StateSnapshot]
|
|
87
|
+
def get_state_snapshot(config)
|
|
88
|
+
time, state = synchronize do
|
|
89
|
+
[current_time, @states[config.name]]
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
Domain::StateSnapshot.new(
|
|
93
|
+
time:,
|
|
94
|
+
locked_state: state.locked_state,
|
|
95
|
+
recovery_scheduled_after: state.recovery_scheduled_after,
|
|
96
|
+
recovery_started_at: state.recovery_started_at,
|
|
97
|
+
breached_at: state.breached_at
|
|
98
|
+
)
|
|
99
|
+
end
|
|
100
|
+
|
|
73
101
|
# @param config [Stoplight::Domain::Config]
|
|
74
102
|
# @param exception [Exception]
|
|
75
|
-
# @return [
|
|
103
|
+
# @return [void]
|
|
76
104
|
def record_failure(config, exception)
|
|
77
105
|
current_time = self.current_time
|
|
78
106
|
light_name = config.name
|
|
@@ -81,21 +109,31 @@ module Stoplight
|
|
|
81
109
|
synchronize do
|
|
82
110
|
@errors[light_name].increment if config.window_size
|
|
83
111
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
112
|
+
metrics = @metrics[light_name]
|
|
113
|
+
|
|
114
|
+
if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
|
|
115
|
+
metrics.last_error = failure
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
metrics.consecutive_errors += 1
|
|
119
|
+
metrics.consecutive_successes = 0
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def clear_metrics(config)
|
|
124
|
+
light_name = config.name
|
|
125
|
+
synchronize do
|
|
126
|
+
if config.window_size
|
|
127
|
+
@errors[light_name] = SlidingWindow.new
|
|
128
|
+
@successes[light_name] = SlidingWindow.new
|
|
97
129
|
end
|
|
98
|
-
|
|
130
|
+
@metrics[light_name] = Metrics.new
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
def clear_recovery_metrics(config)
|
|
135
|
+
synchronize do
|
|
136
|
+
@recovery_metrics[config.name] = Metrics.new
|
|
99
137
|
end
|
|
100
138
|
end
|
|
101
139
|
|
|
@@ -108,74 +146,51 @@ module Stoplight
|
|
|
108
146
|
synchronize do
|
|
109
147
|
@successes[light_name].increment if config.window_size
|
|
110
148
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
consecutive_errors: 0,
|
|
116
|
-
consecutive_successes: metadata.consecutive_successes.succ
|
|
117
|
-
)
|
|
118
|
-
else
|
|
119
|
-
metadata.with(
|
|
120
|
-
consecutive_errors: 0,
|
|
121
|
-
consecutive_successes: metadata.consecutive_successes.succ
|
|
122
|
-
)
|
|
149
|
+
metrics = @metrics[light_name]
|
|
150
|
+
|
|
151
|
+
if metrics.last_success_at.nil? || current_time > metrics.last_success_at
|
|
152
|
+
metrics.last_success_at = current_time
|
|
123
153
|
end
|
|
154
|
+
|
|
155
|
+
metrics.consecutive_errors = 0
|
|
156
|
+
metrics.consecutive_successes += 1
|
|
124
157
|
end
|
|
125
158
|
end
|
|
126
159
|
|
|
127
160
|
# @param config [Stoplight::Domain::Config]
|
|
128
161
|
# @param exception [Exception]
|
|
129
|
-
# @return [
|
|
162
|
+
# @return [void]
|
|
130
163
|
def record_recovery_probe_failure(config, exception)
|
|
131
164
|
light_name = config.name
|
|
132
165
|
current_time = self.current_time
|
|
133
166
|
failure = Domain::Failure.from_error(exception, time: current_time)
|
|
134
167
|
|
|
135
168
|
synchronize do
|
|
136
|
-
@
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
metadata.with(
|
|
141
|
-
last_error_at: current_time,
|
|
142
|
-
last_error: failure,
|
|
143
|
-
consecutive_errors: metadata.consecutive_errors.succ,
|
|
144
|
-
consecutive_successes: 0
|
|
145
|
-
)
|
|
146
|
-
else
|
|
147
|
-
metadata.with(
|
|
148
|
-
consecutive_errors: metadata.consecutive_errors.succ,
|
|
149
|
-
consecutive_successes: 0
|
|
150
|
-
)
|
|
169
|
+
metrics = @recovery_metrics[light_name]
|
|
170
|
+
|
|
171
|
+
if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
|
|
172
|
+
metrics.last_error = failure
|
|
151
173
|
end
|
|
152
|
-
|
|
174
|
+
|
|
175
|
+
metrics.consecutive_errors += 1
|
|
176
|
+
metrics.consecutive_successes = 0
|
|
153
177
|
end
|
|
154
178
|
end
|
|
155
179
|
|
|
156
180
|
# @param config [Stoplight::Domain::Config]
|
|
157
|
-
# @return [
|
|
181
|
+
# @return [void]
|
|
158
182
|
def record_recovery_probe_success(config)
|
|
159
183
|
light_name = config.name
|
|
160
184
|
current_time = self.current_time
|
|
161
185
|
|
|
162
186
|
synchronize do
|
|
163
|
-
@
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
@metadata[light_name] = if metadata.last_success_at.nil? || current_time > metadata.last_success_at
|
|
167
|
-
metadata.with(
|
|
168
|
-
last_success_at: current_time,
|
|
169
|
-
consecutive_errors: 0,
|
|
170
|
-
consecutive_successes: metadata.consecutive_successes.succ
|
|
171
|
-
)
|
|
172
|
-
else
|
|
173
|
-
metadata.with(
|
|
174
|
-
consecutive_errors: 0,
|
|
175
|
-
consecutive_successes: metadata.consecutive_successes.succ
|
|
176
|
-
)
|
|
187
|
+
metrics = @recovery_metrics[light_name]
|
|
188
|
+
if metrics.last_success_at.nil? || current_time > metrics.last_success_at
|
|
189
|
+
metrics.last_success_at = current_time
|
|
177
190
|
end
|
|
178
|
-
|
|
191
|
+
|
|
192
|
+
metrics.consecutive_errors = 0
|
|
193
|
+
metrics.consecutive_successes += 1
|
|
179
194
|
end
|
|
180
195
|
end
|
|
181
196
|
|
|
@@ -186,8 +201,7 @@ module Stoplight
|
|
|
186
201
|
light_name = config.name
|
|
187
202
|
|
|
188
203
|
synchronize do
|
|
189
|
-
|
|
190
|
-
@metadata[light_name] = metadata.with(locked_state: state)
|
|
204
|
+
@states[light_name].locked_state = state
|
|
191
205
|
end
|
|
192
206
|
state
|
|
193
207
|
end
|
|
@@ -197,6 +211,20 @@ module Stoplight
|
|
|
197
211
|
"#<#{self.class.name}>"
|
|
198
212
|
end
|
|
199
213
|
|
|
214
|
+
# @param config [Stoplight::Domain::Config]
|
|
215
|
+
# @return [void]
|
|
216
|
+
def delete_light(config)
|
|
217
|
+
light_name = config.name
|
|
218
|
+
|
|
219
|
+
synchronize do
|
|
220
|
+
@states.delete(light_name)
|
|
221
|
+
@recovery_metrics.delete(light_name)
|
|
222
|
+
@metrics.delete(light_name)
|
|
223
|
+
@errors.delete(light_name)
|
|
224
|
+
@successes.delete(light_name)
|
|
225
|
+
end
|
|
226
|
+
end
|
|
227
|
+
|
|
200
228
|
# Combined method that performs the state transition based on color
|
|
201
229
|
#
|
|
202
230
|
# @param config [Stoplight::Domain::Config] The light configuration
|
|
@@ -215,6 +243,18 @@ module Stoplight
|
|
|
215
243
|
end
|
|
216
244
|
end
|
|
217
245
|
|
|
246
|
+
# @param config [Stoplight::Domain::Config]
|
|
247
|
+
# @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken, nil]
|
|
248
|
+
def acquire_recovery_lock(config)
|
|
249
|
+
recovery_lock_store.acquire_lock(config.name)
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# @param lock [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken]
|
|
253
|
+
# @return [void]
|
|
254
|
+
def release_recovery_lock(lock)
|
|
255
|
+
recovery_lock_store.release_lock(lock)
|
|
256
|
+
end
|
|
257
|
+
|
|
218
258
|
# Transitions to GREEN state and ensures only one notification
|
|
219
259
|
#
|
|
220
260
|
# @param config [Stoplight::Domain::Config] The light configuration
|
|
@@ -224,16 +264,15 @@ module Stoplight
|
|
|
224
264
|
current_time = self.current_time
|
|
225
265
|
|
|
226
266
|
synchronize do
|
|
227
|
-
|
|
228
|
-
|
|
267
|
+
state = @states[light_name]
|
|
268
|
+
|
|
269
|
+
if state.recovered_at
|
|
229
270
|
false
|
|
230
271
|
else
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
recovery_scheduled_after: nil
|
|
236
|
-
)
|
|
272
|
+
state.recovered_at = current_time
|
|
273
|
+
state.recovery_started_at = nil
|
|
274
|
+
state.breached_at = nil
|
|
275
|
+
state.recovery_scheduled_after = nil
|
|
237
276
|
true
|
|
238
277
|
end
|
|
239
278
|
end
|
|
@@ -248,21 +287,17 @@ module Stoplight
|
|
|
248
287
|
current_time = self.current_time
|
|
249
288
|
|
|
250
289
|
synchronize do
|
|
251
|
-
|
|
252
|
-
if
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
breached_at: nil
|
|
258
|
-
)
|
|
290
|
+
state = @states[light_name]
|
|
291
|
+
if state.recovery_started_at.nil?
|
|
292
|
+
state.recovery_started_at = current_time
|
|
293
|
+
state.recovery_scheduled_after = nil
|
|
294
|
+
state.recovered_at = nil
|
|
295
|
+
state.breached_at = nil
|
|
259
296
|
true
|
|
260
297
|
else
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
breached_at: nil
|
|
265
|
-
)
|
|
298
|
+
state.recovery_scheduled_after = nil
|
|
299
|
+
state.recovered_at = nil
|
|
300
|
+
state.breached_at = nil
|
|
266
301
|
false
|
|
267
302
|
end
|
|
268
303
|
end
|
|
@@ -278,21 +313,17 @@ module Stoplight
|
|
|
278
313
|
recovery_scheduled_after = current_time + config.cool_off_time
|
|
279
314
|
|
|
280
315
|
synchronize do
|
|
281
|
-
|
|
282
|
-
if
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
recovered_at: nil
|
|
287
|
-
)
|
|
316
|
+
state = @states[light_name]
|
|
317
|
+
if state.breached_at
|
|
318
|
+
state.recovery_scheduled_after = recovery_scheduled_after
|
|
319
|
+
state.recovery_started_at = nil
|
|
320
|
+
state.recovered_at = nil
|
|
288
321
|
false
|
|
289
322
|
else
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
recovered_at: nil
|
|
295
|
-
)
|
|
323
|
+
state.breached_at = current_time
|
|
324
|
+
state.recovery_scheduled_after = recovery_scheduled_after
|
|
325
|
+
state.recovery_started_at = nil
|
|
326
|
+
state.recovered_at = nil
|
|
296
327
|
true
|
|
297
328
|
end
|
|
298
329
|
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
local number_of_metric_buckets = tonumber(ARGV[1])
|
|
2
|
+
local window_start_ts = tonumber(ARGV[2])
|
|
3
|
+
local window_end_ts = tonumber(ARGV[3])
|
|
4
|
+
local metrics_keys = {}
|
|
5
|
+
for idx = 4, #ARGV do
|
|
6
|
+
table.insert(metrics_keys, ARGV[idx])
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
local metadata_key = KEYS[1]
|
|
10
|
+
|
|
11
|
+
local function count_events(start_idx, bucket_count, start_ts)
|
|
12
|
+
local total = 0
|
|
13
|
+
for idx = start_idx, start_idx + bucket_count - 1 do
|
|
14
|
+
total = total + tonumber(redis.call('ZCOUNT', KEYS[idx], start_ts, window_end_ts))
|
|
15
|
+
end
|
|
16
|
+
return total
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
local offset = 2
|
|
20
|
+
local successes = count_events(2, number_of_metric_buckets, window_start_ts)
|
|
21
|
+
|
|
22
|
+
offset = offset + number_of_metric_buckets
|
|
23
|
+
local errors = count_events(offset, number_of_metric_buckets, window_start_ts)
|
|
24
|
+
|
|
25
|
+
local metrics = redis.call('HMGET', metadata_key, unpack(metrics_keys))
|
|
26
|
+
return {successes, errors, unpack(metrics)}
|
data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_failure.lua
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
local failure_ts = tonumber(ARGV[1])
|
|
2
|
+
local failure_json = ARGV[2]
|
|
3
|
+
|
|
4
|
+
local metadata_key = KEYS[1]
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
-- Update metadata
|
|
8
|
+
local meta = redis.call('HMGET', metadata_key, 'last_error_at', 'consecutive_errors')
|
|
9
|
+
local prev_failure_ts = tonumber(meta[1])
|
|
10
|
+
local prev_consecutive_errors = tonumber(meta[2])
|
|
11
|
+
|
|
12
|
+
if not prev_failure_ts or failure_ts > prev_failure_ts then
|
|
13
|
+
redis.call(
|
|
14
|
+
'HSET', metadata_key,
|
|
15
|
+
'last_error_at', failure_ts,
|
|
16
|
+
'last_error_json', failure_json,
|
|
17
|
+
'consecutive_errors', (prev_consecutive_errors or 0) + 1,
|
|
18
|
+
'consecutive_successes', 0
|
|
19
|
+
)
|
|
20
|
+
else
|
|
21
|
+
redis.call(
|
|
22
|
+
'HSET', metadata_key,
|
|
23
|
+
'consecutive_errors', (prev_consecutive_errors or 0) + 1,
|
|
24
|
+
'consecutive_successes', 0
|
|
25
|
+
)
|
|
26
|
+
end
|
|
27
|
+
|
data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_success.lua
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
local request_ts = tonumber(ARGV[1])
|
|
2
|
+
|
|
3
|
+
local metadata_key = KEYS[1]
|
|
4
|
+
|
|
5
|
+
-- Update metadata
|
|
6
|
+
local meta = redis.call('HMGET', metadata_key, 'last_success_at', 'consecutive_successes')
|
|
7
|
+
local prev_success_ts = tonumber(meta[1])
|
|
8
|
+
local prev_consecutive_successes = tonumber(meta[2])
|
|
9
|
+
|
|
10
|
+
if not prev_success_ts or request_ts > prev_success_ts then
|
|
11
|
+
redis.call(
|
|
12
|
+
'HSET', metadata_key,
|
|
13
|
+
'last_success_at', request_ts,
|
|
14
|
+
'consecutive_errors', 0,
|
|
15
|
+
'consecutive_successes', (prev_consecutive_successes or 0) + 1
|
|
16
|
+
)
|
|
17
|
+
else
|
|
18
|
+
redis.call(
|
|
19
|
+
'HSET', metadata_key,
|
|
20
|
+
'consecutive_errors', 0,
|
|
21
|
+
'consecutive_successes', (prev_consecutive_successes or 0) + 1
|
|
22
|
+
)
|
|
23
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "securerandom"
|
|
4
|
+
require "forwardable"
|
|
5
|
+
|
|
6
|
+
module Stoplight
|
|
7
|
+
module Infrastructure
|
|
8
|
+
module DataStore
|
|
9
|
+
class Redis
|
|
10
|
+
# Distributed recovery lock using Redis SET NX (set-if-not-exists).
|
|
11
|
+
#
|
|
12
|
+
# Lock Acquisition:
|
|
13
|
+
# - Uses unique UUID token to prevent accidental release of others' locks
|
|
14
|
+
# - Atomic SET with NX flag ensures only one process acquires the recovery lock
|
|
15
|
+
# - TTL (px: lock_timeout) auto-releases the recovery lock if the process crashes
|
|
16
|
+
#
|
|
17
|
+
# Lock Release:
|
|
18
|
+
# - Lua script ensures only token holder can release (token comparison)
|
|
19
|
+
# - Best-effort release; TTL cleanup handles failures
|
|
20
|
+
#
|
|
21
|
+
# Failure Modes:
|
|
22
|
+
# - Lock contention: returns nil, caller should skip probe
|
|
23
|
+
# - Redis unavailable: raises an error and lets the caller decide
|
|
24
|
+
# - Crashed holder: raises an error and lets the caller decide. Lock auto-expires after lock_timeout
|
|
25
|
+
# - Release failure: Lock auto-expires after lock_timeout
|
|
26
|
+
#
|
|
27
|
+
class RecoveryLockStore
|
|
28
|
+
# @!attribute redis
|
|
29
|
+
# @return [RedisClient]
|
|
30
|
+
protected attr_reader :redis
|
|
31
|
+
|
|
32
|
+
# @!attribute lock_timeout
|
|
33
|
+
# @return [Integer]
|
|
34
|
+
protected attr_reader :lock_timeout
|
|
35
|
+
|
|
36
|
+
# @!attribute scripting
|
|
37
|
+
# @return [Stoplight::Infrastructure::DataStore::Redis::Scripting]
|
|
38
|
+
protected attr_reader :scripting
|
|
39
|
+
|
|
40
|
+
# @param redis [RedisClient | ConnectionPool]
|
|
41
|
+
# @param lock_timeout [Integer] recovery lock timeout in milliseconds
|
|
42
|
+
# @param scripting [Stoplight::Infrastructure::DataStore::Redis::Scripting]
|
|
43
|
+
def initialize(redis:, lock_timeout:, scripting:)
|
|
44
|
+
@redis = redis
|
|
45
|
+
@lock_timeout = lock_timeout
|
|
46
|
+
@scripting = scripting
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# @param light_name [String]
|
|
50
|
+
# @return [Stoplight::Infrastructure::DataStore::Redis::RecoveryLockToken, nil]
|
|
51
|
+
def acquire_lock(light_name)
|
|
52
|
+
recovery_lock = RecoveryLockToken.new(light_name:)
|
|
53
|
+
|
|
54
|
+
acquired = !!redis.then do |client|
|
|
55
|
+
client.set(recovery_lock.lock_key, recovery_lock.token, nx: true, px: lock_timeout)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
recovery_lock if acquired
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# @param recovery_lock [Stoplight::Infrastructure::DataStore::Redis::RecoveryLockToken]
|
|
62
|
+
# @return [void]
|
|
63
|
+
def release_lock(recovery_lock)
|
|
64
|
+
scripting.call(
|
|
65
|
+
:release_lock,
|
|
66
|
+
keys: [recovery_lock.lock_key], args: [recovery_lock.token]
|
|
67
|
+
)
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|