stoplight 5.5.0 → 5.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/stoplight/admin/actions/remove.rb +23 -0
  4. data/lib/stoplight/admin/dependencies.rb +6 -1
  5. data/lib/stoplight/admin/helpers.rb +10 -5
  6. data/lib/stoplight/admin/lights_repository.rb +26 -14
  7. data/lib/stoplight/admin/views/_card.erb +13 -1
  8. data/lib/stoplight/admin.rb +9 -0
  9. data/lib/stoplight/common/deprecations.rb +11 -0
  10. data/lib/stoplight/domain/config.rb +5 -1
  11. data/lib/stoplight/domain/data_store.rb +58 -6
  12. data/lib/stoplight/domain/failure.rb +2 -0
  13. data/lib/stoplight/domain/light/configuration_builder_interface.rb +120 -16
  14. data/lib/stoplight/domain/light.rb +34 -24
  15. data/lib/stoplight/domain/metrics.rb +64 -0
  16. data/lib/stoplight/domain/recovery_lock_token.rb +15 -0
  17. data/lib/stoplight/domain/{metadata.rb → state_snapshot.rb} +29 -37
  18. data/lib/stoplight/domain/storage/metrics.rb +42 -0
  19. data/lib/stoplight/domain/storage/recovery_lock.rb +56 -0
  20. data/lib/stoplight/domain/storage/state.rb +87 -0
  21. data/lib/stoplight/domain/strategies/green_run_strategy.rb +2 -2
  22. data/lib/stoplight/domain/strategies/red_run_strategy.rb +3 -3
  23. data/lib/stoplight/domain/strategies/run_strategy.rb +2 -7
  24. data/lib/stoplight/domain/strategies/yellow_run_strategy.rb +63 -36
  25. data/lib/stoplight/domain/tracker/base.rb +0 -29
  26. data/lib/stoplight/domain/tracker/recovery_probe.rb +26 -22
  27. data/lib/stoplight/domain/tracker/request.rb +26 -21
  28. data/lib/stoplight/domain/traffic_control/base.rb +5 -5
  29. data/lib/stoplight/domain/traffic_control/consecutive_errors.rb +3 -7
  30. data/lib/stoplight/domain/traffic_control/error_rate.rb +3 -3
  31. data/lib/stoplight/domain/traffic_recovery/base.rb +5 -5
  32. data/lib/stoplight/domain/traffic_recovery/consecutive_successes.rb +4 -8
  33. data/lib/stoplight/domain/traffic_recovery.rb +0 -1
  34. data/lib/stoplight/infrastructure/data_store/fail_safe.rb +164 -0
  35. data/lib/stoplight/infrastructure/data_store/memory/metrics.rb +27 -0
  36. data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_store.rb +54 -0
  37. data/lib/stoplight/infrastructure/data_store/memory/recovery_lock_token.rb +20 -0
  38. data/lib/stoplight/infrastructure/data_store/memory/state.rb +21 -0
  39. data/lib/stoplight/infrastructure/data_store/memory.rb +163 -132
  40. data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/get_metrics.lua +26 -0
  41. data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_failure.lua +27 -0
  42. data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/record_recovery_probe_success.lua +23 -0
  43. data/lib/stoplight/infrastructure/data_store/redis/lua_scripts/release_lock.lua +6 -0
  44. data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_store.rb +73 -0
  45. data/lib/stoplight/infrastructure/data_store/redis/recovery_lock_token.rb +35 -0
  46. data/lib/stoplight/infrastructure/data_store/redis/scripting.rb +71 -0
  47. data/lib/stoplight/infrastructure/data_store/redis.rb +211 -165
  48. data/lib/stoplight/infrastructure/notifier/fail_safe.rb +62 -0
  49. data/lib/stoplight/infrastructure/storage/compatibility_metrics.rb +48 -0
  50. data/lib/stoplight/infrastructure/storage/compatibility_recovery_lock.rb +36 -0
  51. data/lib/stoplight/infrastructure/storage/compatibility_recovery_metrics.rb +55 -0
  52. data/lib/stoplight/infrastructure/storage/compatibility_state.rb +55 -0
  53. data/lib/stoplight/version.rb +1 -1
  54. data/lib/stoplight/wiring/data_store/base.rb +11 -0
  55. data/lib/stoplight/wiring/data_store/memory.rb +10 -0
  56. data/lib/stoplight/wiring/data_store/redis.rb +25 -0
  57. data/lib/stoplight/wiring/default.rb +1 -1
  58. data/lib/stoplight/wiring/default_configuration.rb +1 -1
  59. data/lib/stoplight/wiring/default_factory_builder.rb +1 -1
  60. data/lib/stoplight/wiring/light_builder.rb +185 -0
  61. data/lib/stoplight/wiring/light_factory/compatibility_validator.rb +55 -0
  62. data/lib/stoplight/wiring/light_factory/config_normalizer.rb +71 -0
  63. data/lib/stoplight/wiring/light_factory/configuration_pipeline.rb +72 -0
  64. data/lib/stoplight/wiring/light_factory/traffic_control_dsl.rb +26 -0
  65. data/lib/stoplight/wiring/light_factory/traffic_recovery_dsl.rb +21 -0
  66. data/lib/stoplight/wiring/light_factory.rb +45 -132
  67. data/lib/stoplight/wiring/notifier_factory.rb +26 -0
  68. data/lib/stoplight/wiring/public_api.rb +3 -2
  69. data/lib/stoplight.rb +18 -3
  70. metadata +55 -16
  71. data/lib/stoplight/infrastructure/data_store/redis/get_metadata.lua +0 -38
  72. data/lib/stoplight/infrastructure/data_store/redis/lua.rb +0 -25
  73. data/lib/stoplight/infrastructure/dependency_injection/container.rb +0 -249
  74. data/lib/stoplight/infrastructure/dependency_injection/unresolved_dependency_error.rb +0 -13
  75. data/lib/stoplight/wiring/container.rb +0 -80
  76. data/lib/stoplight/wiring/fail_safe_data_store.rb +0 -123
  77. data/lib/stoplight/wiring/fail_safe_notifier.rb +0 -79
  78. data/lib/stoplight/wiring/system_container.rb +0 -9
  79. data/lib/stoplight/wiring/system_light_factory.rb +0 -17
  80. /data/lib/stoplight/infrastructure/data_store/redis/{record_failure.lua → lua_scripts/record_failure.lua} +0 -0
  81. /data/lib/stoplight/infrastructure/data_store/redis/{record_success.lua → lua_scripts/record_success.lua} +0 -0
  82. /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_green.lua → lua_scripts/transition_to_green.lua} +0 -0
  83. /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_red.lua → lua_scripts/transition_to_red.lua} +0 -0
  84. /data/lib/stoplight/infrastructure/data_store/redis/{transition_to_yellow.lua → lua_scripts/transition_to_yellow.lua} +0 -0
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "concurrent/map"
4
+
5
+ module Stoplight
6
+ module Infrastructure
7
+ module DataStore
8
+ class Memory
9
+ # Process-local recovery lock using Ruby's Thread::Mutex.
10
+ #
11
+ # This only serializes recovery within a single Ruby process.
12
+ # Multiple processes/servers will NOT coordinate - each process
13
+ # can send probes independently.
14
+ #
15
+ # Mutex Lifecycle:
16
+ # - One mutex created per unique light_name (lazily)
17
+ # - Mutexes persist for process lifetime (never GC'd)
18
+ #
19
+ class RecoveryLockStore
20
+ # @!attribute locks
21
+ # Stores one mutex per unique light_name for the lifetime of the process.
22
+ # Mutexes are never garbage collected.
23
+ # @return [Concurrent::Map<Thread::Mutex>]
24
+ private attr_reader :locks
25
+
26
+ def initialize
27
+ @locks = Concurrent::Map.new
28
+ end
29
+
30
+ # @param light_name [String]
31
+ # @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken, nil]
32
+ def acquire_lock(light_name)
33
+ lock = lock_for(light_name)
34
+ RecoveryLockToken.new(light_name:) if lock.try_lock
35
+ end
36
+
37
+ # @param recovery_lock_token [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken]
38
+ # @return [void]
39
+ def release_lock(recovery_lock_token)
40
+ lock_for(recovery_lock_token.light_name).unlock
41
+ end
42
+
43
+ # @param light_name [String]
44
+ # @return [Thread::Mutex]
45
+ private def lock_for(light_name)
46
+ locks.compute_if_absent(light_name) do
47
+ Thread::Mutex.new
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stoplight
4
+ module Infrastructure
5
+ module DataStore
6
+ class Memory
7
+ class RecoveryLockToken < Domain::RecoveryLockToken
8
+ # @!attribute light_name
9
+ # @return [String]
10
+ attr_reader :light_name
11
+
12
+ # @param light_name [String]
13
+ def initialize(light_name:)
14
+ @light_name = light_name
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stoplight
4
+ module Infrastructure
5
+ module DataStore
6
+ class Memory
7
+ class State
8
+ attr_accessor :recovered_at
9
+ attr_accessor :locked_state
10
+ attr_accessor :recovery_scheduled_after
11
+ attr_accessor :recovery_started_at
12
+ attr_accessor :breached_at
13
+
14
+ def initialize
15
+ @locked_state = Domain::State::UNLOCKED
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -11,68 +11,96 @@ module Stoplight
11
11
 
12
12
  KEY_SEPARATOR = ":"
13
13
 
14
- def initialize
14
+ # @!attribute recovery_lock_store
15
+ # @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockStore]
16
+ # @api private
17
+ private attr_reader :recovery_lock_store
18
+
19
+ # @param recovery_lock_store [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockStore]
20
+ def initialize(recovery_lock_store:)
21
+ @recovery_lock_store = recovery_lock_store
15
22
  @errors = Hash.new { |errors, light_name| errors[light_name] = SlidingWindow.new }
16
23
  @successes = Hash.new { |successes, light_name| successes[light_name] = SlidingWindow.new }
24
+ @metrics = Hash.new { |metrics, light_name| metrics[light_name] = Metrics.new }
17
25
 
18
- @recovery_probe_errors = Hash.new { |recovery_probe_errors, light_name| recovery_probe_errors[light_name] = SlidingWindow.new }
19
- @recovery_probe_successes = Hash.new { |recovery_probe_successes, light_name| recovery_probe_successes[light_name] = SlidingWindow.new }
20
-
21
- @metadata = Hash.new do |metadata, light_name|
22
- metadata[light_name] = Domain::Metadata.new(
23
- current_time: Time.now,
24
- successes: 0,
25
- errors: 0,
26
- recovery_probe_successes: 0,
27
- recovery_probe_errors: 0,
28
- last_error: nil,
29
- last_error_at: nil,
30
- last_success_at: nil,
31
- consecutive_errors: 0,
32
- consecutive_successes: 0,
33
- breached_at: nil,
34
- locked_state: Domain::State::UNLOCKED,
35
- recovery_scheduled_after: nil,
36
- recovery_started_at: nil,
37
- recovered_at: nil
38
- )
39
- end
40
- super # MonitorMixin
26
+ @recovery_metrics = Hash.new { |metrics, light_name| metrics[light_name] = Metrics.new }
27
+
28
+ @states = Hash.new { |states, light_name| states[light_name] = State.new }
29
+
30
+ super() # MonitorMixin
41
31
  end
42
32
 
43
33
  # @return [Array<String>]
44
34
  def names
45
- synchronize { @metadata.keys }
35
+ synchronize { @metrics.keys | @states.keys | @recovery_metrics.keys }
46
36
  end
47
37
 
48
38
  # @param config [Stoplight::Domain::Config]
49
- # @return [Stoplight::Domain::Metadata]
50
- def get_metadata(config)
39
+ # @return [Stoplight::Domain::Metrics]
40
+ def get_metrics(config)
51
41
  light_name = config.name
52
42
 
53
43
  synchronize do
54
44
  current_time = self.current_time
55
- recovery_window_start = (current_time - config.cool_off_time)
56
- recovered_at = @metadata[light_name].recovered_at
57
45
  window_start = if config.window_size
58
- [recovered_at, (current_time - config.window_size)].compact.max
46
+ (current_time - config.window_size)
59
47
  else
60
48
  current_time
61
49
  end
62
50
 
63
- @metadata[light_name].with(
64
- current_time:,
65
- errors: @errors[config.name].sum_in_window(window_start),
66
- successes: @successes[config.name].sum_in_window(window_start),
67
- recovery_probe_errors: @recovery_probe_errors[config.name].sum_in_window(recovery_window_start),
68
- recovery_probe_successes: @recovery_probe_successes[config.name].sum_in_window(recovery_window_start)
51
+ metrics = @metrics[light_name]
52
+
53
+ errors = @errors[light_name].sum_in_window(window_start) if config.window_size
54
+ successes = @successes[light_name].sum_in_window(window_start) if config.window_size
55
+ consecutive_errors = config.window_size ? [metrics.consecutive_errors, errors].min : metrics.consecutive_errors
56
+ consecutive_successes = config.window_size ? [metrics.consecutive_successes.to_i, successes].min : metrics.consecutive_successes.to_i
57
+
58
+ Domain::Metrics.new(
59
+ errors:,
60
+ successes:,
61
+ consecutive_errors:,
62
+ consecutive_successes:,
63
+ last_error: metrics.last_error,
64
+ last_success_at: metrics.last_success_at
65
+ )
66
+ end
67
+ end
68
+
69
+ # @return [Stoplight::Domain::Metrics]
70
+ def get_recovery_metrics(config)
71
+ light_name = config.name
72
+
73
+ synchronize do
74
+ metrics = @recovery_metrics[light_name]
75
+
76
+ Domain::Metrics.new(
77
+ errors: nil, successes: nil,
78
+ consecutive_errors: metrics.consecutive_errors,
79
+ consecutive_successes: metrics.consecutive_successes,
80
+ last_error: metrics.last_error,
81
+ last_success_at: metrics.last_success_at
69
82
  )
70
83
  end
71
84
  end
72
85
 
86
+ # @return [Stoplight::Domain::StateSnapshot]
87
+ def get_state_snapshot(config)
88
+ time, state = synchronize do
89
+ [current_time, @states[config.name]]
90
+ end
91
+
92
+ Domain::StateSnapshot.new(
93
+ time:,
94
+ locked_state: state.locked_state,
95
+ recovery_scheduled_after: state.recovery_scheduled_after,
96
+ recovery_started_at: state.recovery_started_at,
97
+ breached_at: state.breached_at
98
+ )
99
+ end
100
+
73
101
  # @param config [Stoplight::Domain::Config]
74
102
  # @param exception [Exception]
75
- # @return [Stoplight::Domain::Metadata]
103
+ # @return [void]
76
104
  def record_failure(config, exception)
77
105
  current_time = self.current_time
78
106
  light_name = config.name
@@ -81,21 +109,31 @@ module Stoplight
81
109
  synchronize do
82
110
  @errors[light_name].increment if config.window_size
83
111
 
84
- metadata = @metadata[light_name]
85
- @metadata[light_name] = if metadata.last_error_at.nil? || current_time > metadata.last_error_at
86
- metadata.with(
87
- last_error_at: current_time,
88
- last_error: failure,
89
- consecutive_errors: metadata.consecutive_errors.succ,
90
- consecutive_successes: 0
91
- )
92
- else
93
- metadata.with(
94
- consecutive_errors: metadata.consecutive_errors.succ,
95
- consecutive_successes: 0
96
- )
112
+ metrics = @metrics[light_name]
113
+
114
+ if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
115
+ metrics.last_error = failure
116
+ end
117
+
118
+ metrics.consecutive_errors += 1
119
+ metrics.consecutive_successes = 0
120
+ end
121
+ end
122
+
123
+ def clear_metrics(config)
124
+ light_name = config.name
125
+ synchronize do
126
+ if config.window_size
127
+ @errors[light_name] = SlidingWindow.new
128
+ @successes[light_name] = SlidingWindow.new
97
129
  end
98
- get_metadata(config)
130
+ @metrics[light_name] = Metrics.new
131
+ end
132
+ end
133
+
134
+ def clear_recovery_metrics(config)
135
+ synchronize do
136
+ @recovery_metrics[config.name] = Metrics.new
99
137
  end
100
138
  end
101
139
 
@@ -108,74 +146,51 @@ module Stoplight
108
146
  synchronize do
109
147
  @successes[light_name].increment if config.window_size
110
148
 
111
- metadata = @metadata[light_name]
112
- @metadata[light_name] = if metadata.last_success_at.nil? || current_time > metadata.last_success_at
113
- metadata.with(
114
- last_success_at: current_time,
115
- consecutive_errors: 0,
116
- consecutive_successes: metadata.consecutive_successes.succ
117
- )
118
- else
119
- metadata.with(
120
- consecutive_errors: 0,
121
- consecutive_successes: metadata.consecutive_successes.succ
122
- )
149
+ metrics = @metrics[light_name]
150
+
151
+ if metrics.last_success_at.nil? || current_time > metrics.last_success_at
152
+ metrics.last_success_at = current_time
123
153
  end
154
+
155
+ metrics.consecutive_errors = 0
156
+ metrics.consecutive_successes += 1
124
157
  end
125
158
  end
126
159
 
127
160
  # @param config [Stoplight::Domain::Config]
128
161
  # @param exception [Exception]
129
- # @return [Stoplight::Domain::Metadata]
162
+ # @return [void]
130
163
  def record_recovery_probe_failure(config, exception)
131
164
  light_name = config.name
132
165
  current_time = self.current_time
133
166
  failure = Domain::Failure.from_error(exception, time: current_time)
134
167
 
135
168
  synchronize do
136
- @recovery_probe_errors[light_name].increment
137
-
138
- metadata = @metadata[light_name]
139
- @metadata[light_name] = if metadata.last_error_at.nil? || current_time > metadata.last_error_at
140
- metadata.with(
141
- last_error_at: current_time,
142
- last_error: failure,
143
- consecutive_errors: metadata.consecutive_errors.succ,
144
- consecutive_successes: 0
145
- )
146
- else
147
- metadata.with(
148
- consecutive_errors: metadata.consecutive_errors.succ,
149
- consecutive_successes: 0
150
- )
169
+ metrics = @recovery_metrics[light_name]
170
+
171
+ if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
172
+ metrics.last_error = failure
151
173
  end
152
- get_metadata(config)
174
+
175
+ metrics.consecutive_errors += 1
176
+ metrics.consecutive_successes = 0
153
177
  end
154
178
  end
155
179
 
156
180
  # @param config [Stoplight::Domain::Config]
157
- # @return [Stoplight::Domain::Metadata]
181
+ # @return [void]
158
182
  def record_recovery_probe_success(config)
159
183
  light_name = config.name
160
184
  current_time = self.current_time
161
185
 
162
186
  synchronize do
163
- @recovery_probe_successes[light_name].increment
164
-
165
- metadata = @metadata[light_name]
166
- @metadata[light_name] = if metadata.last_success_at.nil? || current_time > metadata.last_success_at
167
- metadata.with(
168
- last_success_at: current_time,
169
- consecutive_errors: 0,
170
- consecutive_successes: metadata.consecutive_successes.succ
171
- )
172
- else
173
- metadata.with(
174
- consecutive_errors: 0,
175
- consecutive_successes: metadata.consecutive_successes.succ
176
- )
187
+ metrics = @recovery_metrics[light_name]
188
+ if metrics.last_success_at.nil? || current_time > metrics.last_success_at
189
+ metrics.last_success_at = current_time
177
190
  end
178
- get_metadata(config)
191
+
192
+ metrics.consecutive_errors = 0
193
+ metrics.consecutive_successes += 1
179
194
  end
180
195
  end
181
196
 
@@ -186,8 +201,7 @@ module Stoplight
186
201
  light_name = config.name
187
202
 
188
203
  synchronize do
189
- metadata = @metadata[light_name]
190
- @metadata[light_name] = metadata.with(locked_state: state)
204
+ @states[light_name].locked_state = state
191
205
  end
192
206
  state
193
207
  end
@@ -197,6 +211,20 @@ module Stoplight
197
211
  "#<#{self.class.name}>"
198
212
  end
199
213
 
214
+ # @param config [Stoplight::Domain::Config]
215
+ # @return [void]
216
+ def delete_light(config)
217
+ light_name = config.name
218
+
219
+ synchronize do
220
+ @states.delete(light_name)
221
+ @recovery_metrics.delete(light_name)
222
+ @metrics.delete(light_name)
223
+ @errors.delete(light_name)
224
+ @successes.delete(light_name)
225
+ end
226
+ end
227
+
200
228
  # Combined method that performs the state transition based on color
201
229
  #
202
230
  # @param config [Stoplight::Domain::Config] The light configuration
@@ -215,6 +243,18 @@ module Stoplight
215
243
  end
216
244
  end
217
245
 
246
+ # @param config [Stoplight::Domain::Config]
247
+ # @return [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken, nil]
248
+ def acquire_recovery_lock(config)
249
+ recovery_lock_store.acquire_lock(config.name)
250
+ end
251
+
252
+ # @param lock [Stoplight::Infrastructure::DataStore::Memory::RecoveryLockToken]
253
+ # @return [void]
254
+ def release_recovery_lock(lock)
255
+ recovery_lock_store.release_lock(lock)
256
+ end
257
+
218
258
  # Transitions to GREEN state and ensures only one notification
219
259
  #
220
260
  # @param config [Stoplight::Domain::Config] The light configuration
@@ -224,16 +264,15 @@ module Stoplight
224
264
  current_time = self.current_time
225
265
 
226
266
  synchronize do
227
- metadata = @metadata[light_name]
228
- if metadata.recovered_at
267
+ state = @states[light_name]
268
+
269
+ if state.recovered_at
229
270
  false
230
271
  else
231
- @metadata[light_name] = metadata.with(
232
- recovered_at: current_time,
233
- recovery_started_at: nil,
234
- breached_at: nil,
235
- recovery_scheduled_after: nil
236
- )
272
+ state.recovered_at = current_time
273
+ state.recovery_started_at = nil
274
+ state.breached_at = nil
275
+ state.recovery_scheduled_after = nil
237
276
  true
238
277
  end
239
278
  end
@@ -248,21 +287,17 @@ module Stoplight
248
287
  current_time = self.current_time
249
288
 
250
289
  synchronize do
251
- metadata = @metadata[light_name]
252
- if metadata.recovery_started_at.nil?
253
- @metadata[light_name] = metadata.with(
254
- recovery_started_at: current_time,
255
- recovery_scheduled_after: nil,
256
- recovered_at: nil,
257
- breached_at: nil
258
- )
290
+ state = @states[light_name]
291
+ if state.recovery_started_at.nil?
292
+ state.recovery_started_at = current_time
293
+ state.recovery_scheduled_after = nil
294
+ state.recovered_at = nil
295
+ state.breached_at = nil
259
296
  true
260
297
  else
261
- @metadata[light_name] = metadata.with(
262
- recovery_scheduled_after: nil,
263
- recovered_at: nil,
264
- breached_at: nil
265
- )
298
+ state.recovery_scheduled_after = nil
299
+ state.recovered_at = nil
300
+ state.breached_at = nil
266
301
  false
267
302
  end
268
303
  end
@@ -278,21 +313,17 @@ module Stoplight
278
313
  recovery_scheduled_after = current_time + config.cool_off_time
279
314
 
280
315
  synchronize do
281
- metadata = @metadata[light_name]
282
- if metadata.breached_at
283
- @metadata[light_name] = metadata.with(
284
- recovery_scheduled_after: recovery_scheduled_after,
285
- recovery_started_at: nil,
286
- recovered_at: nil
287
- )
316
+ state = @states[light_name]
317
+ if state.breached_at
318
+ state.recovery_scheduled_after = recovery_scheduled_after
319
+ state.recovery_started_at = nil
320
+ state.recovered_at = nil
288
321
  false
289
322
  else
290
- @metadata[light_name] = metadata.with(
291
- breached_at: current_time,
292
- recovery_scheduled_after: recovery_scheduled_after,
293
- recovery_started_at: nil,
294
- recovered_at: nil
295
- )
323
+ state.breached_at = current_time
324
+ state.recovery_scheduled_after = recovery_scheduled_after
325
+ state.recovery_started_at = nil
326
+ state.recovered_at = nil
296
327
  true
297
328
  end
298
329
  end
@@ -0,0 +1,26 @@
1
+ local number_of_metric_buckets = tonumber(ARGV[1])
2
+ local window_start_ts = tonumber(ARGV[2])
3
+ local window_end_ts = tonumber(ARGV[3])
4
+ local metrics_keys = {}
5
+ for idx = 4, #ARGV do
6
+ table.insert(metrics_keys, ARGV[idx])
7
+ end
8
+
9
+ local metadata_key = KEYS[1]
10
+
11
+ local function count_events(start_idx, bucket_count, start_ts)
12
+ local total = 0
13
+ for idx = start_idx, start_idx + bucket_count - 1 do
14
+ total = total + tonumber(redis.call('ZCOUNT', KEYS[idx], start_ts, window_end_ts))
15
+ end
16
+ return total
17
+ end
18
+
19
+ local offset = 2
20
+ local successes = count_events(2, number_of_metric_buckets, window_start_ts)
21
+
22
+ offset = offset + number_of_metric_buckets
23
+ local errors = count_events(offset, number_of_metric_buckets, window_start_ts)
24
+
25
+ local metrics = redis.call('HMGET', metadata_key, unpack(metrics_keys))
26
+ return {successes, errors, unpack(metrics)}
@@ -0,0 +1,27 @@
1
+ local failure_ts = tonumber(ARGV[1])
2
+ local failure_json = ARGV[2]
3
+
4
+ local metadata_key = KEYS[1]
5
+
6
+
7
+ -- Update metadata
8
+ local meta = redis.call('HMGET', metadata_key, 'last_error_at', 'consecutive_errors')
9
+ local prev_failure_ts = tonumber(meta[1])
10
+ local prev_consecutive_errors = tonumber(meta[2])
11
+
12
+ if not prev_failure_ts or failure_ts > prev_failure_ts then
13
+ redis.call(
14
+ 'HSET', metadata_key,
15
+ 'last_error_at', failure_ts,
16
+ 'last_error_json', failure_json,
17
+ 'consecutive_errors', (prev_consecutive_errors or 0) + 1,
18
+ 'consecutive_successes', 0
19
+ )
20
+ else
21
+ redis.call(
22
+ 'HSET', metadata_key,
23
+ 'consecutive_errors', (prev_consecutive_errors or 0) + 1,
24
+ 'consecutive_successes', 0
25
+ )
26
+ end
27
+
@@ -0,0 +1,23 @@
1
+ local request_ts = tonumber(ARGV[1])
2
+
3
+ local metadata_key = KEYS[1]
4
+
5
+ -- Update metadata
6
+ local meta = redis.call('HMGET', metadata_key, 'last_success_at', 'consecutive_successes')
7
+ local prev_success_ts = tonumber(meta[1])
8
+ local prev_consecutive_successes = tonumber(meta[2])
9
+
10
+ if not prev_success_ts or request_ts > prev_success_ts then
11
+ redis.call(
12
+ 'HSET', metadata_key,
13
+ 'last_success_at', request_ts,
14
+ 'consecutive_errors', 0,
15
+ 'consecutive_successes', (prev_consecutive_successes or 0) + 1
16
+ )
17
+ else
18
+ redis.call(
19
+ 'HSET', metadata_key,
20
+ 'consecutive_errors', 0,
21
+ 'consecutive_successes', (prev_consecutive_successes or 0) + 1
22
+ )
23
+ end
@@ -0,0 +1,6 @@
1
+ local token = ARGV[1]
2
+ local lock_key = KEYS[1]
3
+
4
+ if redis.call("get", lock_key) == token then
5
+ return redis.call("del", lock_key)
6
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+ require "forwardable"
5
+
6
+ module Stoplight
7
+ module Infrastructure
8
+ module DataStore
9
+ class Redis
10
+ # Distributed recovery lock using Redis SET NX (set-if-not-exists).
11
+ #
12
+ # Lock Acquisition:
13
+ # - Uses unique UUID token to prevent accidental release of others' locks
14
+ # - Atomic SET with NX flag ensures only one process acquires the recovery lock
15
+ # - TTL (px: lock_timeout) auto-releases the recovery lock if the process crashes
16
+ #
17
+ # Lock Release:
18
+ # - Lua script ensures only token holder can release (token comparison)
19
+ # - Best-effort release; TTL cleanup handles failures
20
+ #
21
+ # Failure Modes:
22
+ # - Lock contention: Returns nil, caller should skip probe
23
+ # - Redis unavailable: raises an error and lets the caller decide
24
+ # - Crashed holder: raises an error and lets the caller decide. Lock auto-expires after lock_timeout
25
+ # - Release failure: Lock auto-expires after lock_timeout
26
+ #
27
+ class RecoveryLockStore
28
+ # @!attribute redis
29
+ # @return [RedisClient | ConnectionPool]
30
+ protected attr_reader :redis
31
+
32
+ # @!attribute lock_timeout
33
+ # @return [Integer]
34
+ protected attr_reader :lock_timeout
35
+
36
+ # @!attribute scripting
37
+ # @return [Stoplight::Infrastructure::DataStore::Redis::Scripting]
38
+ protected attr_reader :scripting
39
+
40
+ # @param redis [RedisClient | ConnectionPool]
41
+ # @param lock_timeout [Integer] recovery_lock timeout in milliseconds
42
+ # @param scripting [Stoplight::Infrastructure::DataStore::Redis::Scripting]
43
+ def initialize(redis:, lock_timeout:, scripting:)
44
+ @redis = redis
45
+ @lock_timeout = lock_timeout
46
+ @scripting = scripting
47
+ end
48
+
49
+ # @param light_name [String]
50
+ # @return [Stoplight::Infrastructure::DataStore::Redis::RecoveryLockToken, nil]
51
+ def acquire_lock(light_name)
52
+ recovery_lock = RecoveryLockToken.new(light_name:)
53
+
54
+ acquired = !!redis.then do |client|
55
+ client.set(recovery_lock.lock_key, recovery_lock.token, nx: true, px: lock_timeout)
56
+ end
57
+
58
+ recovery_lock if acquired
59
+ end
60
+
61
+ # @param recovery_lock [Stoplight::Infrastructure::DataStore::Redis::RecoveryLockToken]
62
+ # @return [void]
63
+ def release_lock(recovery_lock)
64
+ scripting.call(
65
+ :release_lock,
66
+ keys: [recovery_lock.lock_key], args: [recovery_lock.token]
67
+ )
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end