stoplight 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/stoplight/admin/actions/remove.rb +23 -0
  4. data/lib/stoplight/admin/dependencies.rb +5 -0
  5. data/lib/stoplight/admin/lights_repository.rb +12 -3
  6. data/lib/stoplight/admin/views/_card.erb +13 -1
  7. data/lib/stoplight/admin.rb +8 -0
  8. data/lib/stoplight/domain/data_store.rb +42 -6
  9. data/lib/stoplight/domain/failure.rb +2 -0
  10. data/lib/stoplight/domain/light.rb +7 -8
  11. data/lib/stoplight/domain/metrics.rb +85 -0
  12. data/lib/stoplight/domain/{metadata.rb → state_snapshot.rb} +29 -37
  13. data/lib/stoplight/domain/strategies/green_run_strategy.rb +2 -2
  14. data/lib/stoplight/domain/strategies/red_run_strategy.rb +3 -3
  15. data/lib/stoplight/domain/strategies/run_strategy.rb +2 -2
  16. data/lib/stoplight/domain/strategies/yellow_run_strategy.rb +7 -6
  17. data/lib/stoplight/domain/tracker/recovery_probe.rb +9 -6
  18. data/lib/stoplight/domain/tracker/request.rb +5 -4
  19. data/lib/stoplight/domain/traffic_control/base.rb +5 -5
  20. data/lib/stoplight/domain/traffic_control/consecutive_errors.rb +3 -7
  21. data/lib/stoplight/domain/traffic_control/error_rate.rb +3 -3
  22. data/lib/stoplight/domain/traffic_recovery/base.rb +6 -5
  23. data/lib/stoplight/domain/traffic_recovery/consecutive_successes.rb +8 -6
  24. data/lib/stoplight/infrastructure/data_store/memory/metrics.rb +27 -0
  25. data/lib/stoplight/infrastructure/data_store/memory/state.rb +21 -0
  26. data/lib/stoplight/infrastructure/data_store/memory.rb +125 -123
  27. data/lib/stoplight/infrastructure/data_store/redis/get_metrics.lua +26 -0
  28. data/lib/stoplight/infrastructure/data_store/redis/lua.rb +1 -1
  29. data/lib/stoplight/infrastructure/data_store/redis.rb +115 -40
  30. data/lib/stoplight/version.rb +1 -1
  31. data/lib/stoplight/wiring/fail_safe_data_store.rb +27 -3
  32. metadata +7 -3
  33. data/lib/stoplight/infrastructure/data_store/redis/get_metadata.lua +0 -38
@@ -14,14 +14,14 @@ module Stoplight
14
14
  # @min_samples = min_samples
15
15
  # end
16
16
  #
17
- # def determine_color(config, metadata)
18
- # total_probes = metadata.recovery_probe_successes + metadata.recovery_probe_errors
17
+ # def determine_color(config, metrics)
18
+ # total_probes = metrics.recovery_probe_successes + metrics.recovery_probe_errors
19
19
  #
20
20
  # if total_probes < @min_samples
21
21
  # return Color::YELLOW # Keep recovering, not enough samples
22
22
  # end
23
23
  #
24
- # success_rate = metadata.recovery_probe_successes.fdiv(total_probes)
24
+ # success_rate = metrics.recovery_probe_successes.fdiv(total_probes)
25
25
  # if success_rate >= @min_success_rate
26
26
  # Color::GREEN # Recovery successful
27
27
  # elsif success_rate <= 0.2
@@ -49,10 +49,11 @@ module Stoplight
49
49
  # current metrics and recovery progress.
50
50
  #
51
51
  # @param config [Stoplight::Domain::Config]
52
- # @param metadata [Stoplight::Domain::Metadata]
52
+ # @param metrics [Stoplight::Domain::Metrics]
53
+ # @param state_snapshot [Stoplight::Domain::StateSnapshot]
53
54
  # @return [TrafficRecovery::Decision]
54
55
  # :nocov:
55
- def determine_color(config, metadata)
56
+ def determine_color(config, metrics, state_snapshot)
56
57
  raise NotImplementedError
57
58
  end
58
59
  # :nocov:
@@ -49,16 +49,18 @@ module Stoplight
49
49
  # Determines if traffic should be resumed based on successes counts.
50
50
  #
51
51
  # @param config [Stoplight::Domain::Config]
52
- # @param metadata [Stoplight::Domain::Metadata]
52
+ # @param recovery_metrics [Stoplight::Domain::Metrics]
53
+ # @param state_snapshot [Stoplight::Domain::StateSnapshot]
53
54
  # @return [TrafficRecovery::Decision]
54
- def determine_color(config, metadata)
55
- return TrafficRecovery::PASS if metadata.color != Color::YELLOW
55
+ def determine_color(config, recovery_metrics, state_snapshot)
56
+ return TrafficRecovery::PASS if state_snapshot.color != Color::YELLOW
56
57
 
57
- recovery_started_at = metadata.recovery_started_at || metadata.recovery_scheduled_after
58
+ recovery_started_at = state_snapshot.recovery_started_at || state_snapshot.recovery_scheduled_after
58
59
 
59
- if metadata.last_error_at && metadata.last_error_at >= recovery_started_at
60
+ # TODO: Need to add metrics cleanup and we can just use recovery_metrics.errors > 0
61
+ if recovery_metrics.last_error_at && recovery_metrics.last_error_at >= recovery_started_at
60
62
  TrafficRecovery::RED
61
- elsif [metadata.consecutive_successes, metadata.recovery_probe_successes].min >= config.recovery_threshold
63
+ elsif recovery_metrics.consecutive_successes >= config.recovery_threshold
62
64
  TrafficRecovery::GREEN
63
65
  else
64
66
  TrafficRecovery::YELLOW
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stoplight
4
+ module Infrastructure
5
+ module DataStore
6
+ class Memory
7
+ class Metrics
8
+ attr_accessor :consecutive_errors
9
+ attr_accessor :consecutive_successes
10
+ attr_accessor :last_error
11
+ attr_accessor :last_success_at
12
+
13
+ def initialize(consecutive_errors: 0, consecutive_successes: 0, last_error: nil, last_success_at: nil)
14
+ @consecutive_errors = consecutive_errors
15
+ @consecutive_successes = consecutive_successes
16
+ @last_error = last_error
17
+ @last_success_at = last_success_at
18
+ end
19
+
20
+ def last_error_at
21
+ @last_error&.time
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stoplight
4
+ module Infrastructure
5
+ module DataStore
6
+ class Memory
7
+ class State
8
+ attr_accessor :recovered_at
9
+ attr_accessor :locked_state
10
+ attr_accessor :recovery_scheduled_after
11
+ attr_accessor :recovery_started_at
12
+ attr_accessor :breached_at
13
+
14
+ def initialize
15
+ @locked_state = Domain::State::UNLOCKED
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -18,61 +18,90 @@ module Stoplight
18
18
  @recovery_probe_errors = Hash.new { |recovery_probe_errors, light_name| recovery_probe_errors[light_name] = SlidingWindow.new }
19
19
  @recovery_probe_successes = Hash.new { |recovery_probe_successes, light_name| recovery_probe_successes[light_name] = SlidingWindow.new }
20
20
 
21
- @metadata = Hash.new do |metadata, light_name|
22
- metadata[light_name] = Domain::Metadata.new(
23
- current_time: Time.now,
24
- successes: 0,
25
- errors: 0,
26
- recovery_probe_successes: 0,
27
- recovery_probe_errors: 0,
28
- last_error: nil,
29
- last_error_at: nil,
30
- last_success_at: nil,
31
- consecutive_errors: 0,
32
- consecutive_successes: 0,
33
- breached_at: nil,
34
- locked_state: Domain::State::UNLOCKED,
35
- recovery_scheduled_after: nil,
36
- recovery_started_at: nil,
37
- recovered_at: nil
38
- )
39
- end
21
+ @states = Hash.new { |states, light_name| states[light_name] = State.new }
22
+ @metrics = Hash.new { |metrics, light_name| metrics[light_name] = Metrics.new }
23
+
40
24
  super # MonitorMixin
41
25
  end
42
26
 
43
27
  # @return [Array<String>]
44
28
  def names
45
- synchronize { @metadata.keys }
29
+ synchronize { @metrics.keys | @states.keys }
46
30
  end
47
31
 
48
32
  # @param config [Stoplight::Domain::Config]
49
- # @return [Stoplight::Domain::Metadata]
50
- def get_metadata(config)
33
+ # @return [Stoplight::Domain::Metrics]
34
+ def get_metrics(config)
51
35
  light_name = config.name
52
36
 
53
37
  synchronize do
54
38
  current_time = self.current_time
55
- recovery_window_start = (current_time - config.cool_off_time)
56
- recovered_at = @metadata[light_name].recovered_at
57
39
  window_start = if config.window_size
58
- [recovered_at, (current_time - config.window_size)].compact.max
40
+ (current_time - config.window_size)
41
+ else
42
+ current_time
43
+ end
44
+
45
+ metrics = @metrics[light_name]
46
+
47
+ errors = @errors[light_name].sum_in_window(window_start) if config.window_size
48
+ successes = @successes[light_name].sum_in_window(window_start) if config.window_size
49
+
50
+ Domain::Metrics.new(
51
+ errors:,
52
+ successes:,
53
+ total_consecutive_errors: metrics.consecutive_errors,
54
+ total_consecutive_successes: metrics.consecutive_successes,
55
+ last_error: metrics.last_error,
56
+ last_success_at: metrics.last_success_at
57
+ )
58
+ end
59
+ end
60
+
61
+ # @return [Stoplight::Domain::Metrics]
62
+ def get_recovery_metrics(config)
63
+ light_name = config.name
64
+
65
+ synchronize do
66
+ current_time = self.current_time
67
+ recovery_window_start = (current_time - config.cool_off_time)
68
+ if config.window_size
69
+ (current_time - config.window_size)
59
70
  else
60
71
  current_time
61
72
  end
62
73
 
63
- @metadata[light_name].with(
64
- current_time:,
65
- errors: @errors[config.name].sum_in_window(window_start),
66
- successes: @successes[config.name].sum_in_window(window_start),
67
- recovery_probe_errors: @recovery_probe_errors[config.name].sum_in_window(recovery_window_start),
68
- recovery_probe_successes: @recovery_probe_successes[config.name].sum_in_window(recovery_window_start)
74
+ metrics = @metrics[light_name]
75
+
76
+ Domain::Metrics.new(
77
+ errors: @recovery_probe_errors[light_name].sum_in_window(recovery_window_start),
78
+ successes: @recovery_probe_successes[light_name].sum_in_window(recovery_window_start),
79
+ total_consecutive_errors: metrics.consecutive_errors,
80
+ total_consecutive_successes: metrics.consecutive_successes,
81
+ last_error: metrics.last_error,
82
+ last_success_at: metrics.last_success_at
69
83
  )
70
84
  end
71
85
  end
72
86
 
87
+ # @return [Stoplight::Domain::StateSnapshot]
88
+ def get_state_snapshot(config)
89
+ time, state = synchronize do
90
+ [current_time, @states[config.name]]
91
+ end
92
+
93
+ Domain::StateSnapshot.new(
94
+ time:,
95
+ locked_state: state.locked_state,
96
+ recovery_scheduled_after: state.recovery_scheduled_after,
97
+ recovery_started_at: state.recovery_started_at,
98
+ breached_at: state.breached_at
99
+ )
100
+ end
101
+
73
102
  # @param config [Stoplight::Domain::Config]
74
103
  # @param exception [Exception]
75
- # @return [Stoplight::Domain::Metadata]
104
+ # @return [void]
76
105
  def record_failure(config, exception)
77
106
  current_time = self.current_time
78
107
  light_name = config.name
@@ -81,21 +110,23 @@ module Stoplight
81
110
  synchronize do
82
111
  @errors[light_name].increment if config.window_size
83
112
 
84
- metadata = @metadata[light_name]
85
- @metadata[light_name] = if metadata.last_error_at.nil? || current_time > metadata.last_error_at
86
- metadata.with(
87
- last_error_at: current_time,
88
- last_error: failure,
89
- consecutive_errors: metadata.consecutive_errors.succ,
90
- consecutive_successes: 0
91
- )
92
- else
93
- metadata.with(
94
- consecutive_errors: metadata.consecutive_errors.succ,
95
- consecutive_successes: 0
96
- )
113
+ metrics = @metrics[light_name]
114
+
115
+ if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
116
+ metrics.last_error = failure
117
+ end
118
+
119
+ metrics.consecutive_errors += 1
120
+ metrics.consecutive_successes = 0
121
+ end
122
+ end
123
+
124
+ def clear_windowed_metrics(config)
125
+ if config.window_size
126
+ synchronize do
127
+ @errors[config.name] = SlidingWindow.new
128
+ @successes[config.name] = SlidingWindow.new
97
129
  end
98
- get_metadata(config)
99
130
  end
100
131
  end
101
132
 
@@ -108,25 +139,20 @@ module Stoplight
108
139
  synchronize do
109
140
  @successes[light_name].increment if config.window_size
110
141
 
111
- metadata = @metadata[light_name]
112
- @metadata[light_name] = if metadata.last_success_at.nil? || current_time > metadata.last_success_at
113
- metadata.with(
114
- last_success_at: current_time,
115
- consecutive_errors: 0,
116
- consecutive_successes: metadata.consecutive_successes.succ
117
- )
118
- else
119
- metadata.with(
120
- consecutive_errors: 0,
121
- consecutive_successes: metadata.consecutive_successes.succ
122
- )
142
+ metrics = @metrics[light_name]
143
+
144
+ if metrics.last_success_at.nil? || current_time > metrics.last_success_at
145
+ metrics.last_success_at = current_time
123
146
  end
147
+
148
+ metrics.consecutive_errors = 0
149
+ metrics.consecutive_successes += 1
124
150
  end
125
151
  end
126
152
 
127
153
  # @param config [Stoplight::Domain::Config]
128
154
  # @param exception [Exception]
129
- # @return [Stoplight::Domain::Metadata]
155
+ # @return [void]
130
156
  def record_recovery_probe_failure(config, exception)
131
157
  light_name = config.name
132
158
  current_time = self.current_time
@@ -135,26 +161,19 @@ module Stoplight
135
161
  synchronize do
136
162
  @recovery_probe_errors[light_name].increment
137
163
 
138
- metadata = @metadata[light_name]
139
- @metadata[light_name] = if metadata.last_error_at.nil? || current_time > metadata.last_error_at
140
- metadata.with(
141
- last_error_at: current_time,
142
- last_error: failure,
143
- consecutive_errors: metadata.consecutive_errors.succ,
144
- consecutive_successes: 0
145
- )
146
- else
147
- metadata.with(
148
- consecutive_errors: metadata.consecutive_errors.succ,
149
- consecutive_successes: 0
150
- )
164
+ metrics = @metrics[light_name]
165
+
166
+ if metrics.last_error_at.nil? || failure.occurred_at > metrics.last_error_at
167
+ metrics.last_error = failure
151
168
  end
152
- get_metadata(config)
169
+
170
+ metrics.consecutive_errors += 1
171
+ metrics.consecutive_successes = 0
153
172
  end
154
173
  end
155
174
 
156
175
  # @param config [Stoplight::Domain::Config]
157
- # @return [Stoplight::Domain::Metadata]
176
+ # @return [void]
158
177
  def record_recovery_probe_success(config)
159
178
  light_name = config.name
160
179
  current_time = self.current_time
@@ -162,20 +181,13 @@ module Stoplight
162
181
  synchronize do
163
182
  @recovery_probe_successes[light_name].increment
164
183
 
165
- metadata = @metadata[light_name]
166
- @metadata[light_name] = if metadata.last_success_at.nil? || current_time > metadata.last_success_at
167
- metadata.with(
168
- last_success_at: current_time,
169
- consecutive_errors: 0,
170
- consecutive_successes: metadata.consecutive_successes.succ
171
- )
172
- else
173
- metadata.with(
174
- consecutive_errors: 0,
175
- consecutive_successes: metadata.consecutive_successes.succ
176
- )
184
+ metrics = @metrics[light_name]
185
+ if metrics.last_success_at.nil? || current_time > metrics.last_success_at
186
+ metrics.last_success_at = current_time
177
187
  end
178
- get_metadata(config)
188
+
189
+ metrics.consecutive_errors = 0
190
+ metrics.consecutive_successes += 1
179
191
  end
180
192
  end
181
193
 
@@ -186,8 +198,7 @@ module Stoplight
186
198
  light_name = config.name
187
199
 
188
200
  synchronize do
189
- metadata = @metadata[light_name]
190
- @metadata[light_name] = metadata.with(locked_state: state)
201
+ @states[light_name].locked_state = state
191
202
  end
192
203
  state
193
204
  end
@@ -224,16 +235,15 @@ module Stoplight
224
235
  current_time = self.current_time
225
236
 
226
237
  synchronize do
227
- metadata = @metadata[light_name]
228
- if metadata.recovered_at
238
+ state = @states[light_name]
239
+
240
+ if state.recovered_at
229
241
  false
230
242
  else
231
- @metadata[light_name] = metadata.with(
232
- recovered_at: current_time,
233
- recovery_started_at: nil,
234
- breached_at: nil,
235
- recovery_scheduled_after: nil
236
- )
243
+ state.recovered_at = current_time
244
+ state.recovery_started_at = nil
245
+ state.breached_at = nil
246
+ state.recovery_scheduled_after = nil
237
247
  true
238
248
  end
239
249
  end
@@ -248,21 +258,17 @@ module Stoplight
248
258
  current_time = self.current_time
249
259
 
250
260
  synchronize do
251
- metadata = @metadata[light_name]
252
- if metadata.recovery_started_at.nil?
253
- @metadata[light_name] = metadata.with(
254
- recovery_started_at: current_time,
255
- recovery_scheduled_after: nil,
256
- recovered_at: nil,
257
- breached_at: nil
258
- )
261
+ state = @states[light_name]
262
+ if state.recovery_started_at.nil?
263
+ state.recovery_started_at = current_time
264
+ state.recovery_scheduled_after = nil
265
+ state.recovered_at = nil
266
+ state.breached_at = nil
259
267
  true
260
268
  else
261
- @metadata[light_name] = metadata.with(
262
- recovery_scheduled_after: nil,
263
- recovered_at: nil,
264
- breached_at: nil
265
- )
269
+ state.recovery_scheduled_after = nil
270
+ state.recovered_at = nil
271
+ state.breached_at = nil
266
272
  false
267
273
  end
268
274
  end
@@ -278,21 +284,17 @@ module Stoplight
278
284
  recovery_scheduled_after = current_time + config.cool_off_time
279
285
 
280
286
  synchronize do
281
- metadata = @metadata[light_name]
282
- if metadata.breached_at
283
- @metadata[light_name] = metadata.with(
284
- recovery_scheduled_after: recovery_scheduled_after,
285
- recovery_started_at: nil,
286
- recovered_at: nil
287
- )
287
+ state = @states[light_name]
288
+ if state.breached_at
289
+ state.recovery_scheduled_after = recovery_scheduled_after
290
+ state.recovery_started_at = nil
291
+ state.recovered_at = nil
288
292
  false
289
293
  else
290
- @metadata[light_name] = metadata.with(
291
- breached_at: current_time,
292
- recovery_scheduled_after: recovery_scheduled_after,
293
- recovery_started_at: nil,
294
- recovered_at: nil
295
- )
294
+ state.breached_at = current_time
295
+ state.recovery_scheduled_after = recovery_scheduled_after
296
+ state.recovery_started_at = nil
297
+ state.recovered_at = nil
296
298
  true
297
299
  end
298
300
  end
@@ -0,0 +1,26 @@
1
+ local number_of_metric_buckets = tonumber(ARGV[1])
2
+ local window_start_ts = tonumber(ARGV[2])
3
+ local window_end_ts = tonumber(ARGV[3])
4
+ local metrics_keys = {}
5
+ for idx = 4, #ARGV do
6
+ table.insert(metrics_keys, ARGV[idx])
7
+ end
8
+
9
+ local metadata_key = KEYS[1]
10
+
11
+ local function count_events(start_idx, bucket_count, start_ts)
12
+ local total = 0
13
+ for idx = start_idx, start_idx + bucket_count - 1 do
14
+ total = total + tonumber(redis.call('ZCOUNT', KEYS[idx], start_ts, window_end_ts))
15
+ end
16
+ return total
17
+ end
18
+
19
+ local offset = 2
20
+ local successes = count_events(2, number_of_metric_buckets, window_start_ts)
21
+
22
+ offset = offset + number_of_metric_buckets
23
+ local errors = count_events(offset, number_of_metric_buckets, window_start_ts)
24
+
25
+ local metrics = redis.call('HMGET', metadata_key, unpack(metrics_keys))
26
+ return {successes, errors, unpack(metrics)}
@@ -14,7 +14,7 @@ module Stoplight
14
14
 
15
15
  RECORD_FAILURE = read_lua_file("record_failure")
16
16
  RECORD_SUCCESS = read_lua_file("record_success")
17
- GET_METADATA = read_lua_file("get_metadata")
17
+ GET_METRICS = read_lua_file("get_metrics")
18
18
  TRANSITION_TO_YELLOW = read_lua_file("transition_to_yellow")
19
19
  TRANSITION_TO_RED = read_lua_file("transition_to_red")
20
20
  TRANSITION_TO_GREEN = read_lua_file("transition_to_green")