breaker_machines 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/breaker_machines/async_support.rb +3 -3
- data/lib/breaker_machines/cascading_circuit.rb +175 -0
- data/lib/breaker_machines/circuit/execution.rb +4 -8
- data/lib/breaker_machines/circuit/introspection.rb +35 -20
- data/lib/breaker_machines/circuit/state_management.rb +5 -4
- data/lib/breaker_machines/circuit.rb +0 -1
- data/lib/breaker_machines/console.rb +12 -12
- data/lib/breaker_machines/dsl/cascading_circuit_builder.rb +20 -0
- data/lib/breaker_machines/dsl/circuit_builder.rb +209 -0
- data/lib/breaker_machines/dsl/hedged_builder.rb +21 -0
- data/lib/breaker_machines/dsl/parallel_fallback_wrapper.rb +20 -0
- data/lib/breaker_machines/dsl.rb +26 -239
- data/lib/breaker_machines/registry.rb +3 -3
- data/lib/breaker_machines/storage/backend_state.rb +69 -0
- data/lib/breaker_machines/storage/bucket_memory.rb +3 -3
- data/lib/breaker_machines/storage/cache.rb +3 -3
- data/lib/breaker_machines/storage/fallback_chain.rb +56 -70
- data/lib/breaker_machines/storage/memory.rb +3 -3
- data/lib/breaker_machines/types.rb +41 -0
- data/lib/breaker_machines/version.rb +1 -1
- data/lib/breaker_machines.rb +12 -0
- metadata +13 -6
data/lib/breaker_machines/dsl/hedged_builder.rb
ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+module BreakerMachines
+  module DSL
+    # Builder for hedged request configuration
+    class HedgedBuilder
+      def initialize(config)
+        @config = config
+        @config[:hedged_requests] = true
+      end
+
+      def delay(milliseconds)
+        @config[:hedging_delay] = milliseconds
+      end
+
+      def max_requests(count)
+        @config[:max_hedged_requests] = count
+      end
+    end
+  end
+end
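The new builder is driven from the `hedged` block of the circuit DSL (see the dsl.rb changes below). A minimal usage sketch, assuming the gem's documented `include BreakerMachines::DSL` entry point; the service class and values are illustrative only:

  class PaymentService
    include BreakerMachines::DSL

    circuit :gateway do
      hedged do
        delay 100        # wait 100 ms before firing a backup request
        max_requests 2   # never run more than two attempts in parallel
      end
    end
  end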
data/lib/breaker_machines/dsl/parallel_fallback_wrapper.rb
ADDED
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module BreakerMachines
+  module DSL
+    # Wrapper to indicate parallel execution for fallbacks
+    class ParallelFallbackWrapper
+      attr_reader :fallbacks
+
+      def initialize(fallbacks)
+        @fallbacks = fallbacks
+      end
+
+      def call(error)
+        # This will be handled by the circuit's fallback mechanism
+        # to execute fallbacks in parallel
+        raise NotImplementedError, 'ParallelFallbackWrapper should be handled by Circuit'
+      end
+    end
+  end
+end
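ParallelFallbackWrapper is only a marker object; the circuit's fallback machinery detects it and runs the wrapped fallbacks concurrently rather than calling it directly. It is produced by the DSL's `parallel_fallback` helper (visible in the CircuitBuilder code removed from dsl.rb below). A hedged sketch, inside a class that includes BreakerMachines::DSL, with hypothetical lambdas:

  circuit :search do
    parallel_fallback [
      ->(error) { CachedResults.fetch },   # hypothetical cache lookup
      ->(error) { [] }                     # last-resort empty result
    ]
  end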
data/lib/breaker_machines/dsl.rb
CHANGED
@@ -43,7 +43,7 @@ module BreakerMachines
         @circuits ||= {}

         if block_given?
-          builder = CircuitBuilder.new
+          builder = DSL::CircuitBuilder.new
           builder.instance_eval(&block)
           @circuits[name] = builder.config
         end
@@ -51,6 +51,19 @@ module BreakerMachines
         @circuits[name]
       end

+      # Define a cascading circuit breaker that can trip dependent circuits
+      def cascade_circuit(name, &block)
+        @circuits ||= {}
+
+        if block_given?
+          builder = DSL::CascadingCircuitBuilder.new
+          builder.instance_eval(&block)
+          @circuits[name] = builder.config.merge(circuit_type: :cascading)
+        end
+
+        @circuits[name]
+      end
+
       def circuits
         # Start with parent circuits if available
         base_circuits = if superclass.respond_to?(:circuits)
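A usage sketch of the new class-level macro. CascadingCircuitBuilder itself lives in dsl/cascading_circuit_builder.rb, which is not shown in this section, so the example assumes it accepts the same basic settings as CircuitBuilder; the class and circuit names are illustrative:

  class Fleet
    include BreakerMachines::DSL

    cascade_circuit :primary_database do
      threshold failures: 3, within: 60.seconds
      reset_after 30.seconds
    end
  end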
@@ -72,7 +85,7 @@ module BreakerMachines
         @circuit_templates ||= {}

         if block_given?
-          builder = CircuitBuilder.new
+          builder = DSL::CircuitBuilder.new
           builder.instance_eval(&block)
           @circuit_templates[name] = builder.config
         end
@@ -156,7 +169,16 @@ module BreakerMachines
     def circuit(name)
       self.class.circuits[name] ||= {}
       @circuit_instances ||= {}
-
+
+      config = self.class.circuits[name].merge(owner: self)
+      circuit_type = config.delete(:circuit_type)
+
+      @circuit_instances[name] ||= case circuit_type
+                                   when :cascading
+                                     CascadingCircuit.new(name, config)
+                                   else
+                                     Circuit.new(name, config)
+                                   end
     end

     # Create a dynamic circuit breaker with inline configuration
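Call sites do not change: `circuit(name)` still hands back a breaker object, and only the concrete class differs when the stored config was built by `cascade_circuit`. A hedged sketch of typical instance-level usage, assuming the circuit's `wrap` call API; the receiver and remote call are hypothetical:

  service = PaymentService.new
  service.circuit(:gateway).wrap do
    ExternalGateway.charge!(order)   # hypothetical remote call guarded by the breaker
  end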
@@ -173,7 +195,7 @@ module BreakerMachines

       # Apply additional configuration if block provided
       if config_block
-        builder = CircuitBuilder.new
+        builder = DSL::CircuitBuilder.new
         builder.instance_variable_set(:@config, base_config.deep_dup)
         builder.instance_eval(&config_block)
         base_config = builder.config
@@ -257,240 +279,5 @@ module BreakerMachines
     def cleanup_stale_dynamic_circuits(max_age_seconds = 3600)
       BreakerMachines.registry.cleanup_stale_dynamic_circuits(max_age_seconds)
     end
-
-    # DSL builder for configuring circuit breakers with a fluent interface
-    class CircuitBuilder
-      attr_reader :config
-
-      def initialize
-        @config = {
-          failure_threshold: 5,
-          failure_window: 60.seconds,
-          success_threshold: 1,
-          timeout: nil,
-          reset_timeout: 60.seconds,
-          half_open_calls: 1,
-          exceptions: [StandardError],
-          storage: nil,
-          metrics: nil,
-          fallback: nil,
-          on_open: nil,
-          on_close: nil,
-          on_half_open: nil,
-          on_reject: nil,
-          notifications: [],
-          fiber_safe: BreakerMachines.config.fiber_safe
-        }
-      end
-
-      def threshold(failures: nil, failure_rate: nil, minimum_calls: nil, within: 60.seconds, successes: nil)
-        if failure_rate
-          # Rate-based threshold
-          validate_failure_rate!(failure_rate)
-          validate_positive_integer!(:minimum_calls, minimum_calls) if minimum_calls
-
-          @config[:failure_rate] = failure_rate
-          @config[:minimum_calls] = minimum_calls || 5
-          @config[:use_rate_threshold] = true
-        elsif failures
-          # Absolute count threshold (existing behavior)
-          validate_positive_integer!(:failures, failures)
-          @config[:failure_threshold] = failures
-          @config[:use_rate_threshold] = false
-        end
-
-        validate_positive_integer!(:within, within.to_i)
-        @config[:failure_window] = within.to_i
-
-        return unless successes
-
-        validate_positive_integer!(:successes, successes)
-        @config[:success_threshold] = successes
-      end
-
-      def reset_after(duration, jitter: nil)
-        validate_positive_integer!(:duration, duration.to_i)
-        @config[:reset_timeout] = duration.to_i
-
-        return unless jitter
-
-        validate_jitter!(jitter)
-        @config[:reset_timeout_jitter] = jitter
-      end
-
-      def timeout(duration)
-        validate_non_negative_integer!(:timeout, duration.to_i)
-        @config[:timeout] = duration.to_i
-      end
-
-      def half_open_requests(count)
-        validate_positive_integer!(:half_open_requests, count)
-        @config[:half_open_calls] = count
-      end
-
-      def storage(backend, **options)
-        @config[:storage] = case backend
-                            when :memory
-                              Storage::Memory.new(**options)
-                            when :bucket_memory
-                              Storage::BucketMemory.new(**options)
-                            when :cache
-                              Storage::Cache.new(**options)
-                            when :null
-                              Storage::Null.new(**options)
-                            when :fallback_chain
-                              config = options.is_a?(Proc) ? options.call(timeout: 5) : options
-                              Storage::FallbackChain.new(config)
-                            when Class
-                              backend.new(**options)
-                            else
-                              backend
-                            end
-      end
-
-      def metrics(recorder = nil, &block)
-        @config[:metrics] = recorder || block
-      end
-
-      def fallback(value = nil, &block)
-        raise ArgumentError, 'Fallback requires either a value or a block' if value.nil? && !block_given?
-
-        fallback_value = block || value
-
-        if @config[:fallback].is_a?(Array)
-          @config[:fallback] << fallback_value
-        elsif @config[:fallback]
-          @config[:fallback] = [@config[:fallback], fallback_value]
-        else
-          @config[:fallback] = fallback_value
-        end
-      end
-
-      def on_open(&block)
-        @config[:on_open] = block
-      end
-
-      def on_close(&block)
-        @config[:on_close] = block
-      end
-
-      def on_half_open(&block)
-        @config[:on_half_open] = block
-      end
-
-      def on_reject(&block)
-        @config[:on_reject] = block
-      end
-
-      # Configure hedged requests
-      def hedged(&)
-        if block_given?
-          hedged_builder = HedgedBuilder.new(@config)
-          hedged_builder.instance_eval(&)
-        else
-          @config[:hedged_requests] = true
-        end
-      end
-
-      # Configure multiple backends
-      def backends(*backend_list)
-        @config[:backends] = backend_list.flatten
-      end
-
-      # Configure parallel fallback execution
-      def parallel_fallback(fallback_list)
-        @config[:fallback] = ParallelFallbackWrapper.new(fallback_list)
-      end
-
-      def notify(service, url = nil, events: %i[open close], **options)
-        notification = {
-          via: service,
-          url: url,
-          events: Array(events),
-          options: options
-        }
-        @config[:notifications] << notification
-      end
-
-      def handle(*exceptions)
-        @config[:exceptions] = exceptions
-      end
-
-      def fiber_safe(enabled = true) # rubocop:disable Style/OptionalBooleanParameter
-        @config[:fiber_safe] = enabled
-      end
-
-      def max_concurrent(limit)
-        validate_positive_integer!(:max_concurrent, limit)
-        @config[:max_concurrent] = limit
-      end
-
-      # Advanced features
-      def parallel_calls(count, timeout: nil)
-        @config[:parallel_calls] = count
-        @config[:parallel_timeout] = timeout
-      end
-
-      private
-
-      def validate_positive_integer!(name, value)
-        return if value.is_a?(Integer) && value.positive?
-
-        raise BreakerMachines::ConfigurationError,
-              "#{name} must be a positive integer, got: #{value.inspect}"
-      end
-
-      def validate_non_negative_integer!(name, value)
-        return if value.is_a?(Integer) && value >= 0
-
-        raise BreakerMachines::ConfigurationError,
-              "#{name} must be a non-negative integer, got: #{value.inspect}"
-      end
-
-      def validate_failure_rate!(rate)
-        return if rate.is_a?(Numeric) && rate >= 0.0 && rate <= 1.0
-
-        raise BreakerMachines::ConfigurationError,
-              "failure_rate must be between 0.0 and 1.0, got: #{rate.inspect}"
-      end
-
-      def validate_jitter!(jitter)
-        return if jitter.is_a?(Numeric) && jitter >= 0.0 && jitter <= 1.0
-
-        raise BreakerMachines::ConfigurationError,
-              "jitter must be between 0.0 and 1.0 (0% to 100%), got: #{jitter.inspect}"
-      end
-    end
-
-    # Builder for hedged request configuration
-    class HedgedBuilder
-      def initialize(config)
-        @config = config
-        @config[:hedged_requests] = true
-      end
-
-      def delay(milliseconds)
-        @config[:hedging_delay] = milliseconds
-      end
-
-      def max_requests(count)
-        @config[:max_hedged_requests] = count
-      end
-    end
-
-    # Wrapper to indicate parallel execution for fallbacks
-    class ParallelFallbackWrapper
-      attr_reader :fallbacks
-
-      def initialize(fallbacks)
-        @fallbacks = fallbacks
-      end
-
-      def call(error)
-        # This will be handled by the circuit's fallback mechanism
-        # to execute fallbacks in parallel
-        raise NotImplementedError, 'ParallelFallbackWrapper should be handled by Circuit'
-      end
-    end
   end
 end
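Net effect of the dsl.rb changes: `cascade_circuit` is added, and CircuitBuilder, HedgedBuilder and ParallelFallbackWrapper move out of this file into dsl/circuit_builder.rb (+209), dsl/hedged_builder.rb and dsl/parallel_fallback_wrapper.rb. The builder API appears to move intact, so an existing configuration block such as the following sketch should keep working (the circuit name and exception classes are illustrative):

  circuit :inventory do
    threshold failures: 5, within: 60.seconds
    reset_after 30.seconds, jitter: 0.25
    half_open_requests 2
    handle Timeout::Error, Errno::ECONNREFUSED
    fallback { [] }
  end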
@@ -102,13 +102,13 @@ module BreakerMachines

         {
           summary: stats_summary,
-          circuits: circuits.map
+          circuits: circuits.map { |c| c.stats.to_h },
           health: {
             open_count: circuits.count(&:open?),
             closed_count: circuits.count(&:closed?),
             half_open_count: circuits.count(&:half_open?),
-            total_failures: circuits.sum { |c| c.stats
-            total_successes: circuits.sum { |c| c.stats
+            total_failures: circuits.sum { |c| c.stats.failure_count },
+            total_successes: circuits.sum { |c| c.stats.success_count }
           }
         }
       end
data/lib/breaker_machines/storage/backend_state.rb
ADDED
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+module BreakerMachines
+  module Storage
+    # Manages the health state of a single storage backend using a state machine.
+    class BackendState
+      attr_reader :name, :failure_count, :last_failure_at
+      attr_accessor :health
+
+      def initialize(name, threshold:, timeout:)
+        @name = name
+        @threshold = threshold
+        @timeout = timeout
+        @failure_count = 0
+        @last_failure_at = nil
+        @health = :healthy
+      end
+
+      state_machine :health, initial: :healthy do
+        event :trip do
+          transition healthy: :unhealthy, if: :threshold_reached?
+        end
+
+        event :recover do
+          transition unhealthy: :healthy
+        end
+
+        event :reset do
+          transition all => :healthy
+        end
+
+        before_transition to: :unhealthy do |backend, _transition|
+          backend.instance_variable_set(:@unhealthy_until,
+                                        BreakerMachines.monotonic_time + backend.instance_variable_get(:@timeout))
+        end
+
+        after_transition to: :healthy do |backend, _transition|
+          backend.instance_variable_set(:@failure_count, 0)
+          backend.instance_variable_set(:@last_failure_at, nil)
+          backend.instance_variable_set(:@unhealthy_until, nil)
+        end
+      end
+
+      def record_failure
+        @failure_count += 1
+        @last_failure_at = BreakerMachines.monotonic_time
+        trip
+      end
+
+      def threshold_reached?
+        @failure_count >= @threshold
+      end
+
+      def unhealthy_due_to_timeout?
+        return false unless unhealthy?
+
+        unhealthy_until = instance_variable_get(:@unhealthy_until)
+        return false unless unhealthy_until
+
+        if BreakerMachines.monotonic_time > unhealthy_until
+          recover
+          false
+        else
+          true
+        end
+      end
+    end
+  end
+end
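A small behavioural sketch of BackendState as defined above (the `unhealthy?` predicate is generated by the state_machines gem for the :health attribute; the backend name and values are illustrative):

  state = BreakerMachines::Storage::BackendState.new(:cache, threshold: 3, timeout: 30)

  3.times { state.record_failure }   # third failure satisfies threshold_reached?, so :trip fires
  state.unhealthy?                   # => true
  state.unhealthy_due_to_timeout?    # => true, so the fallback chain skips this backend
  # once monotonic time passes @unhealthy_until (~30 s), the same call fires :recover and returns false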
@@ -27,10 +27,10 @@ module BreakerMachines
         circuit_data = @circuits[circuit_name]
         return nil unless circuit_data

-
+        BreakerMachines::Status.new(
           status: circuit_data[:status],
           opened_at: circuit_data[:opened_at]
-
+        )
       end

       def set_status(circuit_name, status, opened_at = nil)
@@ -160,7 +160,7 @@ module BreakerMachines
       end

       def monotonic_time
-
+        BreakerMachines.monotonic_time
       end

       def with_timeout(_timeout_ms)
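The repeated direct clock reads are replaced throughout this release by a shared `BreakerMachines.monotonic_time` helper, presumably added in lib/breaker_machines.rb (+12 lines in this diff, not shown here). A hypothetical sketch of what that helper would look like, consistent with the expression it replaces elsewhere in these hunks:

  module BreakerMachines
    # Hypothetical sketch: central monotonic clock used for all duration math.
    def self.monotonic_time
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end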
@@ -16,10 +16,10 @@ module BreakerMachines
         data = @cache.read(status_key(circuit_name))
         return nil unless data

-
+        BreakerMachines::Status.new(
           status: data[:status].to_sym,
           opened_at: data[:opened_at]
-
+        )
       end

       def set_status(circuit_name, status, opened_at = nil)
@@ -162,7 +162,7 @@ module BreakerMachines
       end

       def monotonic_time
-
+        BreakerMachines.monotonic_time
       end
     end
   end
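Across the storage adapters, `get_status` now returns the new BreakerMachines::Status value object (defined in the added types.rb, +41 lines, not shown in this section) instead of a bare hash. A hedged sketch of consuming it, assuming Status exposes readers matching its constructor keywords:

  status = storage.get_status(:payment_api)
  status.status      # => :open
  status.opened_at   # => monotonic timestamp when the circuit opened, or nil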
data/lib/breaker_machines/storage/fallback_chain.rb
CHANGED
@@ -9,15 +9,18 @@ module BreakerMachines
     # cache stores (Redis, Memcached) will work properly. Memory-based backends (:memory,
     # :bucket_memory) are incompatible with DRb as they don't share state between processes.
     class FallbackChain < Base
-      attr_reader :storage_configs, :storage_instances, :
+      attr_reader :storage_configs, :storage_instances, :backend_states

-      def initialize(storage_configs, **)
+      def initialize(storage_configs, circuit_breaker_threshold: 3, circuit_breaker_timeout: 30, **)
         super(**)
         @storage_configs = normalize_storage_configs(storage_configs)
         @storage_instances = {}
-        @
-        @
-        @
+        @circuit_breaker_threshold = circuit_breaker_threshold
+        @circuit_breaker_timeout = circuit_breaker_timeout
+        @backend_states = @storage_configs.to_h do |config|
+          [config[:backend],
+           BackendState.new(config[:backend], threshold: @circuit_breaker_threshold, timeout: @circuit_breaker_timeout)]
+        end
         validate_configs!
       end

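Per-backend health tracking is delegated to BackendState, keyed by the backend identifier, and the trip threshold and recovery timeout become constructor options. A hedged construction sketch, assuming each normalized config is a hash with the :backend and :timeout keys used below:

  chain = BreakerMachines::Storage::FallbackChain.new(
    [
      { backend: :cache,  timeout: 5 },   # try the Rails cache store first
      { backend: :memory, timeout: 1 }    # fall back to in-process memory
    ],
    circuit_breaker_threshold: 3,   # failures before a backend is marked unhealthy
    circuit_breaker_timeout: 30     # seconds an unhealthy backend stays skipped
  )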
@@ -72,27 +75,28 @@ module BreakerMachines
           instance.clear_all if instance.respond_to?(:clear_all)
         end
         storage_instances.clear
-
-        unhealthy_until.clear
+        backend_states.each_value(&:reset)
       end

       private

       def execute_with_fallback(method, *args, **kwargs)
-        chain_started_at =
+        chain_started_at = BreakerMachines.monotonic_time
         attempted_backends = []

         storage_configs.each_with_index do |config, index|
-
+          backend_type = config[:backend]
+          attempted_backends << backend_type
+          backend_state = backend_states[backend_type]

-          if
-          emit_backend_skipped_notification(
+          if backend_state.unhealthy_due_to_timeout?
+            emit_backend_skipped_notification(backend_type, method, index)
             next
           end

           begin
-            backend = get_backend_instance(
-            started_at =
+            backend = get_backend_instance(backend_type)
+            started_at = BreakerMachines.monotonic_time

             result = backend.with_timeout(config[:timeout]) do
               if kwargs.any?
@@ -102,35 +106,30 @@ module BreakerMachines
               end
             end

-
-
-
-            reset_backend_failures(config[:backend])
+            duration_ms = ((BreakerMachines.monotonic_time - started_at) * 1000).round(2)
+            emit_operation_success_notification(backend_type, method, duration_ms, index)
+            reset_backend_failures(backend_type)

-
-
-            emit_chain_success_notification(method, attempted_backends, config[:backend], chain_duration_ms)
+            chain_duration_ms = ((BreakerMachines.monotonic_time - chain_started_at) * 1000).round(2)
+            emit_chain_success_notification(method, attempted_backends, backend_type, chain_duration_ms)

             return result
           rescue BreakerMachines::StorageTimeoutError, BreakerMachines::StorageError, StandardError => e
-            duration_ms = ((
-
-
-
-
-
-
-
-
-            raise e if index == storage_configs.size - 1
+            duration_ms = ((BreakerMachines.monotonic_time - started_at) * 1000).round(2)
+            record_backend_failure(backend_type, e, duration_ms)
+            emit_fallback_notification(backend_type, e, duration_ms, index)
+
+            if index == storage_configs.size - 1
+              chain_duration_ms = ((BreakerMachines.monotonic_time - chain_started_at) * 1000).round(2)
+              emit_chain_failure_notification(method, attempted_backends, chain_duration_ms)
+              raise e
+            end

-            # Continue to next backend
             next
           end
         end

-
-        chain_duration_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - chain_started_at) * 1000).round(2)
+        chain_duration_ms = ((BreakerMachines.monotonic_time - chain_started_at) * 1000).round(2)
         emit_chain_failure_notification(method, attempted_backends, chain_duration_ms)
         raise BreakerMachines::StorageError, 'All storage backends are unhealthy'
       end
@@ -158,49 +157,34 @@ module BreakerMachines
         end
       end

-      def
-
-        return
-
-        if Process.clock_gettime(Process::CLOCK_MONOTONIC) > unhealthy_until_time
-          unhealthy_until.delete(backend_type)
-          false
-        else
-          true
-        end
-      end
-
-      def record_backend_failure(backend_type, error, duration_ms)
-        @backend_failures ||= {}
-        @backend_failures[backend_type] ||= []
-        @backend_failures[backend_type] << {
-          error: error,
-          duration_ms: duration_ms,
-          timestamp: Process.clock_gettime(Process::CLOCK_MONOTONIC)
-        }
-
-        # Keep only recent failures (last 60 seconds)
-        cutoff = Process.clock_gettime(Process::CLOCK_MONOTONIC) - 60
-        @backend_failures[backend_type].reject! { |f| f[:timestamp] < cutoff }
+      def record_backend_failure(backend_type, _error, _duration_ms)
+        backend_state = backend_states[backend_type]
+        return unless backend_state

-
-
+        previous_health = backend_state.health_name
+        backend_state.record_failure
+        new_health = backend_state.health_name

-
-
+        if new_health != previous_health
+          emit_backend_health_change_notification(backend_type, previous_health, new_health,
+                                                  backend_state.failure_count)
+        end
       rescue StandardError => e
         # Don't let failure recording cause the whole chain to hang
         Rails.logger&.error("FallbackChain: Failed to record backend failure: #{e.message}")
       end

       def reset_backend_failures(backend_type)
-
-
-        unhealthy_until.delete(backend_type)
+        backend_state = backend_states[backend_type]
+        return unless backend_state&.unhealthy?

-
-
-
+        previous_health = backend_state.health_name
+        backend_state.reset
+        new_health = backend_state.health_name
+
+        return unless new_health != previous_health
+
+        emit_backend_health_change_notification(backend_type, previous_health, new_health, 0)
       end

       def emit_fallback_notification(backend_type, error, duration_ms, backend_index)
@@ -227,25 +211,27 @@ module BreakerMachines
       end

       def emit_backend_skipped_notification(backend_type, method, backend_index)
+        backend_state = backend_states[backend_type]
         ActiveSupport::Notifications.instrument(
           'storage_backend_skipped.breaker_machines',
           backend: backend_type,
           operation: method,
           backend_index: backend_index,
           reason: 'unhealthy',
-          unhealthy_until: unhealthy_until
+          unhealthy_until: backend_state&.instance_variable_get(:@unhealthy_until)
         )
       end

       def emit_backend_health_change_notification(backend_type, previous_state, new_state, failure_count)
+        backend_state = backend_states[backend_type]
         ActiveSupport::Notifications.instrument(
           'storage_backend_health.breaker_machines',
           backend: backend_type,
           previous_state: previous_state,
           new_state: new_state,
           failure_count: failure_count,
-          threshold:
-          recovery_time: new_state == :unhealthy ? unhealthy_until
+          threshold: backend_state&.instance_variable_get(:@threshold),
+          recovery_time: new_state == :unhealthy ? backend_state&.instance_variable_get(:@unhealthy_until) : nil
         )
       end

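These instrumentation payloads can be observed with a standard ActiveSupport::Notifications subscriber; a minimal sketch using the event name and payload keys shown above (the log message wording is illustrative):

  ActiveSupport::Notifications.subscribe('storage_backend_health.breaker_machines') do |_name, _started, _finished, _id, payload|
    Rails.logger&.warn(
      "storage backend #{payload[:backend]}: #{payload[:previous_state]} -> #{payload[:new_state]} " \
      "(#{payload[:failure_count]} failures)"
    )
  end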