agent-harness 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.markdownlint.yml +6 -0
- data/.markdownlintignore +8 -0
- data/.release-please-manifest.json +3 -0
- data/.rspec +3 -0
- data/.simplecov +26 -0
- data/.tool-versions +1 -0
- data/CHANGELOG.md +27 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +274 -0
- data/Rakefile +103 -0
- data/bin/console +11 -0
- data/bin/setup +8 -0
- data/lib/agent_harness/command_executor.rb +146 -0
- data/lib/agent_harness/configuration.rb +299 -0
- data/lib/agent_harness/error_taxonomy.rb +128 -0
- data/lib/agent_harness/errors.rb +63 -0
- data/lib/agent_harness/orchestration/circuit_breaker.rb +169 -0
- data/lib/agent_harness/orchestration/conductor.rb +179 -0
- data/lib/agent_harness/orchestration/health_monitor.rb +170 -0
- data/lib/agent_harness/orchestration/metrics.rb +167 -0
- data/lib/agent_harness/orchestration/provider_manager.rb +240 -0
- data/lib/agent_harness/orchestration/rate_limiter.rb +113 -0
- data/lib/agent_harness/providers/adapter.rb +163 -0
- data/lib/agent_harness/providers/aider.rb +109 -0
- data/lib/agent_harness/providers/anthropic.rb +345 -0
- data/lib/agent_harness/providers/base.rb +198 -0
- data/lib/agent_harness/providers/codex.rb +100 -0
- data/lib/agent_harness/providers/cursor.rb +281 -0
- data/lib/agent_harness/providers/gemini.rb +136 -0
- data/lib/agent_harness/providers/github_copilot.rb +155 -0
- data/lib/agent_harness/providers/kilocode.rb +73 -0
- data/lib/agent_harness/providers/opencode.rb +75 -0
- data/lib/agent_harness/providers/registry.rb +137 -0
- data/lib/agent_harness/response.rb +100 -0
- data/lib/agent_harness/token_tracker.rb +170 -0
- data/lib/agent_harness/version.rb +5 -0
- data/lib/agent_harness.rb +115 -0
- data/release-please-config.json +63 -0
- metadata +129 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
# Configuration for AgentHarness
|
|
5
|
+
#
|
|
6
|
+
# Supports configuration via Ruby DSL, YAML files, and environment variables.
|
|
7
|
+
# Configuration sources are merged with priority: Ruby DSL > YAML > Environment.
|
|
8
|
+
#
|
|
9
|
+
# @example Ruby DSL configuration
|
|
10
|
+
# AgentHarness.configure do |config|
|
|
11
|
+
# config.logger = Logger.new(STDOUT)
|
|
12
|
+
# config.default_provider = :cursor
|
|
13
|
+
# config.fallback_providers = [:claude, :gemini]
|
|
14
|
+
#
|
|
15
|
+
# config.provider :claude do |p|
|
|
16
|
+
# p.enabled = true
|
|
17
|
+
# p.timeout = 600
|
|
18
|
+
# end
|
|
19
|
+
# end
|
|
20
|
+
class Configuration
  attr_accessor :logger, :log_level, :default_provider, :fallback_providers
  attr_accessor :config_file_path, :default_timeout
  attr_writer :command_executor

  attr_reader :providers, :orchestration_config, :callbacks, :custom_provider_classes

  # Build a configuration holding the library defaults: no logger, :info
  # log level, :cursor as default provider, no fallbacks, a 300 default
  # timeout, and empty provider / custom-class registries.
  def initialize
    @logger = nil # Will use null logger if not set
    @log_level = :info
    @default_provider = :cursor
    @fallback_providers = []
    @command_executor = nil # Lazy-initialized
    @config_file_path = nil
    @default_timeout = 300
    @providers = {}
    @orchestration_config = OrchestrationConfig.new
    @callbacks = CallbackRegistry.new
    @custom_provider_classes = {}
  end

  # Get or lazily initialize the command executor.
  #
  # When no executor was assigned, one is created on first access using the
  # logger that is set at that moment, and then memoized.
  #
  # @return [CommandExecutor] the command executor
  def command_executor
    @command_executor ||= CommandExecutor.new(logger: @logger)
  end

  # Configure orchestration settings
  #
  # @yield [OrchestrationConfig] the orchestration configuration
  # @return [OrchestrationConfig] the orchestration configuration
  def orchestration(&block)
    yield(@orchestration_config) if block_given?
    @orchestration_config
  end

  # Configure a provider
  #
  # A fresh ProviderConfig is built on every call, so configuring the same
  # name twice replaces the earlier entry. Entries are keyed by the
  # symbolized name.
  #
  # @param name [Symbol, String] the provider name
  # @yield [ProviderConfig] the provider configuration
  # @return [ProviderConfig] the provider configuration
  def provider(name, &block)
    config = ProviderConfig.new(name)
    yield(config) if block_given?
    @providers[name.to_sym] = config
  end

  # Register a custom provider class
  #
  # @param name [Symbol, String] the provider name (stored symbolized)
  # @param klass [Class] the provider class
  # @return [void]
  def register_provider(name, klass)
    @custom_provider_classes[name.to_sym] = klass
  end

  # Register callback for token usage events
  #
  # @yield [TokenEvent] called when tokens are used
  # @return [void]
  def on_tokens_used(&block)
    @callbacks.register(:tokens_used, block)
  end

  # Register callback for provider switch events
  #
  # @yield [Hash] event data with :from_provider, :to_provider, :reason
  # @return [void]
  def on_provider_switch(&block)
    @callbacks.register(:provider_switch, block)
  end

  # Register callback for circuit open events
  #
  # @yield [Hash] event data; circuit breakers include :old_state and
  #   :circuit_breaker
  # @return [void]
  def on_circuit_open(&block)
    @callbacks.register(:circuit_open, block)
  end

  # Register callback for circuit close events
  #
  # @yield [Hash] event data; circuit breakers include :old_state and
  #   :circuit_breaker
  # @return [void]
  def on_circuit_close(&block)
    @callbacks.register(:circuit_close, block)
  end

  # Validate the configuration
  #
  # Collects every problem before raising so the caller sees all of them
  # in a single, comma-separated message.
  #
  # @raise [ConfigurationError] if configuration is invalid
  # @return [void]
  def validate!
    errors = []

    errors << "No providers configured" if @providers.empty?
    errors << "Default provider '#{@default_provider}' not configured" unless @providers[default_provider_key]

    raise ConfigurationError, errors.join(", ") unless errors.empty?
  end

  # Check if configuration is valid
  #
  # @return [Boolean] true if valid
  def valid?
    validate!
    true
  rescue ConfigurationError
    false
  end

  private

  # Lookup key for the default provider.
  #
  # Providers are stored under symbolized names, so a default provider
  # given as a String (for example "cursor") must be symbolized before the
  # lookup. Values that cannot be symbolized (such as nil) are used as-is.
  #
  # @return [Symbol, Object] key to look up in the providers hash
  def default_provider_key
    @default_provider.respond_to?(:to_sym) ? @default_provider.to_sym : @default_provider
  end
end
|
|
132
|
+
|
|
133
|
+
# Orchestration configuration
|
|
134
|
+
class OrchestrationConfig
  # Top-level switches; all three default to true.
  attr_accessor :enabled, :auto_switch_on_error, :auto_switch_on_rate_limit

  # Nested configuration objects, created once in the constructor.
  attr_reader :circuit_breaker_config, :retry_config, :rate_limit_config, :health_check_config

  # Enable everything and create one default instance of each nested
  # configuration.
  def initialize
    @enabled = true
    @auto_switch_on_error = true
    @auto_switch_on_rate_limit = true
    @circuit_breaker_config = CircuitBreakerConfig.new
    @retry_config = RetryConfig.new
    @rate_limit_config = RateLimitConfig.new
    @health_check_config = HealthCheckConfig.new
  end

  # Access the circuit breaker settings, optionally customizing them.
  #
  # @yield [CircuitBreakerConfig] the circuit breaker configuration
  # @return [CircuitBreakerConfig]
  def circuit_breaker(&block)
    settings = @circuit_breaker_config
    block.call(settings) if block
    settings
  end

  # Access the retry settings, optionally customizing them.
  #
  # @yield [RetryConfig] the retry configuration
  # @return [RetryConfig]
  def retry(&block)
    settings = @retry_config
    block.call(settings) if block
    settings
  end

  # Access the rate limit settings, optionally customizing them.
  #
  # @yield [RateLimitConfig] the rate limit configuration
  # @return [RateLimitConfig]
  def rate_limit(&block)
    settings = @rate_limit_config
    block.call(settings) if block
    settings
  end

  # Access the health check settings, optionally customizing them.
  #
  # @yield [HealthCheckConfig] the health check configuration
  # @return [HealthCheckConfig]
  def health_check(&block)
    settings = @health_check_config
    block.call(settings) if block
    settings
  end
end
|
|
185
|
+
|
|
186
|
+
# Circuit breaker configuration
|
|
187
|
+
class CircuitBreakerConfig
  # Whether the breaker is active (default: true).
  attr_accessor :enabled
  # Failure count at which the breaker trips (default: 5).
  attr_accessor :failure_threshold
  # Timeout value; the default of 300 corresponds to 5 minutes.
  attr_accessor :timeout
  # Call limit used while half-open (default: 3).
  attr_accessor :half_open_max_calls

  # Populate every setting with its default value.
  def initialize
    @enabled = true
    @failure_threshold = 5
    @timeout = 5 * 60
    @half_open_max_calls = 3
  end
end
|
|
197
|
+
|
|
198
|
+
# Retry configuration
|
|
199
|
+
class RetryConfig
  # Whether retrying is active (default: true).
  attr_accessor :enabled
  # Attempt limit (default: 3).
  attr_accessor :max_attempts
  # Delay bounds (defaults: 1.0 and 60.0).
  attr_accessor :base_delay, :max_delay
  # Backoff growth factor (default: 2.0) and jitter switch (default: true).
  attr_accessor :exponential_base, :jitter

  # Populate every setting with its default value.
  def initialize
    @enabled = true
    @max_attempts = 3
    @base_delay = 1.0
    @max_delay = 60.0
    @exponential_base = 2.0
    @jitter = true
  end
end
|
|
211
|
+
|
|
212
|
+
# Rate limit configuration
|
|
213
|
+
class RateLimitConfig
  # Whether rate limit handling is active (default: true).
  attr_accessor :enabled
  # Default reset time; the default of 3600 corresponds to 1 hour.
  attr_accessor :default_reset_time

  # Populate every setting with its default value.
  def initialize
    @enabled = true
    @default_reset_time = 60 * 60
  end
end
|
|
221
|
+
|
|
222
|
+
# Health check configuration
|
|
223
|
+
class HealthCheckConfig
  # Whether health checking is active (default: true).
  attr_accessor :enabled
  # Check interval; the default of 60 corresponds to 1 minute.
  attr_accessor :interval
  # Failure threshold (default: 3).
  attr_accessor :failure_threshold

  # Populate every setting with its default value.
  def initialize
    @enabled = true
    @interval = 60
    @failure_threshold = 3
  end
end
|
|
232
|
+
|
|
233
|
+
# Provider-specific configuration
|
|
234
|
+
class ProviderConfig
  # Settings that callers may change after construction.
  attr_accessor :enabled, :type, :priority, :models, :default_flags, :timeout, :model

  # The provider name, fixed at construction time.
  attr_reader :name

  # Create a provider configuration with default settings.
  #
  # @param name [Symbol, String] the provider name (stored as a Symbol)
  def initialize(name)
    @name = name.to_sym
    @enabled = true
    @type = :usage_based
    @priority = 10
    @models = []
    @default_flags = []
    @timeout = nil
    @model = nil
  end

  # Apply a set of options to this configuration.
  #
  # Each key is mapped to its "<key>=" writer; keys with no matching public
  # writer (including :name, which is read-only) are skipped.
  #
  # @param options [Hash] options to merge
  # @return [self]
  def merge!(options)
    options.each do |attribute, new_value|
      writer = "#{attribute}="
      next unless respond_to?(writer)

      public_send(writer, new_value)
    end

    self
  end
end
|
|
262
|
+
|
|
263
|
+
# Registry for event callbacks
|
|
264
|
+
class CallbackRegistry
  # Start with no callbacks. The hash creates a callback list on demand,
  # which only #register relies on.
  def initialize
    @callbacks = Hash.new { |h, k| h[k] = [] }
  end

  # Register a callback for an event
  #
  # @param event [Symbol] the event name
  # @param block [Proc] the callback
  # @return [void]
  def register(event, block)
    @callbacks[event] << block
  end

  # Emit an event to all registered callbacks
  #
  # Callbacks run in registration order. An error raised by one callback is
  # reported to the AgentHarness logger (when one is set) and does not stop
  # the remaining callbacks.
  #
  # @param event [Symbol] the event name
  # @param data [Hash] event data
  # @return [void]
  def emit(event, data)
    # fetch bypasses the hash's default block, so emitting an event nobody
    # subscribed to does not leave an empty list behind in the registry.
    @callbacks.fetch(event, []).each do |callback|
      callback.call(data)
    rescue => e
      AgentHarness.logger&.error("[AgentHarness::CallbackRegistry] Callback error for #{event}: #{e.message}")
    end
  end

  # Check if any callbacks are registered for an event
  #
  # This is a pure query: it never adds an entry for the event.
  #
  # @param event [Symbol] the event name
  # @return [Boolean] true if callbacks exist
  def registered?(event)
    @callbacks.fetch(event, []).any?
  end
end
|
|
299
|
+
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
# Error classification system for categorizing and handling errors
|
|
5
|
+
#
|
|
6
|
+
# Provides a standardized way to classify errors from different providers
|
|
7
|
+
# into actionable categories for retry logic, provider switching, and
|
|
8
|
+
# error reporting.
|
|
9
|
+
module ErrorTaxonomy
  # Error categories with their metadata
  #
  # Each entry carries a human-readable :description, the recommended
  # :action, and whether the category is :retryable.
  CATEGORIES = {
    rate_limited: {
      description: "Rate limit exceeded",
      action: :switch_provider,
      retryable: false
    },
    auth_expired: {
      description: "Authentication failed or expired",
      action: :switch_provider,
      retryable: false
    },
    quota_exceeded: {
      description: "Usage quota exceeded",
      action: :switch_provider,
      retryable: false
    },
    transient: {
      description: "Temporary error",
      action: :retry_with_backoff,
      retryable: true
    },
    permanent: {
      description: "Unrecoverable error",
      action: :escalate,
      retryable: false
    },
    timeout: {
      description: "Operation timed out",
      action: :retry_with_backoff,
      retryable: true
    },
    unknown: {
      description: "Unknown error",
      action: :retry_with_backoff,
      retryable: true
    }
  }.freeze

  class << self
    # Classify an error based on provider patterns
    #
    # Provider patterns are tested, in hash order, against the DOWNCASED
    # error message; the first category with a matching pattern wins. When
    # none match (or no patterns are given) the generic rules apply.
    #
    # @param error [Exception] the error to classify
    # @param patterns [Hash<Symbol, Array<Regexp>>, nil] provider-specific
    #   patterns; a single Regexp per category is accepted as well
    # @return [Symbol] error category
    def classify(error, patterns = {})
      message = error.message.to_s.downcase

      # Check provider-specific patterns first. `patterns || {}` and
      # `Array(...)` tolerate nil and bare (non-array) pattern values.
      (patterns || {}).each do |category, regexes|
        return category if Array(regexes).any? { |r| message.match?(r) }
      end

      # Fall back to generic patterns
      classify_generic(message)
    end

    # Classify a message string into error category
    #
    # @param message [String] the error message
    # @return [Symbol] error category
    def classify_message(message)
      classify_generic(message.to_s.downcase)
    end

    # Get recommended action for error category
    #
    # @param category [Symbol] the error category
    # @return [Symbol] recommended action (:escalate for unknown categories)
    def action_for(category)
      CATEGORIES.dig(category, :action) || :escalate
    end

    # Check if error category is retryable
    #
    # @param category [Symbol] the error category
    # @return [Boolean] true if the error can be retried
    def retryable?(category)
      CATEGORIES.dig(category, :retryable) || false
    end

    # Get description for error category
    #
    # @param category [Symbol] the error category
    # @return [String] human-readable description
    def description_for(category)
      CATEGORIES.dig(category, :description) || "Unknown error"
    end

    # Get all category names
    #
    # @return [Array<Symbol>] list of category names
    def categories
      CATEGORIES.keys
    end

    private

    # Map a message onto a category using provider-independent keywords.
    #
    # Branch order matters: the first matching branch wins. Numeric status
    # codes are guarded by digit lookarounds so that they only match as
    # standalone numbers and not inside longer numbers such as token or
    # byte counts (e.g. the "429" inside "14290").
    #
    # @param message [String] the error message
    # @return [Symbol] error category
    def classify_generic(message)
      case message
      when /rate.?limit|too many requests|(?<!\d)429(?!\d)/i
        :rate_limited
      when /quota|usage.?limit|billing/i
        :quota_exceeded
      when /auth|unauthorized|forbidden|invalid.*(key|token)|(?<!\d)(?:401|403)(?!\d)/i
        :auth_expired
      when /timeout|timed.?out/i
        :timeout
      when /temporary|retry|(?<!\d)(?:503|502|500)(?!\d)/i
        :transient
      when /invalid|malformed|bad.?request|(?<!\d)400(?!\d)/i
        :permanent
      else
        :unknown
      end
    end
  end
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
# Base error class for all AgentHarness errors
|
|
5
|
+
class Error < StandardError
  # The underlying exception (if any) and free-form context data.
  attr_reader :original_error, :context

  # @param message [String, nil] the error message
  # @param original_error [Exception, nil] the exception being wrapped
  # @param context [Hash] additional data describing the failure
  def initialize(message = nil, original_error: nil, context: {})
    @original_error, @context = original_error, context
    super(message)
  end
end

# Provider-related errors
class ProviderError < Error
end

class ProviderNotFoundError < ProviderError
end

class ProviderUnavailableError < ProviderError
end

# Execution errors
class TimeoutError < Error
end

class CommandExecutionError < Error
end

# Rate limiting and circuit breaker errors
class RateLimitError < Error
  attr_reader :reset_time, :provider

  # @param message [String, nil] the error message
  # @param reset_time [Object, nil] reset information supplied by the raiser
  # @param provider [Object, nil] the provider supplied by the raiser
  # @param kwargs [Hash] remaining keywords, forwarded to Error#initialize
  def initialize(message = nil, reset_time: nil, provider: nil, **kwargs)
    @reset_time, @provider = reset_time, provider
    super(message, **kwargs)
  end
end

class CircuitOpenError < Error
  attr_reader :provider

  # @param message [String, nil] the error message
  # @param provider [Object, nil] the provider supplied by the raiser
  # @param kwargs [Hash] remaining keywords, forwarded to Error#initialize
  def initialize(message = nil, provider: nil, **kwargs)
    @provider = provider
    super(message, **kwargs)
  end
end

# Authentication errors
class AuthenticationError < Error
end

# Configuration errors
class ConfigurationError < Error
end

# Orchestration errors
class NoProvidersAvailableError < Error
  attr_reader :attempted_providers, :errors

  # @param message [String, nil] the error message
  # @param attempted_providers [Array] providers supplied by the raiser
  # @param errors [Hash] errors supplied by the raiser
  # @param kwargs [Hash] remaining keywords, forwarded to Error#initialize
  def initialize(message = nil, attempted_providers: [], errors: {}, **kwargs)
    @attempted_providers, @errors = attempted_providers, errors
    super(message, **kwargs)
  end
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentHarness
|
|
4
|
+
module Orchestration
|
|
5
|
+
# Circuit breaker for provider fault tolerance
|
|
6
|
+
#
|
|
7
|
+
# Implements the circuit breaker pattern to prevent cascading failures.
|
|
8
|
+
# The circuit has three states:
|
|
9
|
+
# - :closed - Normal operation, requests pass through
|
|
10
|
+
# - :open - Failures exceeded threshold, requests are blocked
|
|
11
|
+
# - :half_open - After timeout, limited requests allowed to test recovery
|
|
12
|
+
#
|
|
13
|
+
# @example
|
|
14
|
+
# breaker = CircuitBreaker.new(failure_threshold: 5, timeout: 300)
|
|
15
|
+
# breaker.record_failure
|
|
16
|
+
# breaker.open? # => false (still below threshold)
|
|
17
|
+
class CircuitBreaker
  STATES = [:closed, :open, :half_open].freeze

  attr_reader :state, :failure_count, :success_count

  # Create a new circuit breaker
  #
  # When a config object is given its values are used and the keyword
  # arguments are ignored; otherwise each keyword falls back to its default
  # (5 failures, 300 seconds, 3 half-open calls) and the breaker is enabled.
  #
  # @param config [CircuitBreakerConfig, nil] configuration object
  # @param failure_threshold [Integer] failures before opening
  # @param timeout [Integer] seconds before half-open transition
  # @param half_open_max_calls [Integer] successful calls to close
  def initialize(config = nil, failure_threshold: nil, timeout: nil, half_open_max_calls: nil)
    if config
      @enabled = config.enabled
      @failure_threshold = config.failure_threshold
      @timeout = config.timeout
      @half_open_max_calls = config.half_open_max_calls
    else
      @enabled = true
      @failure_threshold = failure_threshold || 5
      @timeout = timeout || 300
      @half_open_max_calls = half_open_max_calls || 3
    end

    reset!
  end

  # Check if circuit is open (blocking requests)
  #
  # As a side effect, an open circuit whose timeout has elapsed is moved to
  # :half_open first. A disabled breaker always reports false.
  #
  # @return [Boolean] true if open
  def open?
    return false unless @enabled

    if @state == :open && timeout_elapsed?
      transition_to(:half_open)
    end

    @state == :open
  end

  # Check if circuit is closed (allowing requests)
  #
  # @return [Boolean] true if closed
  def closed?
    @state == :closed
  end

  # Check if circuit is half-open (testing recovery)
  #
  # @return [Boolean] true if half-open
  def half_open?
    @state == :half_open
  end

  # Record a successful call
  #
  # While half-open, reaching half_open_max_calls successes closes the
  # circuit.
  #
  # @return [void]
  def record_success
    @mutex.synchronize do
      @success_count += 1

      if @state == :half_open && @success_count >= @half_open_max_calls
        transition_to(:closed)
      end
    end
  end

  # Record a failed call
  #
  # Reaching failure_threshold failures opens the circuit.
  #
  # @return [void]
  def record_failure
    @mutex.synchronize do
      @failure_count += 1

      if @failure_count >= @failure_threshold
        transition_to(:open)
      end
    end
  end

  # Reset the circuit breaker to initial state
  #
  # The lock is created once and then kept for the lifetime of the breaker:
  # replacing it on every reset would let a thread that is still inside
  # record_success / record_failure (holding the old lock) run concurrently
  # with threads using the new one. The state is cleared under the lock,
  # unless the current thread already holds it (e.g. a callback invoked
  # during a transition), since Mutex is not reentrant.
  #
  # @return [void]
  def reset!
    @mutex ||= Mutex.new
    return clear_state if @mutex.owned?

    @mutex.synchronize { clear_state }
  end

  # Get time until circuit attempts recovery
  #
  # @return [Integer, nil] seconds until half-open, or nil if not open
  def time_until_recovery
    return nil unless @state == :open && @opened_at

    remaining = @timeout - (Time.now - @opened_at)
    remaining.positive? ? remaining.to_i : 0
  end

  # Get circuit status
  #
  # @return [Hash] status information
  def status
    {
      state: @state,
      failure_count: @failure_count,
      success_count: @success_count,
      failure_threshold: @failure_threshold,
      timeout: @timeout,
      time_until_recovery: time_until_recovery,
      enabled: @enabled
    }
  end

  private

  # Put all counters and the state back to their initial values.
  #
  # @return [nil]
  def clear_state
    @state = :closed
    @failure_count = 0
    @success_count = 0
    @opened_at = nil
  end

  # Switch to new_state, reset the counters that belong to that state,
  # emit the matching event and log the transition.
  #
  # @param new_state [Symbol] one of :closed, :open, :half_open
  # @return [void]
  def transition_to(new_state)
    old_state = @state
    @state = new_state

    case new_state
    when :open
      # Capture the counter before clearing it so subscribers can see how
      # many failures tripped the breaker.
      failures = @failure_count
      @opened_at = Time.now
      @failure_count = 0
      emit_event(:circuit_open, old_state: old_state, failure_count: failures)
    when :half_open
      @success_count = 0
      emit_event(:circuit_half_open, old_state: old_state)
    when :closed
      @failure_count = 0
      @success_count = 0
      @opened_at = nil
      emit_event(:circuit_close, old_state: old_state)
    end

    AgentHarness.logger&.info(
      "[AgentHarness::CircuitBreaker] State transition: #{old_state} -> #{new_state}"
    )
  end

  # Whether the open-state timeout has passed. With no recorded opening
  # time the timeout counts as elapsed.
  #
  # @return [Boolean]
  def timeout_elapsed?
    return true unless @opened_at
    Time.now - @opened_at >= @timeout
  end

  # Publish an event through the globally configured callback registry,
  # adding this breaker to the payload as :circuit_breaker.
  #
  # @param event [Symbol] the event name
  # @param data [Hash] event payload
  # @return [void]
  def emit_event(event, **data)
    AgentHarness.configuration.callbacks.emit(event, data.merge(circuit_breaker: self))
  end
end
|
|
168
|
+
end
|
|
169
|
+
end
|