aidp 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +60 -214
- data/bin/aidp +1 -1
- data/lib/aidp/analysis/kb_inspector.rb +38 -23
- data/lib/aidp/analysis/seams.rb +2 -31
- data/lib/aidp/analysis/tree_sitter_grammar_loader.rb +1 -13
- data/lib/aidp/analysis/tree_sitter_scan.rb +3 -20
- data/lib/aidp/analyze/error_handler.rb +2 -75
- data/lib/aidp/analyze/json_file_storage.rb +292 -0
- data/lib/aidp/analyze/progress.rb +12 -0
- data/lib/aidp/analyze/progress_visualizer.rb +12 -17
- data/lib/aidp/analyze/ruby_maat_integration.rb +13 -31
- data/lib/aidp/analyze/runner.rb +256 -87
- data/lib/aidp/cli/jobs_command.rb +100 -432
- data/lib/aidp/cli.rb +309 -239
- data/lib/aidp/config.rb +298 -10
- data/lib/aidp/debug_logger.rb +195 -0
- data/lib/aidp/debug_mixin.rb +187 -0
- data/lib/aidp/execute/progress.rb +9 -0
- data/lib/aidp/execute/runner.rb +221 -40
- data/lib/aidp/execute/steps.rb +17 -7
- data/lib/aidp/execute/workflow_selector.rb +211 -0
- data/lib/aidp/harness/completion_checker.rb +268 -0
- data/lib/aidp/harness/condition_detector.rb +1526 -0
- data/lib/aidp/harness/config_loader.rb +373 -0
- data/lib/aidp/harness/config_manager.rb +382 -0
- data/lib/aidp/harness/config_schema.rb +1006 -0
- data/lib/aidp/harness/config_validator.rb +355 -0
- data/lib/aidp/harness/configuration.rb +477 -0
- data/lib/aidp/harness/enhanced_runner.rb +494 -0
- data/lib/aidp/harness/error_handler.rb +616 -0
- data/lib/aidp/harness/provider_config.rb +423 -0
- data/lib/aidp/harness/provider_factory.rb +306 -0
- data/lib/aidp/harness/provider_manager.rb +1269 -0
- data/lib/aidp/harness/provider_type_checker.rb +88 -0
- data/lib/aidp/harness/runner.rb +411 -0
- data/lib/aidp/harness/state/errors.rb +28 -0
- data/lib/aidp/harness/state/metrics.rb +219 -0
- data/lib/aidp/harness/state/persistence.rb +128 -0
- data/lib/aidp/harness/state/provider_state.rb +132 -0
- data/lib/aidp/harness/state/ui_state.rb +68 -0
- data/lib/aidp/harness/state/workflow_state.rb +123 -0
- data/lib/aidp/harness/state_manager.rb +586 -0
- data/lib/aidp/harness/status_display.rb +888 -0
- data/lib/aidp/harness/ui/base.rb +16 -0
- data/lib/aidp/harness/ui/enhanced_tui.rb +545 -0
- data/lib/aidp/harness/ui/enhanced_workflow_selector.rb +252 -0
- data/lib/aidp/harness/ui/error_handler.rb +132 -0
- data/lib/aidp/harness/ui/frame_manager.rb +361 -0
- data/lib/aidp/harness/ui/job_monitor.rb +500 -0
- data/lib/aidp/harness/ui/navigation/main_menu.rb +311 -0
- data/lib/aidp/harness/ui/navigation/menu_formatter.rb +120 -0
- data/lib/aidp/harness/ui/navigation/menu_item.rb +142 -0
- data/lib/aidp/harness/ui/navigation/menu_state.rb +139 -0
- data/lib/aidp/harness/ui/navigation/submenu.rb +202 -0
- data/lib/aidp/harness/ui/navigation/workflow_selector.rb +176 -0
- data/lib/aidp/harness/ui/progress_display.rb +280 -0
- data/lib/aidp/harness/ui/question_collector.rb +141 -0
- data/lib/aidp/harness/ui/spinner_group.rb +184 -0
- data/lib/aidp/harness/ui/spinner_helper.rb +152 -0
- data/lib/aidp/harness/ui/status_manager.rb +312 -0
- data/lib/aidp/harness/ui/status_widget.rb +280 -0
- data/lib/aidp/harness/ui/workflow_controller.rb +312 -0
- data/lib/aidp/harness/user_interface.rb +2381 -0
- data/lib/aidp/provider_manager.rb +131 -7
- data/lib/aidp/providers/anthropic.rb +28 -103
- data/lib/aidp/providers/base.rb +170 -0
- data/lib/aidp/providers/cursor.rb +52 -181
- data/lib/aidp/providers/gemini.rb +24 -107
- data/lib/aidp/providers/macos_ui.rb +99 -5
- data/lib/aidp/providers/opencode.rb +194 -0
- data/lib/aidp/storage/csv_storage.rb +172 -0
- data/lib/aidp/storage/file_manager.rb +214 -0
- data/lib/aidp/storage/json_storage.rb +140 -0
- data/lib/aidp/version.rb +1 -1
- data/lib/aidp.rb +54 -39
- data/templates/COMMON/AGENT_BASE.md +11 -0
- data/templates/EXECUTE/00_PRD.md +4 -4
- data/templates/EXECUTE/02_ARCHITECTURE.md +5 -4
- data/templates/EXECUTE/07_TEST_PLAN.md +4 -1
- data/templates/EXECUTE/08_TASKS.md +4 -4
- data/templates/EXECUTE/10_IMPLEMENTATION_AGENT.md +4 -4
- data/templates/README.md +279 -0
- data/templates/aidp-development.yml.example +373 -0
- data/templates/aidp-minimal.yml.example +48 -0
- data/templates/aidp-production.yml.example +475 -0
- data/templates/aidp.yml.example +598 -0
- metadata +93 -69
- data/lib/aidp/analyze/agent_personas.rb +0 -71
- data/lib/aidp/analyze/agent_tool_executor.rb +0 -439
- data/lib/aidp/analyze/data_retention_manager.rb +0 -421
- data/lib/aidp/analyze/database.rb +0 -260
- data/lib/aidp/analyze/dependencies.rb +0 -335
- data/lib/aidp/analyze/export_manager.rb +0 -418
- data/lib/aidp/analyze/focus_guidance.rb +0 -517
- data/lib/aidp/analyze/incremental_analyzer.rb +0 -533
- data/lib/aidp/analyze/language_analysis_strategies.rb +0 -897
- data/lib/aidp/analyze/large_analysis_progress.rb +0 -499
- data/lib/aidp/analyze/memory_manager.rb +0 -339
- data/lib/aidp/analyze/metrics_storage.rb +0 -336
- data/lib/aidp/analyze/parallel_processor.rb +0 -454
- data/lib/aidp/analyze/performance_optimizer.rb +0 -691
- data/lib/aidp/analyze/repository_chunker.rb +0 -697
- data/lib/aidp/analyze/static_analysis_detector.rb +0 -577
- data/lib/aidp/analyze/storage.rb +0 -655
- data/lib/aidp/analyze/tool_configuration.rb +0 -441
- data/lib/aidp/analyze/tool_modernization.rb +0 -750
- data/lib/aidp/database/pg_adapter.rb +0 -148
- data/lib/aidp/database_config.rb +0 -69
- data/lib/aidp/database_connection.rb +0 -72
- data/lib/aidp/job_manager.rb +0 -41
- data/lib/aidp/jobs/base_job.rb +0 -45
- data/lib/aidp/jobs/provider_execution_job.rb +0 -83
- data/lib/aidp/project_detector.rb +0 -117
- data/lib/aidp/providers/agent_supervisor.rb +0 -348
- data/lib/aidp/providers/supervised_base.rb +0 -317
- data/lib/aidp/providers/supervised_cursor.rb +0 -22
- data/lib/aidp/sync.rb +0 -13
- data/lib/aidp/workspace.rb +0 -19
@@ -0,0 +1,616 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "net/http"
|
4
|
+
require_relative "../debug_mixin"
|
5
|
+
|
6
|
+
module Aidp
|
7
|
+
module Harness
|
8
|
+
# Handles error recovery, retry strategies, and fallback mechanisms
|
9
|
+
class ErrorHandler
|
10
|
+
include Aidp::DebugMixin
|
11
|
+
|
12
|
+
# Builds an error handler around the supplied collaborators.
#
# @param provider_manager stored as-is; other methods ask it for the
#   current provider/model and for provider/model switches
# @param configuration stored as-is; consulted for retry settings
# @param metrics_manager optional, stored as-is (may be nil)
def initialize(provider_manager, configuration, metrics_manager = nil)
  @provider_manager, @configuration, @metrics_manager =
    provider_manager, configuration, metrics_manager

  # Bookkeeping containers, all empty at construction time.
  @retry_strategies = {}
  @retry_counts = {}
  @circuit_breakers = {}
  @error_history = []

  # Helper objects used for delay calculation, classification and recovery planning.
  @backoff_calculator = BackoffCalculator.new
  @error_classifier = ErrorClassifier.new
  @recovery_planner = RecoveryPlanner.new

  # Fill @retry_strategies with the defaults plus any configured overrides.
  initialize_retry_strategies
end
|
25
|
+
|
26
|
+
# Get error statistics
|
27
|
+
# Summarises everything this handler has recorded so far.
#
# Circuit breakers in this class are stored with an :open flag (and a
# :half_open_calls counter once the open timeout has elapsed), not with a
# :state key, so the state is derived here instead of read directly.
# An explicit :state value, if one is ever present, is still honoured.
#
# @return [Hash] with keys :total_errors, :error_types (count per type),
#   :recent_errors (last 10 entries), :retry_counts (copy) and
#   :circuit_breaker_states (key => :open / :half_open / :closed)
def error_stats
  breaker_states = @circuit_breakers.transform_values do |cb|
    if cb[:state]
      cb[:state]
    elsif cb[:open]
      :open
    elsif cb.key?(:half_open_calls)
      :half_open
    else
      :closed
    end
  end

  {
    total_errors: @error_history.size,
    error_types: @error_history.group_by { |e| e[:error_type] }.transform_values(&:size),
    recent_errors: @error_history.last(10),
    retry_counts: @retry_counts.dup,
    circuit_breaker_states: breaker_states
  }
end
|
36
|
+
|
37
|
+
# Main entry point for error handling
|
38
|
+
# Classifies an error, records it, then either retries or falls back to recovery.
#
# @param error the raised error object
# @param context [Hash] extra details handed to the classifier and to the
#   retry/recovery step
# @return the result of execute_retry when a retry is allowed, otherwise
#   the result of attempt_recovery
def handle_error(error, context = {})
  classified = @error_classifier.classify_error(error, context)

  debug_error(error, context)
  debug_log("🔧 ErrorHandler: Processing error", level: :info, data: {
    error_type: classified[:error_type],
    provider: classified[:provider],
    model: classified[:model]
  })

  # Metrics are optional; history is always kept.
  @metrics_manager&.record_error(classified[:provider], classified[:model], classified)
  @error_history << classified

  retry_strategy = get_retry_strategy(classified[:error_type])

  unless should_retry?(classified, retry_strategy)
    debug_log("🚨 ErrorHandler: No retry, attempting recovery", level: :warn, data: {
      error_type: classified[:error_type],
      reason: "Retry not applicable or exhausted"
    })
    return attempt_recovery(classified, context)
  end

  debug_log("🔄 ErrorHandler: Attempting retry", level: :info, data: {
    strategy: retry_strategy[:name],
    max_retries: retry_strategy[:max_retries]
  })
  execute_retry(classified, retry_strategy, context)
end
|
76
|
+
|
77
|
+
# Execute a block with retry logic
|
78
|
+
# Runs the given block, retrying it when it raises and the retry rules allow.
#
# The error is classified once, with the current provider/model as context,
# and the classifier's hash is used directly as error_info. Wrapping that
# hash under :error_type would make the strategy lookup always fall back to
# the default and would let rate-limit and authentication errors be retried.
#
# The attempt budget is max_attempts + 1 (the first try plus the retries).
#
# @yield the operation to run
# @return the block's value on success, otherwise the result of handle_error
#   for the last error raised
def execute_with_retry(&block)
  attempt_limit = max_attempts + 1
  attempt = 0

  begin
    attempt += 1
    yield
  rescue => error
    error_context = {
      provider: @provider_manager.current_provider,
      model: @provider_manager.current_model
    }

    if attempt < attempt_limit
      error_info = @error_classifier.classify_error(error, error_context)
      strategy = get_retry_strategy(error_info[:error_type])

      if should_retry?(error_info, strategy)
        delay = @backoff_calculator.calculate_delay(attempt, strategy[:backoff_strategy] || :exponential, 1, 10)
        # Plain blocking sleep between attempts.
        sleep(delay)
        retry
      end
    end

    # Either the attempt budget is spent or this error must not be retried.
    handle_error(error, error_context)
  end
end
|
110
|
+
|
111
|
+
# Execute a retry with the given strategy
|
112
|
+
# Performs one retry for the given error under the given strategy.
#
# Bumps the per "provider:model:error_type" counter, gives up once the
# counter exceeds the strategy's :max_retries, otherwise sleeps for the
# computed backoff and delegates to execute_retry_attempt.
#
# @param error_info [Hash] classified error (:provider, :model, :error_type)
# @param strategy [Hash] retry strategy (:max_retries, :backoff_strategy,
#   :base_delay, :max_delay, :name)
# @param context [Hash] passed through to execute_retry_attempt
# @return [Hash] the attempt result plus :retry_count, :delay and :strategy,
#   or an :exhausted_retries hash
def execute_retry(error_info, strategy, context = {})
  error_type = error_info[:error_type]
  counter_key = "#{error_info[:provider]}:#{error_info[:model]}:#{error_type}"

  @retry_counts[counter_key] = (@retry_counts[counter_key] || 0) + 1

  if @retry_counts[counter_key] > strategy[:max_retries]
    return {
      success: false,
      action: :exhausted_retries,
      error: "Max retries exceeded for #{error_type}",
      retry_count: @retry_counts[counter_key],
      next_action: :fallback
    }
  end

  wait = @backoff_calculator.calculate_delay(
    @retry_counts[counter_key],
    strategy[:backoff_strategy],
    strategy[:base_delay],
    strategy[:max_delay]
  )

  # Plain blocking sleep; skipped entirely for a zero delay.
  sleep(wait) if wait > 0

  outcome = execute_retry_attempt(error_info, strategy, context)
  outcome.merge!(
    retry_count: @retry_counts[counter_key],
    delay: wait,
    strategy: strategy[:name]
  )
  outcome
end
|
159
|
+
|
160
|
+
# Attempt recovery when retries are exhausted or not applicable
|
161
|
+
# Asks the recovery planner for a plan and runs the matching recovery step.
#
# @param error_info [Hash] classified error
# @param context [Hash] forwarded to the planner
# @return [Hash] the result of the chosen recovery step, or an
#   :unknown_recovery failure hash when the plan's action is not recognised
def attempt_recovery(error_info, context = {})
  plan = @recovery_planner.create_recovery_plan(error_info, context)

  # Plan action => method that carries it out.
  step = {
    switch_provider: :attempt_provider_switch,
    switch_model: :attempt_model_switch,
    circuit_breaker: :open_circuit_breaker,
    escalate: :escalate_error,
    abort: :abort_execution
  }[plan[:action]]

  return send(step, error_info, plan) if step

  {
    success: false,
    action: :unknown_recovery,
    error: "Unknown recovery action: #{plan[:action]}"
  }
end
|
183
|
+
|
184
|
+
# Get retry strategy for error type
|
185
|
+
# Looks up the retry strategy registered for an error type.
#
# @param error_type the key to look up in @retry_strategies
# @return the matching strategy, or the :default strategy when there is none
def get_retry_strategy(error_type)
  specific = @retry_strategies[error_type]
  return specific if specific

  @retry_strategies[:default]
end
|
188
|
+
|
189
|
+
# Get maximum retry attempts
|
190
|
+
# Retry limit taken from the configuration.
#
# @return the configuration's max_retries, or 3 when the configuration
#   object does not respond to max_retries
def max_attempts
  return 3 unless @configuration.respond_to?(:max_retries)

  @configuration.max_retries
end
|
193
|
+
|
194
|
+
# Check if we should retry based on error type and strategy
|
195
|
+
# Decides whether an error may be retried.
#
# A retry is refused when the strategy is disabled, when the error type is
# :rate_limit, :authentication or :permission_denied, or when the circuit
# breaker for the error's "provider:model" key is open.
#
# @param error_info [Hash] classified error (:error_type, :provider, :model)
# @param strategy [Hash] retry strategy; only :enabled is read here
# @return [Boolean]
def should_retry?(error_info, strategy)
  never_retried = %i[rate_limit authentication permission_denied]

  return false if !strategy[:enabled] || never_retried.include?(error_info[:error_type])

  breaker_key = "#{error_info[:provider]}:#{error_info[:model]}"
  circuit_breaker_open?(breaker_key) ? false : true
end
|
207
|
+
|
208
|
+
# Reset retry counts for a specific provider/model combination
|
209
|
+
# Forgets retry counters for a provider, optionally narrowed to one model.
#
# Counter keys have the form "provider:model:error_type", so the match is
# done on the key prefix.
#
# @param provider provider name
# @param model optional model name; when omitted every model of the
#   provider is reset
# @return [Array] the counter keys that were removed
def reset_retry_counts(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"

  stale_keys = @retry_counts.keys.select { |counter_key| counter_key.start_with?(prefix) }
  stale_keys.each { |counter_key| @retry_counts.delete(counter_key) }
end
|
220
|
+
|
221
|
+
# Get retry status for a provider/model
|
222
|
+
# Reports retry counters for a provider, optionally narrowed to one model.
#
# The error type is taken from the last ":"-separated segment of each
# counter key, so entries for different models that share an error type
# land on the same result key (the later one wins).
#
# @param provider provider name
# @param model optional model name
# @return [Hash] error type (String) => {retry_count:, max_retries:}
def get_retry_status(provider, model = nil)
  prefix = model ? "#{provider}:#{model}:" : "#{provider}:"
  matching = @retry_counts.keys.select { |counter_key| counter_key.start_with?(prefix) }

  matching.each_with_object({}) do |counter_key, report|
    type_name = counter_key.split(":").last
    report[type_name] = {
      retry_count: @retry_counts[counter_key],
      max_retries: get_retry_strategy(type_name.to_sym)[:max_retries]
    }
  end
end
|
240
|
+
|
241
|
+
# Get error history
|
242
|
+
# Returns recorded errors, optionally filtered by timestamp.
#
# @param time_range optional object responding to include?; entries are
#   kept when it includes their :timestamp
# @return [Array] the history array itself when no range is given,
#   otherwise a new array with the matching entries
def get_error_history(time_range = nil)
  return @error_history unless time_range

  @error_history.select { |entry| time_range.include?(entry[:timestamp]) }
end
|
249
|
+
|
250
|
+
# Clear error history
|
251
|
+
# Empties the recorded error history in place.
#
# @return [Array] the (now empty) history array
def clear_error_history
  @error_history.clear
end
|
254
|
+
|
255
|
+
# Get circuit breaker status
|
256
|
+
# Snapshot of every circuit breaker, limited to its reportable fields.
#
# @return [Hash] breaker key => {open:, opened_at:, failure_count:, threshold:};
#   a field the breaker does not have is reported as nil
def get_circuit_breaker_status
  reported_fields = %i[open opened_at failure_count threshold]

  @circuit_breakers.transform_values do |breaker|
    reported_fields.map { |field| [field, breaker[field]] }.to_h
  end
end
|
266
|
+
|
267
|
+
# Reset circuit breaker
|
268
|
+
# Removes circuit breaker state for a provider, optionally for one model only.
#
# Breakers are stored under "provider:model" keys, so a provider-only reset
# has to remove every key with the "provider:" prefix; deleting the bare
# provider key alone would never match anything. This mirrors how
# reset_retry_counts treats a missing model.
#
# @param provider provider name
# @param model optional model name
# @return the removed breaker entry when a model is given (nil if there was
#   none); for a provider-only reset, whatever was stored under the bare
#   provider key (normally nil)
def reset_circuit_breaker(provider, model = nil)
  return @circuit_breakers.delete("#{provider}:#{model}") if model

  prefix = "#{provider}:"
  @circuit_breakers.delete_if { |key, _breaker| key.to_s.start_with?(prefix) }
  @circuit_breakers.delete(provider)
end
|
272
|
+
|
273
|
+
# Reset all circuit breakers
|
274
|
+
# Drops the state of every circuit breaker in place.
#
# @return [Hash] the (now empty) breaker table
def reset_all_circuit_breakers
  @circuit_breakers.clear
end
|
277
|
+
|
278
|
+
private
|
279
|
+
|
280
|
+
# Populates @retry_strategies with the built-in defaults and then applies
# overrides from the configuration's retry_config[:strategies], if any.
#
# Overrides are merged on top of the built-in strategy of the same name.
# An override for an error type with no built-in strategy starts from the
# :default strategy (named after the error type) instead of raising on
# nil.merge. Override keys are symbolised so string-keyed configuration
# (e.g. loaded from YAML) actually replaces the defaults. Entries whose
# value is not a Hash are ignored.
def initialize_retry_strategies
  # Shared shape for error types that are never retried.
  no_retry = lambda do |name|
    {
      name: name,
      enabled: false,
      max_retries: 0,
      backoff_strategy: :none,
      base_delay: 0.0,
      max_delay: 0.0,
      jitter: false
    }
  end

  @retry_strategies = {
    # Network errors - retry with exponential backoff
    network_error: {
      name: "network_error",
      enabled: true,
      max_retries: 3,
      backoff_strategy: :exponential,
      base_delay: 1.0,
      max_delay: 30.0,
      jitter: true
    },

    # Server errors - retry with linear backoff
    server_error: {
      name: "server_error",
      enabled: true,
      max_retries: 2,
      backoff_strategy: :linear,
      base_delay: 2.0,
      max_delay: 10.0,
      jitter: true
    },

    # Timeout errors - retry with exponential backoff
    timeout: {
      name: "timeout",
      enabled: true,
      max_retries: 2,
      backoff_strategy: :exponential,
      base_delay: 1.0,
      max_delay: 15.0,
      jitter: true
    },

    # Rate limit errors - no retry, immediate switch
    rate_limit: no_retry.call("rate_limit"),

    # Authentication errors - no retry, escalate
    authentication: no_retry.call("authentication"),

    # Permission denied - no retry, escalate
    permission_denied: no_retry.call("permission_denied"),

    # Default strategy for unknown errors
    default: {
      name: "default",
      enabled: true,
      max_retries: 2,
      backoff_strategy: :exponential,
      base_delay: 1.0,
      max_delay: 20.0,
      jitter: true
    }
  }

  # Override with configuration if available
  return unless @configuration.respond_to?(:retry_config)

  overrides = (@configuration.retry_config || {})[:strategies] || {}
  overrides.each do |error_type, config|
    next unless config.is_a?(Hash)

    key = error_type.to_sym
    base = @retry_strategies[key] || @retry_strategies[:default].merge(name: key.to_s)
    @retry_strategies[key] = base.merge(config.transform_keys(&:to_sym))
  end
end
|
368
|
+
|
369
|
+
# Placeholder for the actual retry execution: it does not call a provider
# yet and always reports success, echoing the error's provider, model and
# error type.
#
# TODO: Integrate with actual provider execution
#
# @param error_info [Hash] classified error
# @return [Hash] {success: true, action: :retry_attempt, provider:, model:, error_type:}
def execute_retry_attempt(error_info, _strategy, _context)
  outcome = {success: true, action: :retry_attempt}
  outcome[:provider] = error_info[:provider]
  outcome[:model] = error_info[:model]
  outcome[:error_type] = error_info[:error_type]
  outcome
end
|
380
|
+
|
381
|
+
# Asks the provider manager to switch provider in response to an error.
#
# @param error_info [Hash] classified error; :error_type and :context are
#   passed to the provider manager
# @return [Hash] a :provider_switch success hash naming the new provider,
#   or a :provider_switch_failed hash when the manager returns nothing
def attempt_provider_switch(error_info, _recovery_plan)
  replacement = @provider_manager.switch_provider_for_error(
    error_info[:error_type],
    error_info[:context]
  )

  unless replacement
    return {
      success: false,
      action: :provider_switch_failed,
      error: "No available providers for switch"
    }
  end

  {
    success: true,
    action: :provider_switch,
    new_provider: replacement,
    reason: "Error recovery: #{error_info[:error_type]}"
  }
end
|
402
|
+
|
403
|
+
# Asks the provider manager to switch model in response to an error.
#
# @param error_info [Hash] classified error; :error_type and :context are
#   passed to the provider manager
# @return [Hash] a :model_switch success hash naming the new model, or a
#   :model_switch_failed hash when the manager returns nothing
def attempt_model_switch(error_info, _recovery_plan)
  replacement = @provider_manager.switch_model_for_error(
    error_info[:error_type],
    error_info[:context]
  )

  unless replacement
    return {
      success: false,
      action: :model_switch_failed,
      error: "No available models for switch"
    }
  end

  {
    success: true,
    action: :model_switch,
    provider: error_info[:provider],
    new_model: replacement,
    reason: "Error recovery: #{error_info[:error_type]}"
  }
end
|
425
|
+
|
426
|
+
# Opens (or re-opens) the circuit breaker for the error's provider/model.
#
# The breaker is stored under "provider:model" with the current time as
# :opened_at. :failure_count and :threshold come from the recovery plan and
# default to 1 and 5.
#
# @param error_info [Hash] classified error (:provider, :model)
# @param recovery_plan [Hash] may carry :failure_count and :threshold
# @return [Hash] a :circuit_breaker_opened success hash
def open_circuit_breaker(error_info, recovery_plan)
  provider = error_info[:provider]
  model = error_info[:model]

  breaker = {
    open: true,
    opened_at: Time.now,
    failure_count: recovery_plan[:failure_count] || 1,
    threshold: recovery_plan[:threshold] || 5
  }
  @circuit_breakers["#{provider}:#{model}"] = breaker

  {
    success: true,
    action: :circuit_breaker_opened,
    provider: provider,
    model: model,
    reason: "Circuit breaker opened due to repeated failures"
  }
end
|
443
|
+
|
444
|
+
# Builds the result for an error that needs manual intervention.
#
# @param error_info [Hash] classified error (:error_type)
# @param recovery_plan [Hash] plan whose :reason becomes :escalation_reason
# @return [Hash] an :escalated failure hash
def escalate_error(error_info, recovery_plan)
  summary = "Error escalated: #{error_info[:error_type]}"

  {
    success: false,
    action: :escalated,
    error: summary,
    escalation_reason: recovery_plan[:reason],
    requires_manual_intervention: true
  }
end
|
453
|
+
|
454
|
+
# Builds the result for an aborted execution.
#
# @param error_info [Hash] classified error (:error_type)
# @param recovery_plan [Hash] plan whose :reason becomes :abort_reason
# @return [Hash] an :aborted failure hash
def abort_execution(error_info, recovery_plan)
  summary = "Execution aborted due to: #{error_info[:error_type]}"

  {
    success: false,
    action: :aborted,
    error: summary,
    abort_reason: recovery_plan[:reason]
  }
end
|
462
|
+
|
463
|
+
# Tells whether the circuit breaker stored under +key+ is currently open.
#
# An open breaker whose age exceeds the timeout is flipped to half-open
# (:open becomes false, :half_open_calls is reset to 0) and reported as not
# open. The timeout comes from the configuration's
# circuit_breaker_config[:timeout]; it falls back to 300 seconds when the
# configuration has no such method, returns nil, or has no :timeout entry.
# Without that fallback a missing :timeout would make the comparison below
# raise.
#
# @param key [String] breaker key, "provider:model"
# @return [Boolean]
def circuit_breaker_open?(key)
  cb = @circuit_breakers[key]
  return false unless cb && cb[:open]

  configured = @configuration.circuit_breaker_config if @configuration.respond_to?(:circuit_breaker_config)
  timeout = (configured && configured[:timeout]) || 300

  # Still inside the open window: keep rejecting.
  return true unless Time.now - cb[:opened_at] > timeout

  # Enough time has passed: try half-open.
  cb[:open] = false
  cb[:half_open_calls] = 0
  false
end
|
484
|
+
|
485
|
+
# Helper classes
|
486
|
+
# Computes how long to wait before a retry.
class BackoffCalculator
  # Total width of the jitter window as a fraction of the delay: the delay
  # is shifted by a random amount within ±(JITTER_SPREAD / 2), i.e. ±5%.
  JITTER_SPREAD = 0.1

  # @param retry_count [Integer] 1-based number of the retry being scheduled
  # @param strategy [Symbol] :exponential (base * 2^(n-1)), :linear
  #   (base * n), :fixed (base) or :none (always 0.0); any other value
  #   behaves like :fixed
  # @param base_delay [Numeric] base delay
  # @param max_delay [Numeric] upper bound applied after jitter
  # @param jitter [Boolean] whether to add random jitter; defaults to true,
  #   which matches the behaviour when the keyword is omitted
  # @return [Numeric] the delay, never more than max_delay
  def calculate_delay(retry_count, strategy, base_delay, max_delay, jitter: true)
    return 0.0 if strategy == :none

    delay =
      case strategy
      when :exponential then base_delay * (2**(retry_count - 1))
      when :linear then base_delay * retry_count
      else base_delay
      end

    # rand is in [0, 1), so (rand - 0.5) is in [-0.5, 0.5).
    delay += delay * JITTER_SPREAD * (rand - 0.5) if jitter

    # Cap at max delay
    [delay, max_delay].min
  end
end
|
511
|
+
|
512
|
+
# Turns a raised error into a hash describing what kind of failure it is.
class ErrorClassifier
  # @param error the raised error (may be nil)
  # @param context [Hash, nil] optional details; :provider and :model are
  #   copied into the result when context is a Hash
  # @return [Hash] with :error, :error_type, :provider, :model, :timestamp,
  #   :context, :message and :backtrace (first 5 frames)
  def classify_error(error, context = {})
    error_type = classify_error_type(error)

    {
      error: error,
      error_type: error_type,
      provider: (context&.is_a?(Hash) && context[:provider]) || "unknown",
      model: (context&.is_a?(Hash) && context[:model]) || "unknown",
      timestamp: Time.now,
      context: context || {},
      message: error&.message || "Unknown error",
      backtrace: error&.backtrace&.first(5)
    }
  end

  private

  # Maps an error object to one of :unknown, :timeout, :rate_limit,
  # :authentication, :server_error, :network_error or :default.
  def classify_error_type(error)
    return :unknown if error.nil?

    case error
    when Timeout::Error
      :timeout
    when Net::HTTPExceptions
      # Net::HTTPExceptions is the module mixed into every net/http response
      # exception (HTTPError, HTTPClientException, HTTPFatalError, ...).
      # Those classes are siblings rather than subclasses of Net::HTTPError,
      # so matching on HTTPError alone would miss the 4xx/5xx errors raised
      # by Net::HTTPResponse#value.
      classify_http_status(error.response&.code.to_i)
    when SocketError, Errno::ECONNREFUSED, Errno::EHOSTUNREACH
      :network_error
    when StandardError
      classify_by_message(error.message.downcase)
    else
      :default
    end
  end

  # Maps an HTTP status code to an error type. A missing response arrives
  # here as 0 and is treated as a network error.
  def classify_http_status(status)
    case status
    when 429
      :rate_limit
    when 401, 403
      :authentication
    when 500..599
      :server_error
    else
      :network_error
    end
  end

  # Falls back to looking for common phrases in the downcased error message.
  def classify_by_message(message)
    if message.include?("rate limit") || message.include?("quota")
      :rate_limit
    elsif message.include?("timeout")
      :timeout
    elsif message.include?("auth") || message.include?("permission")
      :authentication
    elsif message.include?("server") || message.include?("internal")
      :server_error
    else
      :default
    end
  end
end
|
569
|
+
|
570
|
+
# Chooses a recovery action for an error that will not be retried.
class RecoveryPlanner
  # Escalation plan shared by the two credential-related error types.
  ESCALATE_PLAN = {
    action: :escalate,
    reason: "Authentication or permission issue requires manual intervention",
    priority: :critical
  }.freeze

  # Error type => recovery plan.
  PLANS = {
    rate_limit: {
      action: :switch_provider,
      reason: "Rate limit reached, switching provider",
      priority: :high
    }.freeze,
    authentication: ESCALATE_PLAN,
    permission_denied: ESCALATE_PLAN,
    timeout: {
      action: :switch_model,
      reason: "Timeout error, trying faster model",
      priority: :medium
    }.freeze,
    network_error: {
      action: :switch_provider,
      reason: "Network error, switching provider",
      priority: :high
    }.freeze,
    server_error: {
      action: :switch_provider,
      reason: "Server error, switching provider",
      priority: :medium
    }.freeze
  }.freeze

  # Plan used for every error type without an entry in PLANS.
  FALLBACK_PLAN = {
    action: :switch_provider,
    reason: "Unknown error, attempting provider switch",
    priority: :low
  }.freeze

  # @param error_info [Hash] classified error; only :error_type is read
  # @return [Hash] a fresh, unfrozen hash with :action, :reason and :priority
  def create_recovery_plan(error_info, _context = {})
    PLANS.fetch(error_info[:error_type], FALLBACK_PLAN).dup
  end
end
|
614
|
+
end
|
615
|
+
end
|
616
|
+
end
|