crackerjack 0.31.10__py3-none-any.whl → 0.31.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crackerjack might be problematic. Click here for more details.
- crackerjack/CLAUDE.md +288 -705
- crackerjack/__main__.py +22 -8
- crackerjack/agents/__init__.py +0 -3
- crackerjack/agents/architect_agent.py +0 -43
- crackerjack/agents/base.py +1 -9
- crackerjack/agents/coordinator.py +2 -148
- crackerjack/agents/documentation_agent.py +109 -81
- crackerjack/agents/dry_agent.py +122 -97
- crackerjack/agents/formatting_agent.py +3 -16
- crackerjack/agents/import_optimization_agent.py +1174 -130
- crackerjack/agents/performance_agent.py +956 -188
- crackerjack/agents/performance_helpers.py +229 -0
- crackerjack/agents/proactive_agent.py +1 -48
- crackerjack/agents/refactoring_agent.py +516 -246
- crackerjack/agents/refactoring_helpers.py +282 -0
- crackerjack/agents/security_agent.py +393 -90
- crackerjack/agents/test_creation_agent.py +1776 -120
- crackerjack/agents/test_specialist_agent.py +59 -15
- crackerjack/agents/tracker.py +0 -102
- crackerjack/api.py +145 -37
- crackerjack/cli/handlers.py +48 -30
- crackerjack/cli/interactive.py +11 -11
- crackerjack/cli/options.py +66 -4
- crackerjack/code_cleaner.py +808 -148
- crackerjack/config/global_lock_config.py +110 -0
- crackerjack/config/hooks.py +43 -64
- crackerjack/core/async_workflow_orchestrator.py +247 -97
- crackerjack/core/autofix_coordinator.py +192 -109
- crackerjack/core/enhanced_container.py +46 -63
- crackerjack/core/file_lifecycle.py +549 -0
- crackerjack/core/performance.py +9 -8
- crackerjack/core/performance_monitor.py +395 -0
- crackerjack/core/phase_coordinator.py +281 -94
- crackerjack/core/proactive_workflow.py +9 -58
- crackerjack/core/resource_manager.py +501 -0
- crackerjack/core/service_watchdog.py +490 -0
- crackerjack/core/session_coordinator.py +4 -8
- crackerjack/core/timeout_manager.py +504 -0
- crackerjack/core/websocket_lifecycle.py +475 -0
- crackerjack/core/workflow_orchestrator.py +343 -209
- crackerjack/dynamic_config.py +50 -9
- crackerjack/errors.py +3 -4
- crackerjack/executors/async_hook_executor.py +63 -13
- crackerjack/executors/cached_hook_executor.py +14 -14
- crackerjack/executors/hook_executor.py +100 -37
- crackerjack/executors/hook_lock_manager.py +856 -0
- crackerjack/executors/individual_hook_executor.py +120 -86
- crackerjack/intelligence/__init__.py +0 -7
- crackerjack/intelligence/adaptive_learning.py +13 -86
- crackerjack/intelligence/agent_orchestrator.py +15 -78
- crackerjack/intelligence/agent_registry.py +12 -59
- crackerjack/intelligence/agent_selector.py +31 -92
- crackerjack/intelligence/integration.py +1 -41
- crackerjack/interactive.py +9 -9
- crackerjack/managers/async_hook_manager.py +25 -8
- crackerjack/managers/hook_manager.py +9 -9
- crackerjack/managers/publish_manager.py +57 -59
- crackerjack/managers/test_command_builder.py +6 -36
- crackerjack/managers/test_executor.py +9 -61
- crackerjack/managers/test_manager.py +17 -63
- crackerjack/managers/test_manager_backup.py +77 -127
- crackerjack/managers/test_progress.py +4 -23
- crackerjack/mcp/cache.py +5 -12
- crackerjack/mcp/client_runner.py +10 -10
- crackerjack/mcp/context.py +64 -6
- crackerjack/mcp/dashboard.py +14 -11
- crackerjack/mcp/enhanced_progress_monitor.py +55 -55
- crackerjack/mcp/file_monitor.py +72 -42
- crackerjack/mcp/progress_components.py +103 -84
- crackerjack/mcp/progress_monitor.py +122 -49
- crackerjack/mcp/rate_limiter.py +12 -12
- crackerjack/mcp/server_core.py +16 -22
- crackerjack/mcp/service_watchdog.py +26 -26
- crackerjack/mcp/state.py +15 -0
- crackerjack/mcp/tools/core_tools.py +95 -39
- crackerjack/mcp/tools/error_analyzer.py +6 -32
- crackerjack/mcp/tools/execution_tools.py +1 -56
- crackerjack/mcp/tools/execution_tools_backup.py +35 -131
- crackerjack/mcp/tools/intelligence_tool_registry.py +0 -36
- crackerjack/mcp/tools/intelligence_tools.py +2 -55
- crackerjack/mcp/tools/monitoring_tools.py +308 -145
- crackerjack/mcp/tools/proactive_tools.py +12 -42
- crackerjack/mcp/tools/progress_tools.py +23 -15
- crackerjack/mcp/tools/utility_tools.py +3 -40
- crackerjack/mcp/tools/workflow_executor.py +40 -60
- crackerjack/mcp/websocket/app.py +0 -3
- crackerjack/mcp/websocket/endpoints.py +206 -268
- crackerjack/mcp/websocket/jobs.py +213 -66
- crackerjack/mcp/websocket/server.py +84 -6
- crackerjack/mcp/websocket/websocket_handler.py +137 -29
- crackerjack/models/config_adapter.py +3 -16
- crackerjack/models/protocols.py +162 -3
- crackerjack/models/resource_protocols.py +454 -0
- crackerjack/models/task.py +3 -3
- crackerjack/monitoring/__init__.py +0 -0
- crackerjack/monitoring/ai_agent_watchdog.py +25 -71
- crackerjack/monitoring/regression_prevention.py +28 -87
- crackerjack/orchestration/advanced_orchestrator.py +44 -78
- crackerjack/orchestration/coverage_improvement.py +10 -60
- crackerjack/orchestration/execution_strategies.py +16 -16
- crackerjack/orchestration/test_progress_streamer.py +61 -53
- crackerjack/plugins/base.py +1 -1
- crackerjack/plugins/managers.py +22 -20
- crackerjack/py313.py +65 -21
- crackerjack/services/backup_service.py +467 -0
- crackerjack/services/bounded_status_operations.py +627 -0
- crackerjack/services/cache.py +7 -9
- crackerjack/services/config.py +35 -52
- crackerjack/services/config_integrity.py +5 -16
- crackerjack/services/config_merge.py +542 -0
- crackerjack/services/contextual_ai_assistant.py +17 -19
- crackerjack/services/coverage_ratchet.py +44 -73
- crackerjack/services/debug.py +25 -39
- crackerjack/services/dependency_monitor.py +52 -50
- crackerjack/services/enhanced_filesystem.py +14 -11
- crackerjack/services/file_hasher.py +1 -1
- crackerjack/services/filesystem.py +1 -12
- crackerjack/services/git.py +71 -47
- crackerjack/services/health_metrics.py +31 -27
- crackerjack/services/initialization.py +276 -428
- crackerjack/services/input_validator.py +760 -0
- crackerjack/services/log_manager.py +16 -16
- crackerjack/services/logging.py +7 -6
- crackerjack/services/metrics.py +43 -43
- crackerjack/services/pattern_cache.py +2 -31
- crackerjack/services/pattern_detector.py +26 -63
- crackerjack/services/performance_benchmarks.py +20 -45
- crackerjack/services/regex_patterns.py +2887 -0
- crackerjack/services/regex_utils.py +537 -0
- crackerjack/services/secure_path_utils.py +683 -0
- crackerjack/services/secure_status_formatter.py +534 -0
- crackerjack/services/secure_subprocess.py +605 -0
- crackerjack/services/security.py +47 -10
- crackerjack/services/security_logger.py +492 -0
- crackerjack/services/server_manager.py +109 -50
- crackerjack/services/smart_scheduling.py +8 -25
- crackerjack/services/status_authentication.py +603 -0
- crackerjack/services/status_security_manager.py +442 -0
- crackerjack/services/thread_safe_status_collector.py +546 -0
- crackerjack/services/tool_version_service.py +1 -23
- crackerjack/services/unified_config.py +36 -58
- crackerjack/services/validation_rate_limiter.py +269 -0
- crackerjack/services/version_checker.py +9 -40
- crackerjack/services/websocket_resource_limiter.py +572 -0
- crackerjack/slash_commands/__init__.py +52 -2
- crackerjack/tools/__init__.py +0 -0
- crackerjack/tools/validate_input_validator_patterns.py +262 -0
- crackerjack/tools/validate_regex_patterns.py +198 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/METADATA +197 -12
- crackerjack-0.31.13.dist-info/RECORD +178 -0
- crackerjack/cli/facade.py +0 -104
- crackerjack-0.31.10.dist-info/RECORD +0 -149
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/WHEEL +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.31.10.dist-info → crackerjack-0.31.13.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Comprehensive timeout management system for async operations.
|
|
3
|
+
|
|
4
|
+
This module provides timeout handling, circuit breaker patterns, and
|
|
5
|
+
graceful degradation for all async operations in crackerjack.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import builtins
|
|
10
|
+
import logging
|
|
11
|
+
import time
|
|
12
|
+
import typing as t
|
|
13
|
+
from contextlib import asynccontextmanager
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from functools import wraps
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger("crackerjack.timeout_manager")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _DummyPerformanceMonitor:
    """No-op stand-in used when the real performance monitor cannot be imported.

    Every ``record_*`` method discards its arguments and every ``get_*``
    method returns an empty container, so callers never need to check
    whether monitoring is available.
    """

    def record_operation_start(self, operation: str) -> float:
        """Return the current wall-clock time as the operation's start marker."""
        started_at = time.time()
        return started_at

    def record_operation_success(self, operation: str, start_time: float) -> None:
        """Ignore a successful completion."""
        return None

    def record_operation_failure(self, operation: str, start_time: float) -> None:
        """Ignore a failed completion."""
        return None

    def record_operation_timeout(
        self,
        operation: str,
        start_time: float,
        timeout_value: float,
        error_message: str,
    ) -> None:
        """Ignore a timeout event."""
        return None

    def record_circuit_breaker_event(self, operation: str, opened: bool) -> None:
        """Ignore a circuit breaker event."""
        return None

    def get_summary_stats(self) -> dict[str, t.Any]:
        """Return an empty summary."""
        return dict()

    def get_all_metrics(self) -> dict[str, t.Any]:
        """Return an empty metrics mapping."""
        return dict()

    def get_performance_alerts(self) -> list[str]:
        """Return an empty alert list."""
        return list()

    def get_recent_timeout_events(self, limit: int) -> list[t.Any]:
        """Return an empty event list regardless of ``limit``."""
        return list()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class TimeoutStrategy(Enum):
    """Selects how a timed-out operation is handled.

    The member values are the lowercase form of the member names.
    """

    # Default strategy for the timeout helpers in this module.
    FAIL_FAST = "fail_fast"

    RETRY_WITH_BACKOFF = "retry_with_backoff"

    # Track failures per operation in a circuit breaker.
    CIRCUIT_BREAKER = "circuit_breaker"

    # Log the timeout and carry on instead of raising.
    GRACEFUL_DEGRADATION = "graceful_degradation"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class CircuitBreakerState(Enum):
    """The three states a circuit breaker can be in."""

    # Normal operation: calls are allowed through.
    CLOSED = "closed"

    # Failing fast: calls are rejected.
    OPEN = "open"

    # Probing: a limited number of calls test whether the service recovered.
    HALF_OPEN = "half_open"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _default_operation_timeouts() -> dict[str, float]:
    """Build a fresh mapping of the built-in per-operation timeouts (seconds)."""
    return dict(
        fast_hooks=60.0,
        comprehensive_hooks=300.0,
        test_execution=600.0,
        ai_agent_processing=180.0,
        file_operations=10.0,
        network_operations=15.0,
        websocket_broadcast=5.0,
        workflow_iteration=900.0,  # 15 minutes
        complete_workflow=3600.0,  # 1 hour
    )


@dataclass
class TimeoutConfig:
    """Tunable settings for timeouts, retries and circuit breaking.

    All durations are expressed in seconds.
    """

    # --- Timeouts ---------------------------------------------------------
    # Used for any operation without an entry in ``operation_timeouts``.
    default_timeout: float = 30.0
    # Per-operation overrides; each instance gets its own copy.
    operation_timeouts: dict[str, float] = field(
        default_factory=_default_operation_timeouts
    )

    # --- Retries ----------------------------------------------------------
    max_retries: int = 3
    base_retry_delay: float = 1.0
    max_retry_delay: float = 60.0
    backoff_multiplier: float = 2.0

    # --- Circuit breaker --------------------------------------------------
    failure_threshold: int = 5
    recovery_timeout: float = 60.0
    half_open_max_calls: int = 3
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class CircuitBreakerStateData:
    """Mutable bookkeeping for a single circuit breaker.

    A freshly created instance is closed with all counters at zero.
    """

    # Current breaker state.
    state: CircuitBreakerState = CircuitBreakerState.CLOSED
    # Running failure counter.
    failure_count: int = 0
    # Timestamp of the most recent failure; 0.0 until one is recorded.
    last_failure_time: float = 0.0
    # Calls counted while the breaker is half-open.
    half_open_calls: int = 0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class TimeoutError(Exception):
    """Timeout failure that carries the operation name and timing details.

    Note that this class shadows the built-in ``TimeoutError`` inside this
    module; the built-in one is reachable as ``builtins.TimeoutError``.

    Attributes:
        operation: Name of the operation that timed out.
        timeout: Timeout limit that applied, in seconds.
        elapsed: Time actually spent before giving up, in seconds.
    """

    def __init__(self, operation: str, timeout: float, elapsed: float = 0.0) -> None:
        self.operation = operation
        self.timeout = timeout
        self.elapsed = elapsed
        super().__init__(
            f"Operation '{operation}' timed out after {timeout}s "
            f"(elapsed: {elapsed:.1f}s)"
        )

    def __reduce__(self) -> tuple[t.Any, ...]:
        """Rebuild from the constructor arguments rather than ``self.args``.

        ``self.args`` only holds the formatted message, so the default
        exception reduction would call ``TimeoutError(message)`` and fail with
        a ``TypeError`` for the missing ``timeout`` argument whenever the
        exception is copied or pickled.
        """
        return (
            type(self),
            (self.operation, self.timeout, self.elapsed),
            # Carry over any extra instance state, as the default reduction does.
            dict(vars(self)),
        )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class AsyncTimeoutManager:
    """Apply timeouts, retries and circuit breaking to async operations.

    Operations are identified by name. The name selects the timeout from
    ``config.operation_timeouts`` and keys the per-operation circuit breaker
    and timing statistics.

    Attributes:
        config: Timeout, retry and circuit breaker settings.
        circuit_breakers: Breaker state per operation, created on first use.
        operation_stats: Durations in seconds of recent successful runs per
            operation (at most the last 100 are kept).
    """

    def __init__(self, config: TimeoutConfig | None = None) -> None:
        """Create a manager using ``config`` or a default ``TimeoutConfig``."""
        self.config = config or TimeoutConfig()
        self.circuit_breakers: dict[str, CircuitBreakerStateData] = {}
        self.operation_stats: dict[str, list[float]] = {}

        # Resolved lazily by the ``performance_monitor`` property to avoid
        # circular imports.
        self._performance_monitor = None

    def get_timeout(self, operation: str) -> float:
        """Return the timeout for ``operation``, or the default if unconfigured."""
        return self.config.operation_timeouts.get(
            operation, self.config.default_timeout
        )

    @property
    def performance_monitor(self) -> t.Any:
        """Return the performance monitor, loading it on first access.

        Falls back to ``_DummyPerformanceMonitor`` when the real monitor
        cannot be imported.
        """
        if self._performance_monitor is None:
            try:
                from .performance_monitor import get_performance_monitor

                self._performance_monitor = get_performance_monitor()
            except ImportError:
                self._performance_monitor = _DummyPerformanceMonitor()
        return self._performance_monitor

    @asynccontextmanager
    async def timeout_context(
        self,
        operation: str,
        timeout: float | None = None,
        strategy: TimeoutStrategy = TimeoutStrategy.FAIL_FAST,
    ) -> t.AsyncIterator[None]:
        """Run the enclosed block under a timeout.

        Args:
            operation: Operation name used for timeout lookup and bookkeeping.
            timeout: Timeout in seconds; a falsy value (``None`` or ``0``)
                selects the configured timeout. Values above 7200 are capped.
            strategy: ``CIRCUIT_BREAKER`` reports failures to the breaker;
                ``GRACEFUL_DEGRADATION`` logs a timeout and suppresses it.

        Raises:
            TimeoutError: This module's ``TimeoutError`` when the block times
                out, unless the strategy is ``GRACEFUL_DEGRADATION``.
            asyncio.CancelledError: Re-raised unchanged when the task is
                cancelled from outside.
            Exception: Any other error from the block, re-raised unchanged.
        """
        timeout_value = timeout or self.get_timeout(operation)
        start_time = self.performance_monitor.record_operation_start(operation)

        # Add additional protection against very long operations
        if timeout_value > 7200.0:  # 2 hours maximum
            logger.warning(
                f"Capping excessive timeout for {operation}: {timeout_value}s -> 7200s"
            )
            timeout_value = 7200.0

        try:
            # Exactly one yield: a context-manager generator must not yield
            # again after an exception has been thrown into it.
            async with asyncio.timeout(timeout_value):
                yield
        except (TimeoutError, builtins.TimeoutError) as e:
            # ``TimeoutError`` alone is this module's class; ``asyncio.timeout``
            # raises the built-in one, so both have to be listed.
            elapsed = self._note_timeout(
                operation, start_time, timeout_value, e, strategy
            )

            # Handle graceful degradation
            if strategy == TimeoutStrategy.GRACEFUL_DEGRADATION:
                logger.warning(
                    f"Operation {operation} timed out ({elapsed:.1f}s), continuing gracefully"
                )
                return  # Exit gracefully without raising

            raise TimeoutError(operation, timeout_value, elapsed) from e
        except (asyncio.CancelledError, Exception):
            # External cancellation is recorded as a failure but must keep
            # propagating; turning it into a timeout would hide the
            # cancellation from the event loop and from retry loops.
            self._note_failure(operation, start_time, strategy)
            raise
        else:
            # Record successful operation
            self.performance_monitor.record_operation_success(operation, start_time)
            self._record_success(operation, time.time() - start_time)

    async def with_timeout(
        self,
        operation: str,
        coro: t.Awaitable[t.Any],
        timeout: float | None = None,
        strategy: TimeoutStrategy = TimeoutStrategy.FAIL_FAST,
    ) -> t.Any:
        """Await ``coro`` under a timeout and return its result.

        Args:
            operation: Operation name used for timeout lookup and bookkeeping.
            coro: Awaitable to run.
            timeout: Timeout in seconds; a falsy value selects the configured
                timeout for ``operation``.
            strategy: ``CIRCUIT_BREAKER`` consults and updates the breaker;
                ``GRACEFUL_DEGRADATION`` returns ``None`` on timeout.

        Returns:
            The awaitable's result, or ``None`` after a timeout under
            ``GRACEFUL_DEGRADATION``.

        Raises:
            TimeoutError: This module's ``TimeoutError`` on timeout, or
                immediately (with zero timings) when the breaker rejects
                the call.
            Exception: Any other error from ``coro``, re-raised unchanged.
        """
        if strategy == TimeoutStrategy.CIRCUIT_BREAKER:
            if not self._check_circuit_breaker(operation):
                # The coroutine will never run; close it so Python does not
                # warn that it was never awaited.
                if asyncio.iscoroutine(coro):
                    coro.close()
                raise TimeoutError(operation, 0.0, 0.0)

        timeout_value = timeout or self.get_timeout(operation)
        start_time = self.performance_monitor.record_operation_start(operation)

        try:
            # Wrap coroutine execution with timeout protection
            result = await asyncio.wait_for(coro, timeout=timeout_value)
        except builtins.TimeoutError as e:
            elapsed = self._note_timeout(
                operation, start_time, timeout_value, e, strategy
            )

            # Handle graceful degradation
            if strategy == TimeoutStrategy.GRACEFUL_DEGRADATION:
                logger.warning(
                    f"Operation {operation} timed out ({elapsed:.1f}s), returning None"
                )
                return None

            raise TimeoutError(operation, timeout_value, elapsed) from e
        except Exception:
            self._note_failure(operation, start_time, strategy)
            raise

        # Record success
        self.performance_monitor.record_operation_success(operation, start_time)
        self._record_success(operation, time.time() - start_time)

        if strategy == TimeoutStrategy.CIRCUIT_BREAKER:
            self._update_circuit_breaker(operation, True)

        return result

    async def _with_retry(
        self,
        operation: str,
        coro_factory: t.Callable[[], t.Awaitable[t.Any]],
        timeout: float | None = None,
    ) -> t.Any:
        """Run ``coro_factory()`` under a timeout, retrying with backoff.

        Up to ``config.max_retries + 1`` attempts are made. The delay starts
        at ``config.base_retry_delay`` and is multiplied by
        ``config.backoff_multiplier`` after each failure, capped at
        ``config.max_retry_delay``.

        Raises:
            Exception: The error from the final attempt.
            RuntimeError: If no attempt was made (negative ``max_retries``).
        """
        last_exception: Exception | None = None
        delay = self.config.base_retry_delay
        total_attempts = self.config.max_retries + 1

        for attempt in range(total_attempts):
            try:
                async with self.timeout_context(operation, timeout):
                    # Create a new coroutine for each attempt
                    return await coro_factory()
            except Exception as e:
                last_exception = e

                if attempt == self.config.max_retries:
                    break

                logger.warning(
                    f"Attempt {attempt + 1}/{self.config.max_retries + 1} "
                    f"failed for {operation}: {e}, retrying in {delay}s"
                )

                await asyncio.sleep(delay)
                delay = min(
                    delay * self.config.backoff_multiplier, self.config.max_retry_delay
                )

        if last_exception is not None:
            raise last_exception
        raise RuntimeError(f"No attempts made for operation: {operation}")

    def _note_timeout(
        self,
        operation: str,
        start_time: float,
        timeout_value: float,
        error: BaseException,
        strategy: TimeoutStrategy,
    ) -> float:
        """Record a timeout everywhere it is tracked and return the elapsed time."""
        elapsed = time.time() - start_time
        self.performance_monitor.record_operation_timeout(
            operation, start_time, timeout_value, str(error)
        )
        self._record_failure(operation, elapsed)

        if strategy == TimeoutStrategy.CIRCUIT_BREAKER:
            self._update_circuit_breaker(operation, False)
        return elapsed

    def _note_failure(
        self, operation: str, start_time: float, strategy: TimeoutStrategy
    ) -> None:
        """Record a non-timeout failure everywhere it is tracked."""
        self.performance_monitor.record_operation_failure(operation, start_time)
        self._record_failure(operation, time.time() - start_time)

        if strategy == TimeoutStrategy.CIRCUIT_BREAKER:
            self._update_circuit_breaker(operation, False)

    def _check_circuit_breaker(self, operation: str) -> bool:
        """Return whether the breaker for ``operation`` allows a call now.

        A breaker is created (closed) on first use. An open breaker moves to
        half-open once ``config.recovery_timeout`` seconds have passed since
        the last failure; a half-open breaker admits at most
        ``config.half_open_max_calls`` probe calls.
        """
        if operation not in self.circuit_breakers:
            self.circuit_breakers[operation] = CircuitBreakerStateData()
            return True

        breaker = self.circuit_breakers[operation]
        current_time = time.time()

        if breaker.state == CircuitBreakerState.CLOSED:
            return True

        if breaker.state == CircuitBreakerState.OPEN:
            if current_time - breaker.last_failure_time > self.config.recovery_timeout:
                breaker.state = CircuitBreakerState.HALF_OPEN
                # The call that triggers the transition is itself the first
                # probe; counting it keeps the total at half_open_max_calls.
                breaker.half_open_calls = 1
                return True
            return False

        # HALF_OPEN
        if breaker.half_open_calls < self.config.half_open_max_calls:
            breaker.half_open_calls += 1
            return True
        return False

    def _update_circuit_breaker(self, operation: str, success: bool) -> None:
        """Update the breaker for ``operation`` with one call outcome.

        A success closes a half-open breaker (resetting the failure count) or
        decrements the failure count of a closed one. A failure increments
        the count and opens the breaker once ``config.failure_threshold`` is
        reached.
        """
        if operation not in self.circuit_breakers:
            self.circuit_breakers[operation] = CircuitBreakerStateData()

        breaker = self.circuit_breakers[operation]
        previous_state = breaker.state

        if success:
            if breaker.state == CircuitBreakerState.HALF_OPEN:
                breaker.state = CircuitBreakerState.CLOSED
                breaker.failure_count = 0
            elif breaker.state == CircuitBreakerState.CLOSED:
                breaker.failure_count = max(0, breaker.failure_count - 1)
            return

        breaker.failure_count += 1
        breaker.last_failure_time = time.time()

        if breaker.failure_count >= self.config.failure_threshold:
            breaker.state = CircuitBreakerState.OPEN

            # Only report the event when the breaker was not already open.
            if previous_state != CircuitBreakerState.OPEN:
                self.performance_monitor.record_circuit_breaker_event(operation, True)

    def _record_success(self, operation: str, elapsed: float) -> None:
        """Store the duration of a successful run, keeping the last 100."""
        stats = self.operation_stats.setdefault(operation, [])
        stats.append(elapsed)

        # Keep only recent stats (last 100 operations)
        if len(stats) > 100:
            del stats[0]

        # NOTE(review): with_timeout() also reports a success to the breaker
        # under CIRCUIT_BREAKER, so operations with a configured timeout are
        # counted twice on that path - confirm whether that is intended.
        if self.config.operation_timeouts.get(operation):
            self._update_circuit_breaker(operation, True)

    def _record_failure(self, operation: str, elapsed: float) -> None:
        """Log a failed run; no timing is stored for failures."""
        logger.warning(
            f"Operation '{operation}' failed after {elapsed:.1f}s "
            f"(timeout: {self.get_timeout(operation)}s)"
        )

    def get_stats(self, operation: str) -> dict[str, t.Any]:
        """Return timing statistics for ``operation``.

        Returns:
            A dict with ``count``, ``avg_time``, ``min_time``, ``max_time``
            and ``success_rate``; all zero when no success has been recorded.
            ``success_rate`` is computed from the stored successes and the
            breaker's current failure count.
        """
        stats = self.operation_stats.get(operation, [])
        if not stats:
            return {
                "count": 0,
                "avg_time": 0.0,
                "min_time": 0.0,
                "max_time": 0.0,
                "success_rate": 0.0,
            }

        successes = len(stats)
        failures = self.circuit_breakers.get(
            operation, CircuitBreakerStateData()
        ).failure_count

        return {
            "count": successes,
            "avg_time": sum(stats) / successes,
            "min_time": min(stats),
            "max_time": max(stats),
            "success_rate": successes / (successes + failures),
        }
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def timeout_async(
    operation: str,
    timeout: float | None = None,
    strategy: TimeoutStrategy = TimeoutStrategy.FAIL_FAST,
) -> t.Callable[
    [t.Callable[..., t.Awaitable[t.Any]]], t.Callable[..., t.Awaitable[t.Any]]
]:
    """Decorate an async function so every call runs under a timeout.

    Args:
        operation: Operation name used for timeout lookup and bookkeeping.
        timeout: Timeout in seconds, passed through to ``with_timeout``.
        strategy: Timeout handling strategy, passed through to
            ``with_timeout``.

    Returns:
        A decorator that wraps an async function; the wrapper returns
        whatever ``AsyncTimeoutManager.with_timeout`` returns.
    """

    def decorator(
        func: t.Callable[..., t.Awaitable[t.Any]],
    ) -> t.Callable[..., t.Awaitable[t.Any]]:
        @wraps(func)
        async def wrapper(*args: t.Any, **kwargs: t.Any) -> t.Any:
            # Use the shared manager, looked up per call: a manager created
            # here would start with empty circuit breakers and default
            # settings every time, so the breaker could never trip and
            # configure_timeouts() would have no effect on decorated calls.
            manager = get_timeout_manager()
            return await manager.with_timeout(
                operation, func(*args, **kwargs), timeout, strategy
            )

        return wrapper

    return decorator
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# Global timeout manager instance
|
|
452
|
+
_global_timeout_manager: AsyncTimeoutManager | None = None
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def get_timeout_manager() -> AsyncTimeoutManager:
    """Return the shared timeout manager, creating it on first use."""
    global _global_timeout_manager
    manager = _global_timeout_manager
    if manager is None:
        # First call: build a manager with default settings and remember it.
        manager = AsyncTimeoutManager()
        _global_timeout_manager = manager
    return manager
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def configure_timeouts(config: TimeoutConfig) -> None:
    """Replace the shared timeout manager with a new one built from ``config``.

    The previous manager, including its circuit breaker state and timing
    statistics, is no longer referenced by this module afterwards.
    """
    global _global_timeout_manager
    replacement = AsyncTimeoutManager(config)
    _global_timeout_manager = replacement
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def get_performance_report() -> dict[str, t.Any]:
    """Assemble a report from the shared timeout manager and its monitor.

    Returns:
        A dict with the keys ``summary``, ``metrics``, ``alerts``,
        ``recent_timeouts`` (the monitor's 10 most recent timeout events)
        and ``circuit_breakers`` (state per operation).
    """
    manager = get_timeout_manager()
    perf = manager.performance_monitor

    # Sections are filled in the same order as they appear in the result.
    report: dict[str, t.Any] = {"summary": perf.get_summary_stats()}

    metrics: dict[str, t.Any] = {}
    for name, m in perf.get_all_metrics().items():
        metrics[name] = {
            "success_rate": m.success_rate,
            "average_time": m.average_time,
            "recent_average_time": m.recent_average_time,
            "total_calls": m.total_calls,
            "timeout_calls": m.timeout_calls,
        }
    report["metrics"] = metrics

    report["alerts"] = perf.get_performance_alerts()

    recent_timeouts = []
    for event in perf.get_recent_timeout_events(10):
        recent_timeouts.append(
            {
                "operation": event.operation,
                "expected_timeout": event.expected_timeout,
                "actual_duration": event.actual_duration,
                "timestamp": event.timestamp,
            }
        )
    report["recent_timeouts"] = recent_timeouts

    breakers: dict[str, t.Any] = {}
    for operation, breaker in manager.circuit_breakers.items():
        breakers[operation] = {
            "state": breaker.state.value,
            "failure_count": breaker.failure_count,
            "last_failure_time": breaker.last_failure_time,
        }
    report["circuit_breakers"] = breakers

    return report
|