proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Resilience components for Proxilion.
|
|
3
|
+
|
|
4
|
+
This module provides fault tolerance mechanisms for AI agent operations:
|
|
5
|
+
- Retry with exponential backoff
|
|
6
|
+
- Fallback chains for tools and models
|
|
7
|
+
- Graceful degradation tiers
|
|
8
|
+
|
|
9
|
+
These components help build robust AI applications that can handle
|
|
10
|
+
transient failures, provider outages, and degraded service conditions.
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
>>> from proxilion.resilience import (
|
|
14
|
+
... RetryPolicy, retry_with_backoff,
|
|
15
|
+
... FallbackChain, FallbackOption,
|
|
16
|
+
... DegradationTier, GracefulDegradation,
|
|
17
|
+
... )
|
|
18
|
+
>>>
|
|
19
|
+
>>> # Retry with exponential backoff
|
|
20
|
+
>>> @retry_with_backoff(RetryPolicy(max_attempts=3))
|
|
21
|
+
... async def call_llm_api():
|
|
22
|
+
... return await client.chat.completions.create(...)
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Fallback chain for models
|
|
25
|
+
>>> model_fallback = FallbackChain([
|
|
26
|
+
... FallbackOption("claude-opus", call_claude_opus),
|
|
27
|
+
... FallbackOption("gpt-4o", call_gpt4o),
|
|
28
|
+
... FallbackOption("local-llama", call_local),
|
|
29
|
+
... ])
|
|
30
|
+
>>> result = await model_fallback.execute_async(prompt="Hello")
|
|
31
|
+
>>>
|
|
32
|
+
>>> # Graceful degradation
|
|
33
|
+
>>> degradation = GracefulDegradation()
|
|
34
|
+
>>> if degradation.is_tool_available("web_search"):
|
|
35
|
+
... result = await web_search(query)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from proxilion.resilience.degradation import (
|
|
39
|
+
DEFAULT_TIERS,
|
|
40
|
+
DegradationTier,
|
|
41
|
+
GracefulDegradation,
|
|
42
|
+
TierConfig,
|
|
43
|
+
)
|
|
44
|
+
from proxilion.resilience.fallback import (
|
|
45
|
+
FallbackChain,
|
|
46
|
+
FallbackOption,
|
|
47
|
+
FallbackResult,
|
|
48
|
+
ModelFallback,
|
|
49
|
+
ToolFallback,
|
|
50
|
+
)
|
|
51
|
+
from proxilion.resilience.retry import (
|
|
52
|
+
DEFAULT_RETRY_POLICY,
|
|
53
|
+
RetryContext,
|
|
54
|
+
RetryPolicy,
|
|
55
|
+
RetryStats,
|
|
56
|
+
retry_async,
|
|
57
|
+
retry_sync,
|
|
58
|
+
retry_with_backoff,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
__all__ = [
|
|
62
|
+
# Retry
|
|
63
|
+
"RetryPolicy",
|
|
64
|
+
"RetryContext",
|
|
65
|
+
"RetryStats",
|
|
66
|
+
"retry_with_backoff",
|
|
67
|
+
"retry_async",
|
|
68
|
+
"retry_sync",
|
|
69
|
+
"DEFAULT_RETRY_POLICY",
|
|
70
|
+
# Fallback
|
|
71
|
+
"FallbackOption",
|
|
72
|
+
"FallbackResult",
|
|
73
|
+
"FallbackChain",
|
|
74
|
+
"ModelFallback",
|
|
75
|
+
"ToolFallback",
|
|
76
|
+
# Degradation
|
|
77
|
+
"DegradationTier",
|
|
78
|
+
"TierConfig",
|
|
79
|
+
"GracefulDegradation",
|
|
80
|
+
"DEFAULT_TIERS",
|
|
81
|
+
]
|
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graceful degradation for AI operations.
|
|
3
|
+
|
|
4
|
+
Provides tier-based feature availability for handling
|
|
5
|
+
reduced service capacity.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import threading
|
|
12
|
+
from collections.abc import Callable
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from enum import Enum, auto
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DegradationTier(Enum):
    """
    Degradation tiers representing service capability levels.

    Tiers are ordered by capability: FULL > REDUCED > MINIMAL > OFFLINE.
    The ordering operators only accept other ``DegradationTier`` members;
    for any other operand they return ``NotImplemented`` so that Python
    raises ``TypeError`` naming the operator that was actually used.

    Attributes:
        FULL: Full service with all features available.
        REDUCED: Reduced service with limited features.
        MINIMAL: Minimal service with essential features only.
        OFFLINE: Offline mode with local-only functionality.
    """

    FULL = auto()
    REDUCED = auto()
    MINIMAL = auto()
    OFFLINE = auto()

    def __lt__(self, other: object) -> bool:
        """Return True if this tier is less capable than ``other``."""
        if not isinstance(other, DegradationTier):
            return NotImplemented
        return _TIER_RANK[self] < _TIER_RANK[other]

    def __le__(self, other: object) -> bool:
        """Return True if this tier is no more capable than ``other``."""
        if not isinstance(other, DegradationTier):
            return NotImplemented
        return _TIER_RANK[self] <= _TIER_RANK[other]

    def __gt__(self, other: object) -> bool:
        """Return True if this tier is more capable than ``other``."""
        if not isinstance(other, DegradationTier):
            return NotImplemented
        return _TIER_RANK[self] > _TIER_RANK[other]

    def __ge__(self, other: object) -> bool:
        """Return True if this tier is at least as capable as ``other``."""
        if not isinstance(other, DegradationTier):
            return NotImplemented
        return _TIER_RANK[self] >= _TIER_RANK[other]


# Capability rank of each tier (higher means more capable). Built once at
# import time instead of on every comparison. It lives outside the class body
# because a plain dict assigned inside an Enum body would itself become an
# enum member.
_TIER_RANK: dict[DegradationTier, int] = {
    DegradationTier.FULL: 4,
    DegradationTier.REDUCED: 3,
    DegradationTier.MINIMAL: 2,
    DegradationTier.OFFLINE: 1,
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class TierConfig:
    """
    Configuration for a degradation tier.

    Attributes:
        tier: The degradation tier this config applies to.
        available_tools: Set of tool names available at this tier.
            Use {"*"} to allow all tools.
        available_models: List of model names available at this tier.
        max_tokens: Maximum tokens allowed at this tier.
        features: Set of feature names enabled at this tier.
        rate_limit_multiplier: Multiplier for rate limits at this tier.
        timeout_multiplier: Multiplier for timeouts at this tier.
        description: Human-readable description of this tier.

    Example:
        >>> config = TierConfig(
        ...     tier=DegradationTier.REDUCED,
        ...     available_tools={"search", "calculator"},
        ...     available_models=["gpt-4o-mini"],
        ...     max_tokens=32000,
        ...     features={"function_calling"},
        ... )
    """

    tier: DegradationTier
    available_tools: set[str] = field(default_factory=lambda: {"*"})
    available_models: list[str] = field(default_factory=list)
    max_tokens: int = 100000
    features: set[str] = field(default_factory=set)
    rate_limit_multiplier: float = 1.0
    timeout_multiplier: float = 1.0
    description: str = ""

    def __post_init__(self) -> None:
        """Normalise ``available_tools`` and ``features`` to mutable sets.

        Lists, tuples and frozensets are converted to ``set``. A value that
        is already a ``set`` is kept as the same object.
        """
        if isinstance(self.available_tools, (list, tuple, frozenset)):
            self.available_tools = set(self.available_tools)
        if isinstance(self.features, (list, tuple, frozenset)):
            self.features = set(self.features)

    def is_tool_available(self, tool_name: str) -> bool:
        """
        Check if a tool is available at this tier.

        Args:
            tool_name: Name of the tool to check.

        Returns:
            True if the tool is listed, or if the wildcard "*" is present.
        """
        return "*" in self.available_tools or tool_name in self.available_tools

    def is_model_available(self, model_name: str) -> bool:
        """
        Check if a model is available at this tier.

        Args:
            model_name: Name of the model to check.

        Returns:
            True if the model is listed in ``available_models``.
        """
        return model_name in self.available_models

    def is_feature_enabled(self, feature: str) -> bool:
        """
        Check if a feature is enabled at this tier.

        Args:
            feature: Name of the feature to check.

        Returns:
            True if the feature is listed in ``features``.
        """
        return feature in self.features

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary that shares no containers with this config.

        Set-valued fields are emitted as sorted lists so the output is
        deterministic. ``available_models`` is copied so that mutating the
        returned dictionary cannot alter the configuration.
        """
        return {
            "tier": self.tier.name,
            "available_tools": sorted(self.available_tools),
            "available_models": list(self.available_models),
            "max_tokens": self.max_tokens,
            "features": sorted(self.features),
            "rate_limit_multiplier": self.rate_limit_multiplier,
            "timeout_multiplier": self.timeout_multiplier,
            "description": self.description,
        }
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Built-in tier configurations, listed from FULL down to OFFLINE.
# Each entry is keyed by the tier recorded on the config itself, so the key
# and the config's ``tier`` field cannot drift apart.
DEFAULT_TIERS: dict[DegradationTier, TierConfig] = {
    config.tier: config
    for config in (
        TierConfig(
            tier=DegradationTier.FULL,
            available_tools={"*"},
            available_models=["claude-opus-4-5", "claude-sonnet-4", "gpt-4o", "gpt-4o-mini"],
            max_tokens=100000,
            features={"streaming", "vision", "function_calling", "code_execution"},
            rate_limit_multiplier=1.0,
            timeout_multiplier=1.0,
            description="Full service with all features available",
        ),
        TierConfig(
            tier=DegradationTier.REDUCED,
            available_tools={"search", "read_file", "calculator", "web_fetch"},
            available_models=["claude-sonnet-4", "gpt-4o-mini"],
            max_tokens=32000,
            features={"function_calling"},
            rate_limit_multiplier=0.5,
            timeout_multiplier=1.5,
            description="Reduced service with limited features",
        ),
        TierConfig(
            tier=DegradationTier.MINIMAL,
            available_tools={"search", "calculator"},
            available_models=["gpt-4o-mini"],
            max_tokens=8000,
            features=set(),
            rate_limit_multiplier=0.25,
            timeout_multiplier=2.0,
            description="Minimal service with essential features only",
        ),
        TierConfig(
            tier=DegradationTier.OFFLINE,
            available_tools=set(),
            available_models=[],
            max_tokens=4000,
            features=set(),
            rate_limit_multiplier=0.1,
            timeout_multiplier=3.0,
            description="Offline mode with cached/local-only functionality",
        ),
    )
}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@dataclass
class DegradationEvent:
    """
    Record of a single transition between degradation tiers.

    Attributes:
        timestamp: Moment at which the transition happened.
        from_tier: Tier that was active before the transition.
        to_tier: Tier that became active after the transition.
        reason: Free-text explanation of the transition; empty by default.
        triggered_by: Name of whatever caused the transition (for example a
            tool or component name), or None when not supplied.
    """

    # Required fields describing the transition itself.
    timestamp: datetime
    from_tier: DegradationTier
    to_tier: DegradationTier

    # Optional context about why the transition happened.
    reason: str = ""
    triggered_by: str | None = None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class GracefulDegradation:
    """
    Manages graceful degradation across service tiers.

    Tracks current service tier and provides methods to check
    feature availability, auto-degrade on failures, and recover.

    Mutating operations are serialised with a reentrant lock. Tier-change
    callbacks are invoked while that lock is held, so a callback may call
    back into this object from the same thread.

    Example:
        >>> degradation = GracefulDegradation()
        >>>
        >>> # Check availability
        >>> if degradation.is_tool_available("web_search"):
        ...     result = await web_search(query)
        >>>
        >>> # Auto-degrade on failure
        >>> try:
        ...     result = await call_api()
        ... except Exception as e:
        ...     degradation.record_failure("api_call")
        >>>
        >>> # Manual tier control
        >>> degradation.set_tier(DegradationTier.REDUCED, reason="High latency")
    """

    # Tiers from most to least capable. degrade() moves one step towards the
    # end of this sequence, recover() one step towards the start.
    _TIER_ORDER: tuple[DegradationTier, ...] = (
        DegradationTier.FULL,
        DegradationTier.REDUCED,
        DegradationTier.MINIMAL,
        DegradationTier.OFFLINE,
    )

    def __init__(
        self,
        tiers: dict[DegradationTier, TierConfig] | None = None,
        initial_tier: DegradationTier = DegradationTier.FULL,
        failure_threshold: int = 3,
        recovery_threshold: int = 5,
        auto_recover: bool = True,
    ) -> None:
        """
        Initialize graceful degradation.

        Args:
            tiers: Tier configurations. A shallow copy of DEFAULT_TIERS is
                used if this is None or empty.
            initial_tier: Starting tier.
            failure_threshold: Consecutive failures before degrading.
            recovery_threshold: Consecutive successes before recovering.
            auto_recover: Whether to auto-recover after successes.
        """
        self._tiers = tiers or dict(DEFAULT_TIERS)
        self._current_tier = initial_tier
        self._failure_threshold = failure_threshold
        self._recovery_threshold = recovery_threshold
        self._auto_recover = auto_recover

        self._failure_counts: dict[str, int] = {}
        self._success_counts: dict[str, int] = {}
        self._history: list[DegradationEvent] = []
        self._callbacks: list[Callable[[DegradationEvent], None]] = []
        # Reentrant: record_failure -> degrade -> set_tier all take the lock.
        self._lock = threading.RLock()

    @property
    def current_tier(self) -> DegradationTier:
        """Get the current degradation tier."""
        return self._current_tier

    @property
    def current_config(self) -> TierConfig:
        """Get the configuration for the current tier.

        Raises:
            KeyError: If no configuration is registered for the current tier.
        """
        return self._tiers[self._current_tier]

    def get_current_tier(self) -> DegradationTier:
        """Get the current tier."""
        return self._current_tier

    def set_tier(
        self,
        tier: DegradationTier,
        reason: str = "",
        triggered_by: str | None = None,
    ) -> None:
        """
        Set the degradation tier.

        Does nothing if ``tier`` is already the current tier. Otherwise the
        change is appended to the history, every registered callback is
        called with the resulting event, and the change is logged.
        Exceptions raised by callbacks are logged and suppressed.

        Args:
            tier: The tier to set.
            reason: Reason for the change.
            triggered_by: What triggered the change.
        """
        with self._lock:
            if tier == self._current_tier:
                return

            old_tier = self._current_tier
            event = DegradationEvent(
                timestamp=datetime.now(timezone.utc),
                from_tier=old_tier,
                to_tier=tier,
                reason=reason,
                triggered_by=triggered_by,
            )

            self._current_tier = tier
            self._history.append(event)

            # Notify over a snapshot: a callback may add or remove callbacks
            # (the lock is reentrant), and mutating the live list while
            # iterating it would skip entries.
            for callback in tuple(self._callbacks):
                try:
                    callback(event)
                except Exception as e:
                    logger.error(f"Degradation callback error: {e}")

            logger.info(
                f"Degradation tier changed: {old_tier.name} -> {tier.name} "
                f"(reason: {reason}, triggered_by: {triggered_by})"
            )

    def degrade(self, reason: str = "", triggered_by: str | None = None) -> bool:
        """
        Degrade to the next lower tier.

        Args:
            reason: Reason for degradation.
            triggered_by: What triggered the degradation.

        Returns:
            True if degradation occurred, False if already at lowest tier.
        """
        with self._lock:
            current_index = self._TIER_ORDER.index(self._current_tier)

            if current_index >= len(self._TIER_ORDER) - 1:
                return False  # Already at lowest tier

            self.set_tier(
                self._TIER_ORDER[current_index + 1],
                reason=reason,
                triggered_by=triggered_by,
            )
            return True

    def recover(self, reason: str = "", triggered_by: str | None = None) -> bool:
        """
        Recover to the next higher tier.

        Args:
            reason: Reason for recovery.
            triggered_by: What triggered the recovery.

        Returns:
            True if recovery occurred, False if already at highest tier.
        """
        with self._lock:
            current_index = self._TIER_ORDER.index(self._current_tier)

            if current_index <= 0:
                return False  # Already at highest tier

            self.set_tier(
                self._TIER_ORDER[current_index - 1],
                reason=reason,
                triggered_by=triggered_by,
            )
            return True

    def record_failure(self, component: str) -> None:
        """
        Record a failure for a component.

        Increments the component's failure count and zeroes its success
        count. When the failure count reaches the failure threshold, one
        degradation step is attempted and the failure count is reset,
        whether or not a lower tier was available.

        Args:
            component: Name of the component that failed.
        """
        with self._lock:
            self._failure_counts[component] = self._failure_counts.get(component, 0) + 1
            self._success_counts[component] = 0  # A failure breaks the success streak

            if self._failure_counts[component] >= self._failure_threshold:
                self.degrade(
                    reason=f"{self._failure_threshold} consecutive failures",
                    triggered_by=component,
                )
                self._failure_counts[component] = 0  # Reset after degrading

    def record_success(self, component: str) -> None:
        """
        Record a success for a component.

        Increments the component's success count and zeroes its failure
        count. When auto-recovery is enabled and the success count reaches
        the recovery threshold, one recovery step is attempted and the
        success count is reset.

        Args:
            component: Name of the component that succeeded.
        """
        with self._lock:
            self._success_counts[component] = self._success_counts.get(component, 0) + 1
            self._failure_counts[component] = 0  # A success breaks the failure streak

            if (
                self._auto_recover
                and self._success_counts[component] >= self._recovery_threshold
            ):
                self.recover(
                    reason=f"{self._recovery_threshold} consecutive successes",
                    triggered_by=component,
                )
                self._success_counts[component] = 0  # Reset after recovering

    def auto_degrade_on_failure(self, tool_name: str) -> None:
        """
        Record a tool failure and potentially auto-degrade.

        Alias for record_failure for backward compatibility.

        Args:
            tool_name: Name of the tool that failed.
        """
        self.record_failure(tool_name)

    def is_tool_available(self, tool_name: str) -> bool:
        """
        Check if a tool is available at the current tier.

        Args:
            tool_name: Name of the tool to check.

        Returns:
            True if the tool is available.
        """
        return self.current_config.is_tool_available(tool_name)

    def is_model_available(self, model_name: str) -> bool:
        """
        Check if a model is available at the current tier.

        Args:
            model_name: Name of the model to check.

        Returns:
            True if the model is available.
        """
        return self.current_config.is_model_available(model_name)

    def is_feature_enabled(self, feature: str) -> bool:
        """
        Check if a feature is enabled at the current tier.

        Args:
            feature: Name of the feature to check.

        Returns:
            True if the feature is enabled.
        """
        return self.current_config.is_feature_enabled(feature)

    def get_available_tools(self) -> set[str]:
        """
        Get all tools available at the current tier.

        Returns:
            A copy of the set of available tool names.
        """
        return set(self.current_config.available_tools)

    def get_available_models(self) -> list[str]:
        """
        Get all models available at the current tier.

        Returns:
            A copy of the list of available model names.
        """
        return list(self.current_config.available_models)

    def get_enabled_features(self) -> set[str]:
        """
        Get all features enabled at the current tier.

        Returns:
            A copy of the set of enabled feature names.
        """
        return set(self.current_config.features)

    def get_max_tokens(self) -> int:
        """Get maximum tokens for the current tier."""
        return self.current_config.max_tokens

    def get_rate_limit_multiplier(self) -> float:
        """Get rate limit multiplier for the current tier."""
        return self.current_config.rate_limit_multiplier

    def get_timeout_multiplier(self) -> float:
        """Get timeout multiplier for the current tier."""
        return self.current_config.timeout_multiplier

    def add_tier_change_callback(
        self, callback: Callable[[DegradationEvent], None]
    ) -> None:
        """
        Add a callback for tier changes.

        Args:
            callback: Function to call on tier change.
        """
        with self._lock:
            self._callbacks.append(callback)

    def remove_tier_change_callback(
        self, callback: Callable[[DegradationEvent], None]
    ) -> bool:
        """
        Remove a tier change callback.

        Args:
            callback: The callback to remove.

        Returns:
            True if callback was found and removed.
        """
        with self._lock:
            try:
                self._callbacks.remove(callback)
            except ValueError:
                return False
            return True

    def get_history(self) -> list[DegradationEvent]:
        """Get a copy of the tier change history."""
        with self._lock:
            return list(self._history)

    def get_failure_counts(self) -> dict[str, int]:
        """Get a copy of the current failure counts by component."""
        with self._lock:
            return dict(self._failure_counts)

    def get_success_counts(self) -> dict[str, int]:
        """Get a copy of the current success counts by component."""
        with self._lock:
            return dict(self._success_counts)

    def reset_counts(self, component: str | None = None) -> None:
        """
        Reset failure and success counts.

        Args:
            component: Specific component to reset, or None for all.
        """
        with self._lock:
            # Compare against None explicitly: an empty-string component name
            # must reset only that component, not every counter.
            if component is not None:
                self._failure_counts.pop(component, None)
                self._success_counts.pop(component, None)
            else:
                self._failure_counts.clear()
                self._success_counts.clear()

    def reset(self, tier: DegradationTier = DegradationTier.FULL) -> None:
        """
        Reset to a specific tier and clear all counts and history.

        The tier is assigned directly: no event is recorded and no
        callbacks are invoked.

        Args:
            tier: The tier to reset to.
        """
        with self._lock:
            self._current_tier = tier
            self._failure_counts.clear()
            self._success_counts.clear()
            self._history.clear()

    def get_tier_config(self, tier: DegradationTier) -> TierConfig:
        """
        Get configuration for a specific tier.

        Args:
            tier: The tier to get config for.

        Returns:
            TierConfig for the tier.

        Raises:
            KeyError: If no configuration is registered for ``tier``.
        """
        return self._tiers[tier]

    def set_tier_config(self, tier: DegradationTier, config: TierConfig) -> None:
        """
        Set configuration for a specific tier.

        Args:
            tier: The tier to configure.
            config: The configuration to set.
        """
        with self._lock:
            self._tiers[tier] = config

    def to_dict(self) -> dict[str, Any]:
        """Convert current state to dictionary.

        The snapshot is taken under the lock so that the tier, counters and
        history length all describe the same moment.
        """
        with self._lock:
            return {
                "current_tier": self._current_tier.name,
                "current_config": self.current_config.to_dict(),
                "failure_counts": dict(self._failure_counts),
                "success_counts": dict(self._success_counts),
                "history_length": len(self._history),
                "auto_recover": self._auto_recover,
                "failure_threshold": self._failure_threshold,
                "recovery_threshold": self._recovery_threshold,
            }
|