DeepFabric 4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +70 -0
- deepfabric/__main__.py +6 -0
- deepfabric/auth.py +382 -0
- deepfabric/builders.py +303 -0
- deepfabric/builders_agent.py +1304 -0
- deepfabric/cli.py +1288 -0
- deepfabric/config.py +899 -0
- deepfabric/config_manager.py +251 -0
- deepfabric/constants.py +94 -0
- deepfabric/dataset_manager.py +534 -0
- deepfabric/error_codes.py +581 -0
- deepfabric/evaluation/__init__.py +47 -0
- deepfabric/evaluation/backends/__init__.py +32 -0
- deepfabric/evaluation/backends/ollama_backend.py +137 -0
- deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
- deepfabric/evaluation/backends/transformers_backend.py +326 -0
- deepfabric/evaluation/evaluator.py +845 -0
- deepfabric/evaluation/evaluators/__init__.py +13 -0
- deepfabric/evaluation/evaluators/base.py +104 -0
- deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
- deepfabric/evaluation/evaluators/registry.py +66 -0
- deepfabric/evaluation/inference.py +155 -0
- deepfabric/evaluation/metrics.py +397 -0
- deepfabric/evaluation/parser.py +304 -0
- deepfabric/evaluation/reporters/__init__.py +13 -0
- deepfabric/evaluation/reporters/base.py +56 -0
- deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
- deepfabric/evaluation/reporters/file_reporter.py +61 -0
- deepfabric/evaluation/reporters/multi_reporter.py +56 -0
- deepfabric/exceptions.py +67 -0
- deepfabric/factory.py +26 -0
- deepfabric/generator.py +1084 -0
- deepfabric/graph.py +545 -0
- deepfabric/hf_hub.py +214 -0
- deepfabric/kaggle_hub.py +219 -0
- deepfabric/llm/__init__.py +41 -0
- deepfabric/llm/api_key_verifier.py +534 -0
- deepfabric/llm/client.py +1206 -0
- deepfabric/llm/errors.py +105 -0
- deepfabric/llm/rate_limit_config.py +262 -0
- deepfabric/llm/rate_limit_detector.py +278 -0
- deepfabric/llm/retry_handler.py +270 -0
- deepfabric/metrics.py +212 -0
- deepfabric/progress.py +262 -0
- deepfabric/prompts.py +290 -0
- deepfabric/schemas.py +1000 -0
- deepfabric/spin/__init__.py +6 -0
- deepfabric/spin/client.py +263 -0
- deepfabric/spin/models.py +26 -0
- deepfabric/stream_simulator.py +90 -0
- deepfabric/tools/__init__.py +5 -0
- deepfabric/tools/defaults.py +85 -0
- deepfabric/tools/loader.py +87 -0
- deepfabric/tools/mcp_client.py +677 -0
- deepfabric/topic_manager.py +303 -0
- deepfabric/topic_model.py +20 -0
- deepfabric/training/__init__.py +35 -0
- deepfabric/training/api_key_prompt.py +302 -0
- deepfabric/training/callback.py +363 -0
- deepfabric/training/metrics_sender.py +301 -0
- deepfabric/tree.py +438 -0
- deepfabric/tui.py +1267 -0
- deepfabric/update_checker.py +166 -0
- deepfabric/utils.py +150 -0
- deepfabric/validation.py +143 -0
- deepfabric-4.4.0.dist-info/METADATA +702 -0
- deepfabric-4.4.0.dist-info/RECORD +71 -0
- deepfabric-4.4.0.dist-info/WHEEL +4 -0
- deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
- deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
deepfabric/llm/errors.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Error handling for LLM providers."""
|
|
2
|
+
|
|
3
|
+
import anthropic
|
|
4
|
+
import openai
|
|
5
|
+
|
|
6
|
+
from ..exceptions import DataSetGeneratorError
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def handle_openai_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Handle OpenAI-specific errors with helpful messages.

    Classifies exceptions raised by the OpenAI SDK (also used for
    Ollama's OpenAI-compatible endpoint) into a single actionable
    DataSetGeneratorError. Unrecognized exceptions fall through to a
    generic message that embeds the original error.

    Args:
        e: The exception raised by the OpenAI client.
        provider: Provider name ("openai" or "ollama") for message context.
        model_name: Model identifier, used in model-related messages.

    Returns:
        DataSetGeneratorError carrying a user-facing explanation.
    """
    if isinstance(e, openai.AuthenticationError):
        message = f"Authentication failed for {provider}. Please check your API key."
    elif isinstance(e, openai.NotFoundError):
        # A missing model on Ollama usually just needs a pull.
        if provider == "ollama":
            message = f"Model '{model_name}' not found in Ollama. Please run: ollama pull {model_name}"
        else:
            message = f"Model '{model_name}' not found for {provider}. Please check the model name."
    elif isinstance(e, openai.APIConnectionError):
        # Local Ollama connection failures mean the daemon is not running.
        if provider == "ollama":
            message = "Cannot connect to Ollama server. Please ensure Ollama is running (try: ollama serve)"
        else:
            message = f"Network error connecting to {provider}. Please check your internet connection."
    elif isinstance(e, openai.RateLimitError):
        message = f"Rate limit exceeded for {provider}/{model_name}. Please wait and try again."
    else:
        message = f"OpenAI API error for {provider}/{model_name}: {e}"
    return DataSetGeneratorError(message)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def handle_anthropic_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Handle Anthropic-specific errors with helpful messages.

    Maps the Anthropic SDK exception hierarchy onto user-actionable
    DataSetGeneratorError messages; unrecognized exceptions fall through
    to a generic message that embeds the original error.

    Args:
        e: The exception raised by the Anthropic client.
        provider: Provider name, used in message context.
        model_name: Model identifier, used in model-related messages.

    Returns:
        DataSetGeneratorError carrying a user-facing explanation.
    """
    if isinstance(e, anthropic.AuthenticationError):
        message = f"Authentication failed for {provider}. Please check your ANTHROPIC_API_KEY."
    elif isinstance(e, anthropic.NotFoundError):
        message = f"Model '{model_name}' not found for {provider}. Please check the model name."
    elif isinstance(e, anthropic.APIConnectionError):
        message = f"Network error connecting to {provider}. Please check your internet connection."
    elif isinstance(e, anthropic.RateLimitError):
        message = f"Rate limit exceeded for {provider}/{model_name}. Please wait and try again."
    else:
        message = f"Anthropic API error for {provider}/{model_name}: {e}"
    return DataSetGeneratorError(message)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def handle_gemini_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Handle Gemini-specific errors with helpful messages.

    Gemini errors arrive as generic exceptions, so classification is done
    by keyword matching on the lowercased error text. Check order matters:
    the first matching category wins.

    Args:
        e: The exception raised by the Gemini client.
        provider: Provider name, used in the generic fallback message.
        model_name: Model identifier, used in model-related messages.

    Returns:
        DataSetGeneratorError carrying a user-facing explanation.
    """
    error_str = str(e).lower()

    auth_keywords = ("permission", "api_key", "authentication", "unauthorized")
    rate_keywords = ("quota", "rate limit", "too many requests")
    network_keywords = ("network", "connection", "timeout")

    if "invalid" in error_str and "model" in error_str:
        return DataSetGeneratorError(
            f"Model '{model_name}' not available for Gemini. Try: gemini-1.5-flash, gemini-1.5-pro"
        )
    if any(keyword in error_str for keyword in auth_keywords):
        return DataSetGeneratorError(
            "Authentication failed for Gemini. Please check your GOOGLE_API_KEY or GEMINI_API_KEY."
        )
    if any(keyword in error_str for keyword in rate_keywords):
        return DataSetGeneratorError(
            f"Rate limit exceeded for Gemini/{model_name}. Please wait and try again."
        )
    if any(keyword in error_str for keyword in network_keywords):
        return DataSetGeneratorError(
            "Network error connecting to Gemini. Please check your internet connection."
        )
    return DataSetGeneratorError(f"Gemini API error for {provider}/{model_name}: {e}")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def handle_provider_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Handle errors for any provider with appropriate error handler.

    Dispatches to the provider-specific handler; unknown providers get a
    generic wrapped message.

    Args:
        e: The exception raised by the provider client.
        provider: Provider name (openai, ollama, openrouter, anthropic, gemini).
        model_name: Model identifier, forwarded for message context.

    Returns:
        DataSetGeneratorError carrying a user-facing explanation.
    """
    # OpenRouter uses an OpenAI-compatible API (see rate_limit_config), so
    # its errors are raised by the OpenAI SDK and handled the same way.
    if provider in ["openai", "ollama", "openrouter"]:
        return handle_openai_error(e, provider, model_name)
    if provider == "anthropic":
        return handle_anthropic_error(e, provider, model_name)
    if provider == "gemini":
        return handle_gemini_error(e, provider, model_name)
    return DataSetGeneratorError(f"Unknown provider error for {provider}/{model_name}: {e}")
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Rate limiting configuration models for different LLM providers."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field, ValidationInfo, field_validator
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BackoffStrategy(str, Enum):
    """Backoff strategy for retry attempts.

    Inherits from ``str`` so values serialize naturally in pydantic
    models and config files. ``RateLimitConfig.backoff_strategy``
    defaults to ``EXPONENTIAL_JITTER``.
    """

    EXPONENTIAL = "exponential"
    EXPONENTIAL_JITTER = "exponential_jitter"
    LINEAR = "linear"
    CONSTANT = "constant"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RateLimitConfig(BaseModel):
    """Base configuration for rate limiting and retry behavior.

    This provides sensible defaults that work across all providers,
    with provider-specific subclasses adding specialized behavior
    (OpenAI, Anthropic, Gemini, Ollama, OpenRouter subclasses override
    selected defaults and add provider-specific switches).
    """

    max_retries: int = Field(
        default=5,
        ge=0,
        le=20,
        description="Maximum number of retry attempts",
    )
    base_delay: float = Field(
        default=1.0,
        ge=0.1,
        le=60.0,
        description="Base delay in seconds before first retry",
    )
    max_delay: float = Field(
        default=60.0,
        ge=1.0,
        le=300.0,
        description="Maximum delay in seconds between retries",
    )
    backoff_strategy: BackoffStrategy = Field(
        default=BackoffStrategy.EXPONENTIAL_JITTER,
        description="Strategy for calculating retry delays",
    )
    exponential_base: float = Field(
        default=2.0,
        ge=1.1,
        le=10.0,
        description="Base multiplier for exponential backoff",
    )
    jitter: bool = Field(
        default=True,
        description="Add randomization to delays to prevent thundering herd",
    )
    respect_retry_after: bool = Field(
        default=True,
        description="Respect retry-after headers from provider responses",
    )

    # HTTP status codes that should trigger retry
    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {429, 500, 502, 503, 504},
        description="HTTP status codes that trigger retry",
    )

    # Exception types/messages that should trigger retry
    retry_on_exceptions: list[str] = Field(
        default_factory=lambda: ["timeout", "connection", "network"],
        description="Exception keywords that trigger retry (case-insensitive)",
    )

    @field_validator("max_delay")
    @classmethod
    def validate_max_delay(cls, v: float, info: "ValidationInfo") -> float:
        """Ensure max_delay is greater than or equal to base_delay."""
        # info.data holds fields validated earlier in declaration order, so
        # base_delay is present here only when it already passed validation.
        if "base_delay" in info.data and v < info.data["base_delay"]:
            msg = "max_delay must be greater than or equal to base_delay"
            raise ValueError(msg)
        return v

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary for serialization (JSON-safe values)."""
        return self.model_dump(mode="json")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class OpenAIRateLimitConfig(RateLimitConfig):
    """OpenAI-specific rate limit configuration.

    OpenAI provides detailed rate limit headers (x-ratelimit-*) and
    retry-after headers. This config enables monitoring and preemptive
    backoff based on remaining capacity.

    Retry defaults are inherited from RateLimitConfig; the fields below
    are additive.
    """

    check_headers: bool = Field(
        default=True,
        description="Monitor x-ratelimit-* headers for capacity tracking",
    )
    preemptive_backoff: bool = Field(
        default=False,
        description="Back off preemptively when remaining capacity is low",
    )
    preemptive_threshold: float = Field(
        default=0.1,
        ge=0.0,
        le=0.5,
        description="Threshold (0-1) for preemptive backoff (e.g., 0.1 = 10% remaining)",
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class AnthropicRateLimitConfig(RateLimitConfig):
    """Anthropic Claude-specific rate limit configuration.

    Anthropic uses a token bucket algorithm with separate limits for
    requests per minute (RPM), input tokens per minute (ITPM), and
    output tokens per minute (OTPM). Rate limits vary by model and tier.

    Retry defaults are inherited from RateLimitConfig; the fields below
    are additive.
    """

    check_headers: bool = Field(
        default=True,
        description="Monitor anthropic-ratelimit-* headers",
    )
    token_bucket_aware: bool = Field(
        default=True,
        description="Account for token bucket continuous replenishment",
    )
    gradual_rampup: bool = Field(
        default=True,
        description="Enable gradual traffic ramp-up for new workloads",
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class GeminiRateLimitConfig(RateLimitConfig):
    """Google Gemini-specific rate limit configuration.

    Gemini has RPM, TPM, and RPD (requests per day) limits. Daily quotas
    reset at midnight Pacific time. No explicit retry-after header, so
    more conservative backoff is used. Rate limit errors include detailed
    quota violation information.

    base_delay and max_delay override the base-class defaults (1.0s/60.0s)
    with larger values; the remaining retry defaults are inherited.
    """

    base_delay: float = Field(
        default=2.0,
        ge=0.5,
        le=60.0,
        description="Higher default delay for Gemini (no retry-after header)",
    )
    max_delay: float = Field(
        default=120.0,
        ge=5.0,
        le=600.0,
        description="Longer max delay for daily quota exhaustion",
    )
    parse_quota_details: bool = Field(
        default=True,
        description="Extract quota metric details from RESOURCE_EXHAUSTED errors",
    )
    daily_quota_aware: bool = Field(
        default=True,
        description="Recognize daily quota exhaustion vs per-minute limits",
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class OllamaRateLimitConfig(RateLimitConfig):
    """Ollama-specific rate limit configuration.

    Ollama is typically run locally, so rate limiting is less common.
    This config uses minimal retries, primarily for connection issues.

    All four overridden fields tighten the base-class defaults for a
    local server: fewer retries, shorter delays, and no 429 retry.
    """

    max_retries: int = Field(
        default=2,
        ge=0,
        le=5,
        description="Minimal retries for local Ollama server",
    )
    base_delay: float = Field(
        default=0.5,
        ge=0.1,
        le=5.0,
        description="Short delay for local server retry",
    )
    max_delay: float = Field(
        default=5.0,
        ge=1.0,
        le=30.0,
        description="Short max delay for local operations",
    )
    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {500, 502, 503, 504},
        description="Primarily retry server errors (429 unlikely for local)",
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class OpenRouterRateLimitConfig(RateLimitConfig):
    """OpenRouter-specific rate limit configuration.

    OpenRouter aggregates multiple LLM providers and uses OpenAI-compatible API.
    It uses credit-based quotas with model-specific rate limits. Different models
    have different RPM limits, and free model variants have daily limits.
    Returns 402 Payment Required when account balance is negative.

    Extends the base retryable status codes with 402 so transient payment /
    credit hiccups are retried rather than failed immediately.
    """

    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {402, 429, 500, 502, 503, 504},
        description="HTTP status codes that trigger retry (includes 402 for payment issues)",
    )
    check_credits: bool = Field(
        default=False,
        description="Monitor credit balance via /api/v1/key endpoint",
    )
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def get_default_rate_limit_config(provider: str) -> RateLimitConfig:
    """Get the default rate limit configuration for a provider.

    Args:
        provider: Provider name (openai, anthropic, gemini, ollama, openrouter)

    Returns:
        Provider-specific rate limit configuration with sensible defaults;
        the generic RateLimitConfig for unknown providers.
    """
    config_classes: dict[str, type[RateLimitConfig]] = {
        "openai": OpenAIRateLimitConfig,
        "anthropic": AnthropicRateLimitConfig,
        "gemini": GeminiRateLimitConfig,
        "ollama": OllamaRateLimitConfig,
        "openrouter": OpenRouterRateLimitConfig,
    }
    # Map to classes and instantiate lazily: the original built all five
    # pydantic models on every call only to discard four of them.
    return config_classes.get(provider, RateLimitConfig)()
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def create_rate_limit_config(
    provider: str,
    config_dict: dict[str, Any] | None = None,
) -> RateLimitConfig:
    """Create a rate limit configuration from a dictionary.

    Args:
        provider: Provider name (openai, anthropic, gemini, ollama, openrouter)
        config_dict: Configuration parameters as dictionary

    Returns:
        Provider-specific rate limit configuration

    Raises:
        ValueError: If configuration validation fails
    """
    # No overrides supplied: fall back to the provider's defaults.
    if config_dict is None:
        return get_default_rate_limit_config(provider)

    provider_classes: dict[str, type[RateLimitConfig]] = {
        "openai": OpenAIRateLimitConfig,
        "anthropic": AnthropicRateLimitConfig,
        "gemini": GeminiRateLimitConfig,
        "ollama": OllamaRateLimitConfig,
        "openrouter": OpenRouterRateLimitConfig,
    }
    # Unknown providers validate against the generic base config.
    selected_class = provider_classes.get(provider, RateLimitConfig)
    return selected_class(**config_dict)
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
"""Rate limit detection and error parsing for different LLM providers."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import anthropic
|
|
9
|
+
import openai
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class QuotaInfo:
|
|
16
|
+
"""Information extracted from a rate limit error."""
|
|
17
|
+
|
|
18
|
+
is_rate_limit: bool = False
|
|
19
|
+
quota_type: str | None = None
|
|
20
|
+
limit_value: int | None = None
|
|
21
|
+
retry_after: float | None = None
|
|
22
|
+
daily_quota_exhausted: bool = False
|
|
23
|
+
details: dict[str, Any] = field(default_factory=dict)
|
|
24
|
+
|
|
25
|
+
def __repr__(self) -> str:
|
|
26
|
+
parts = [f"QuotaInfo(is_rate_limit={self.is_rate_limit}"]
|
|
27
|
+
if self.quota_type:
|
|
28
|
+
parts.append(f"quota_type={self.quota_type}")
|
|
29
|
+
if self.retry_after:
|
|
30
|
+
parts.append(f"retry_after={self.retry_after}s")
|
|
31
|
+
if self.daily_quota_exhausted:
|
|
32
|
+
parts.append("daily_quota_exhausted=True")
|
|
33
|
+
return ", ".join(parts) + ")"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class RateLimitDetector:
    """Detect and parse rate limit errors from different LLM providers.

    All methods are static; the class is a namespace for provider-aware
    classification (is it a rate limit? is it retryable?) and for
    extracting QuotaInfo details from provider exceptions.
    """

    @staticmethod
    def is_rate_limit_error(exception: Exception, provider: str) -> bool:
        """Check if an exception represents a rate limit error.

        Args:
            exception: The exception to check
            provider: Provider name (openai, anthropic, gemini, ollama)

        Returns:
            True if the exception is a rate limit error
        """
        if provider == "openai":
            return isinstance(exception, openai.RateLimitError)

        if provider == "anthropic":
            return isinstance(exception, anthropic.RateLimitError)

        if provider == "gemini":
            # Gemini errors are matched by message text, not exception type.
            error_str = str(exception)
            return "429" in error_str and "RESOURCE_EXHAUSTED" in error_str

        if provider == "ollama":
            # Ollama uses OpenAI-compatible API, but rate limits are unlikely
            return isinstance(exception, openai.RateLimitError)

        # Generic fallback: check for 429 in error message
        error_str = str(exception).lower()
        return "429" in error_str or "rate limit" in error_str

    @staticmethod
    def is_retryable_error(exception: Exception, provider: str) -> bool:
        """Check if an exception should trigger a retry.

        This includes rate limits, timeouts, and server errors.

        Args:
            exception: The exception to check
            provider: Provider name

        Returns:
            True if the error is retryable
        """
        # Check if it's a rate limit error first
        if RateLimitDetector.is_rate_limit_error(exception, provider):
            return True

        # Check for server errors and transient failures
        # NOTE(review): substring match, so e.g. a "500" anywhere in the
        # message (not just a status code) also counts as retryable.
        error_str = str(exception).lower()
        retryable_keywords = [
            "timeout",
            "connection",
            "network",
            "503",
            "502",
            "500",
            "504",
            "service unavailable",
            "bad gateway",
            "gateway timeout",
        ]

        return any(keyword in error_str for keyword in retryable_keywords)

    @staticmethod
    def extract_quota_info(exception: Exception, provider: str) -> QuotaInfo:
        """Extract detailed quota information from a rate limit error.

        Args:
            exception: The rate limit exception
            provider: Provider name

        Returns:
            QuotaInfo object with parsed details
        """
        if provider == "openai":
            return RateLimitDetector._parse_openai_error(exception)
        if provider == "anthropic":
            return RateLimitDetector._parse_anthropic_error(exception)
        if provider == "gemini":
            return RateLimitDetector._parse_gemini_error(exception)
        if provider == "ollama":
            # Ollama speaks the OpenAI wire protocol, so reuse that parser.
            return RateLimitDetector._parse_openai_error(exception)

        # Generic fallback: classification only, no header/quota details.
        return QuotaInfo(is_rate_limit=RateLimitDetector.is_rate_limit_error(exception, provider))

    @staticmethod
    def _parse_openai_error(exception: Exception) -> QuotaInfo:
        """Parse OpenAI rate limit error for quota details.

        OpenAI provides detailed headers:
        - x-ratelimit-limit-requests
        - x-ratelimit-remaining-requests
        - retry-after

        Returns a default (is_rate_limit=False) QuotaInfo for anything
        that is not an openai.RateLimitError.
        """
        quota_info = QuotaInfo()

        if not isinstance(exception, openai.RateLimitError):
            return quota_info

        quota_info.is_rate_limit = True

        # Try to extract retry-after from the exception
        # (assumes exception.response exposes a headers mapping — the
        # try/except below absorbs any shape mismatch).
        try:
            if hasattr(exception, "response") and exception.response:
                headers = exception.response.headers
                if "retry-after" in headers:
                    quota_info.retry_after = float(headers["retry-after"])

                # Check for remaining capacity in headers
                if "x-ratelimit-remaining-requests" in headers:
                    remaining = int(headers["x-ratelimit-remaining-requests"])
                    quota_info.details["remaining_requests"] = remaining

                if "x-ratelimit-limit-requests" in headers:
                    limit = int(headers["x-ratelimit-limit-requests"])
                    quota_info.limit_value = limit
                    quota_info.quota_type = "requests"

        except (AttributeError, ValueError, KeyError) as e:
            # Header parsing is best-effort; classification already stands.
            logger.debug("Could not parse OpenAI rate limit headers: %s", e)

        # Parse error message for quota vs rate limit distinction
        error_msg = str(exception).lower()
        if "quota" in error_msg:
            quota_info.daily_quota_exhausted = True
            quota_info.quota_type = "quota"

        return quota_info

    @staticmethod
    def _parse_anthropic_error(exception: Exception) -> QuotaInfo:
        """Parse Anthropic rate limit error for quota details.

        Anthropic provides:
        - retry-after header
        - anthropic-ratelimit-requests-remaining
        - anthropic-ratelimit-tokens-remaining

        Returns a default (is_rate_limit=False) QuotaInfo for anything
        that is not an anthropic.RateLimitError.
        """
        quota_info = QuotaInfo()

        if not isinstance(exception, anthropic.RateLimitError):
            return quota_info

        quota_info.is_rate_limit = True

        # Best-effort header extraction (same pattern as the OpenAI parser).
        try:
            if hasattr(exception, "response") and exception.response:
                headers = exception.response.headers
                if "retry-after" in headers:
                    quota_info.retry_after = float(headers["retry-after"])

                # Extract remaining capacity
                if "anthropic-ratelimit-requests-remaining" in headers:
                    remaining = int(headers["anthropic-ratelimit-requests-remaining"])
                    quota_info.details["remaining_requests"] = remaining

                if "anthropic-ratelimit-tokens-remaining" in headers:
                    remaining_tokens = int(headers["anthropic-ratelimit-tokens-remaining"])
                    quota_info.details["remaining_tokens"] = remaining_tokens

        except (AttributeError, ValueError, KeyError) as e:
            logger.debug("Could not parse Anthropic rate limit headers: %s", e)

        # Determine quota type from error message
        # ("request" is checked first, so a message mentioning both maps
        # to requests_per_minute).
        error_msg = str(exception).lower()
        if "request" in error_msg:
            quota_info.quota_type = "requests_per_minute"
        elif "token" in error_msg:
            quota_info.quota_type = "tokens_per_minute"

        return quota_info

    @staticmethod
    def _parse_gemini_error(exception: Exception) -> QuotaInfo:
        """Parse Gemini RESOURCE_EXHAUSTED error for quota details.

        Gemini errors include detailed quota violation information:
        - quotaMetric (e.g., generate_requests_per_model_per_day)
        - quotaId
        - No explicit retry-after header

        Parsing is purely substring-based on the stringified exception.
        """
        quota_info = QuotaInfo()

        error_str = str(exception)
        if "429" not in error_str or "RESOURCE_EXHAUSTED" not in error_str:
            return quota_info

        quota_info.is_rate_limit = True

        # Try to parse the error response JSON if available
        try:
            # Look for quota metric in error string
            if "quotaMetric" in error_str:
                # Extract quota metric type
                if "per_day" in error_str:
                    quota_info.quota_type = "requests_per_day"
                    quota_info.daily_quota_exhausted = True
                elif "per_minute" in error_str:
                    quota_info.quota_type = "requests_per_minute"

            # Check for limit value in error message
            # ("limit: 0" usually means the account has no quota at all;
            # should_fail_fast treats limit_value == 0 as non-transient).
            if "limit: 0" in error_str:
                quota_info.limit_value = 0
                quota_info.details["limit_exceeded"] = True

            # Extract specific quota details if present
            if "generate_requests_per_model_per_day" in error_str:
                quota_info.details["metric"] = "generate_requests_per_model_per_day"
            elif "generate_requests_per_model_per_minute" in error_str:
                quota_info.details["metric"] = "generate_requests_per_model_per_minute"

        except Exception as e:  # noqa: BLE001
            logger.debug("Could not parse Gemini quota details: %s", e)

        # Gemini doesn't provide retry-after header, use None
        quota_info.retry_after = None

        return quota_info

    @staticmethod
    def should_fail_fast(quota_info: QuotaInfo) -> bool:
        """Determine if we should fail fast instead of retrying.

        Some quota errors are not worth retrying:
        - Daily quota exhausted (won't reset for hours)
        - Quota limit is 0 (account not set up properly)

        Args:
            quota_info: Parsed quota information

        Returns:
            True if we should fail fast and not retry
        """
        # Daily quota exhausted - won't reset until midnight
        if quota_info.daily_quota_exhausted:
            return True

        # Quota limit is 0 - account issue, not transient
        # (limit_value defaults to None, which compares unequal to 0).
        return quota_info.limit_value == 0
|