DeepFabric 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. deepfabric/__init__.py +70 -0
  2. deepfabric/__main__.py +6 -0
  3. deepfabric/auth.py +382 -0
  4. deepfabric/builders.py +303 -0
  5. deepfabric/builders_agent.py +1304 -0
  6. deepfabric/cli.py +1288 -0
  7. deepfabric/config.py +899 -0
  8. deepfabric/config_manager.py +251 -0
  9. deepfabric/constants.py +94 -0
  10. deepfabric/dataset_manager.py +534 -0
  11. deepfabric/error_codes.py +581 -0
  12. deepfabric/evaluation/__init__.py +47 -0
  13. deepfabric/evaluation/backends/__init__.py +32 -0
  14. deepfabric/evaluation/backends/ollama_backend.py +137 -0
  15. deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
  16. deepfabric/evaluation/backends/transformers_backend.py +326 -0
  17. deepfabric/evaluation/evaluator.py +845 -0
  18. deepfabric/evaluation/evaluators/__init__.py +13 -0
  19. deepfabric/evaluation/evaluators/base.py +104 -0
  20. deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
  21. deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
  22. deepfabric/evaluation/evaluators/registry.py +66 -0
  23. deepfabric/evaluation/inference.py +155 -0
  24. deepfabric/evaluation/metrics.py +397 -0
  25. deepfabric/evaluation/parser.py +304 -0
  26. deepfabric/evaluation/reporters/__init__.py +13 -0
  27. deepfabric/evaluation/reporters/base.py +56 -0
  28. deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
  29. deepfabric/evaluation/reporters/file_reporter.py +61 -0
  30. deepfabric/evaluation/reporters/multi_reporter.py +56 -0
  31. deepfabric/exceptions.py +67 -0
  32. deepfabric/factory.py +26 -0
  33. deepfabric/generator.py +1084 -0
  34. deepfabric/graph.py +545 -0
  35. deepfabric/hf_hub.py +214 -0
  36. deepfabric/kaggle_hub.py +219 -0
  37. deepfabric/llm/__init__.py +41 -0
  38. deepfabric/llm/api_key_verifier.py +534 -0
  39. deepfabric/llm/client.py +1206 -0
  40. deepfabric/llm/errors.py +105 -0
  41. deepfabric/llm/rate_limit_config.py +262 -0
  42. deepfabric/llm/rate_limit_detector.py +278 -0
  43. deepfabric/llm/retry_handler.py +270 -0
  44. deepfabric/metrics.py +212 -0
  45. deepfabric/progress.py +262 -0
  46. deepfabric/prompts.py +290 -0
  47. deepfabric/schemas.py +1000 -0
  48. deepfabric/spin/__init__.py +6 -0
  49. deepfabric/spin/client.py +263 -0
  50. deepfabric/spin/models.py +26 -0
  51. deepfabric/stream_simulator.py +90 -0
  52. deepfabric/tools/__init__.py +5 -0
  53. deepfabric/tools/defaults.py +85 -0
  54. deepfabric/tools/loader.py +87 -0
  55. deepfabric/tools/mcp_client.py +677 -0
  56. deepfabric/topic_manager.py +303 -0
  57. deepfabric/topic_model.py +20 -0
  58. deepfabric/training/__init__.py +35 -0
  59. deepfabric/training/api_key_prompt.py +302 -0
  60. deepfabric/training/callback.py +363 -0
  61. deepfabric/training/metrics_sender.py +301 -0
  62. deepfabric/tree.py +438 -0
  63. deepfabric/tui.py +1267 -0
  64. deepfabric/update_checker.py +166 -0
  65. deepfabric/utils.py +150 -0
  66. deepfabric/validation.py +143 -0
  67. deepfabric-4.4.0.dist-info/METADATA +702 -0
  68. deepfabric-4.4.0.dist-info/RECORD +71 -0
  69. deepfabric-4.4.0.dist-info/WHEEL +4 -0
  70. deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
  71. deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,105 @@
1
+ """Error handling for LLM providers."""
2
+
3
+ import anthropic
4
+ import openai
5
+
6
+ from ..exceptions import DataSetGeneratorError
7
+
8
+
9
def handle_openai_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Translate an OpenAI SDK exception into a ``DataSetGeneratorError``.

    Args:
        e: Exception raised by the OpenAI client.
        provider: Provider name ("openai", or "ollama" which shares the OpenAI API).
        model_name: Model that was being called when the error occurred.

    Returns:
        A DataSetGeneratorError carrying a user-actionable message.
    """
    if isinstance(e, openai.AuthenticationError):
        message = f"Authentication failed for {provider}. Please check your API key."
    elif isinstance(e, openai.NotFoundError):
        # For Ollama, "not found" usually means the model was never pulled locally.
        if provider == "ollama":
            message = f"Model '{model_name}' not found in Ollama. Please run: ollama pull {model_name}"
        else:
            message = f"Model '{model_name}' not found for {provider}. Please check the model name."
    elif isinstance(e, openai.APIConnectionError):
        # Connection failures against Ollama almost always mean the local server is down.
        if provider == "ollama":
            message = "Cannot connect to Ollama server. Please ensure Ollama is running (try: ollama serve)"
        else:
            message = f"Network error connecting to {provider}. Please check your internet connection."
    elif isinstance(e, openai.RateLimitError):
        message = f"Rate limit exceeded for {provider}/{model_name}. Please wait and try again."
    else:
        # Fallback for any other OpenAI SDK error; include the original text.
        message = f"OpenAI API error for {provider}/{model_name}: {e}"
    return DataSetGeneratorError(message)
40
+
41
+
42
def handle_anthropic_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Translate an Anthropic SDK exception into a ``DataSetGeneratorError``.

    Args:
        e: Exception raised by the Anthropic client.
        provider: Provider name (expected to be "anthropic").
        model_name: Model that was being called when the error occurred.

    Returns:
        A DataSetGeneratorError carrying a user-actionable message.
    """
    # Ordered (exception type, message) table; first matching type wins.
    known_errors = [
        (
            anthropic.AuthenticationError,
            f"Authentication failed for {provider}. Please check your ANTHROPIC_API_KEY.",
        ),
        (
            anthropic.NotFoundError,
            f"Model '{model_name}' not found for {provider}. Please check the model name.",
        ),
        (
            anthropic.APIConnectionError,
            f"Network error connecting to {provider}. Please check your internet connection.",
        ),
        (
            anthropic.RateLimitError,
            f"Rate limit exceeded for {provider}/{model_name}. Please wait and try again.",
        ),
    ]
    for exc_type, message in known_errors:
        if isinstance(e, exc_type):
            return DataSetGeneratorError(message)
    # Fallback for any other Anthropic SDK error; include the original text.
    return DataSetGeneratorError(f"Anthropic API error for {provider}/{model_name}: {e}")
65
+
66
+
67
def handle_gemini_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Translate a Gemini error into a ``DataSetGeneratorError``.

    Gemini errors are matched by keywords in the message text rather than
    exception types, since the SDK surfaces failures as generic exceptions.

    Args:
        e: Exception raised while calling Gemini.
        provider: Provider name (expected to be "gemini").
        model_name: Model that was being called when the error occurred.

    Returns:
        A DataSetGeneratorError carrying a user-actionable message.
    """
    text = str(e).lower()

    def mentions(*keywords: str) -> bool:
        # True when any keyword appears in the lowercased error text.
        return any(keyword in text for keyword in keywords)

    if "invalid" in text and "model" in text:
        return DataSetGeneratorError(
            f"Model '{model_name}' not available for Gemini. Try: gemini-1.5-flash, gemini-1.5-pro"
        )

    if mentions("permission", "api_key", "authentication", "unauthorized"):
        return DataSetGeneratorError(
            "Authentication failed for Gemini. Please check your GOOGLE_API_KEY or GEMINI_API_KEY."
        )

    if mentions("quota", "rate limit", "too many requests"):
        return DataSetGeneratorError(
            f"Rate limit exceeded for Gemini/{model_name}. Please wait and try again."
        )

    if mentions("network", "connection", "timeout"):
        return DataSetGeneratorError(
            "Network error connecting to Gemini. Please check your internet connection."
        )

    # Fallback for anything unrecognized; include the original text.
    return DataSetGeneratorError(f"Gemini API error for {provider}/{model_name}: {e}")
95
+
96
+
97
def handle_provider_error(e: Exception, provider: str, model_name: str) -> DataSetGeneratorError:
    """Route an exception to the matching provider-specific error handler.

    Args:
        e: The exception to translate.
        provider: Provider name (openai, ollama, anthropic, gemini).
        model_name: Model that was being called when the error occurred.

    Returns:
        A DataSetGeneratorError produced by the provider's handler, or a
        generic one when the provider is unrecognized.
    """
    # Ollama speaks the OpenAI-compatible API, so it shares the OpenAI handler.
    handlers = {
        "openai": handle_openai_error,
        "ollama": handle_openai_error,
        "anthropic": handle_anthropic_error,
        "gemini": handle_gemini_error,
    }
    handler = handlers.get(provider)
    if handler is not None:
        return handler(e, provider, model_name)
    return DataSetGeneratorError(f"Unknown provider error for {provider}/{model_name}: {e}")
@@ -0,0 +1,262 @@
1
+ """Rate limiting configuration models for different LLM providers."""
2
+
3
+ from enum import Enum
4
+ from typing import Any
5
+
6
+ from pydantic import BaseModel, Field, ValidationInfo, field_validator
7
+
8
+
9
class BackoffStrategy(str, Enum):
    """Backoff strategy for retry attempts.

    Members are ``str`` subclasses, so a member compares equal to its plain
    string value (e.g. ``BackoffStrategy.LINEAR == "linear"``) and serializes
    as that string. The names mirror the intended delay-growth curve; the
    actual delay computation lives in the retry handler — confirm there.
    """

    EXPONENTIAL = "exponential"
    EXPONENTIAL_JITTER = "exponential_jitter"
    LINEAR = "linear"
    CONSTANT = "constant"
16
+
17
+
18
class RateLimitConfig(BaseModel):
    """Base retry/rate-limit settings shared by every provider.

    Holds conservative defaults that are safe across providers; the
    provider-specific subclasses override defaults or add extra knobs.
    """

    # Core retry schedule.
    max_retries: int = Field(
        default=5, ge=0, le=20, description="Maximum number of retry attempts"
    )
    base_delay: float = Field(
        default=1.0, ge=0.1, le=60.0, description="Base delay in seconds before first retry"
    )
    max_delay: float = Field(
        default=60.0, ge=1.0, le=300.0, description="Maximum delay in seconds between retries"
    )
    backoff_strategy: BackoffStrategy = Field(
        default=BackoffStrategy.EXPONENTIAL_JITTER,
        description="Strategy for calculating retry delays",
    )
    exponential_base: float = Field(
        default=2.0, ge=1.1, le=10.0, description="Base multiplier for exponential backoff"
    )
    jitter: bool = Field(
        default=True, description="Add randomization to delays to prevent thundering herd"
    )
    respect_retry_after: bool = Field(
        default=True, description="Respect retry-after headers from provider responses"
    )

    # Which HTTP responses are considered transient.
    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {429, 500, 502, 503, 504},
        description="HTTP status codes that trigger retry",
    )

    # Which exception messages are considered transient.
    retry_on_exceptions: list[str] = Field(
        default_factory=lambda: ["timeout", "connection", "network"],
        description="Exception keywords that trigger retry (case-insensitive)",
    )

    @field_validator("max_delay")
    @classmethod
    def validate_max_delay(cls, v: float, info: "ValidationInfo") -> float:
        """Reject configurations whose ceiling is below the starting delay."""
        base = info.data.get("base_delay")
        # base_delay is absent from info.data when its own validation failed;
        # in that case pydantic will already report that error.
        if base is not None and v < base:
            raise ValueError("max_delay must be greater than or equal to base_delay")
        return v

    def to_dict(self) -> dict[str, Any]:
        """Serialize this configuration to a JSON-compatible dictionary."""
        return self.model_dump(mode="json")
86
+
87
+
88
class OpenAIRateLimitConfig(RateLimitConfig):
    """Rate limit configuration tuned for OpenAI.

    OpenAI responses expose ``x-ratelimit-*`` and ``retry-after`` headers,
    so this config can track remaining capacity and optionally back off
    before the limit is actually hit.
    """

    check_headers: bool = Field(
        default=True, description="Monitor x-ratelimit-* headers for capacity tracking"
    )
    preemptive_backoff: bool = Field(
        default=False, description="Back off preemptively when remaining capacity is low"
    )
    preemptive_threshold: float = Field(
        default=0.1,
        ge=0.0,
        le=0.5,
        description="Threshold (0-1) for preemptive backoff (e.g., 0.1 = 10% remaining)",
    )
110
+
111
+
112
class AnthropicRateLimitConfig(RateLimitConfig):
    """Rate limit configuration tuned for Anthropic Claude.

    Anthropic enforces token-bucket limits with separate budgets for
    requests per minute (RPM), input tokens per minute (ITPM), and output
    tokens per minute (OTPM); the exact limits depend on model and tier.
    """

    check_headers: bool = Field(
        default=True, description="Monitor anthropic-ratelimit-* headers"
    )
    token_bucket_aware: bool = Field(
        default=True, description="Account for token bucket continuous replenishment"
    )
    gradual_rampup: bool = Field(
        default=True, description="Enable gradual traffic ramp-up for new workloads"
    )
132
+
133
+
134
class GeminiRateLimitConfig(RateLimitConfig):
    """Rate limit configuration tuned for Google Gemini.

    Gemini enforces RPM, TPM, and RPD (requests per day) quotas, with the
    daily quota resetting at midnight Pacific time. Because Gemini sends no
    retry-after header, the delay defaults here are more conservative than
    the base config, and quota details are parsed out of the error text.
    """

    base_delay: float = Field(
        default=2.0,
        ge=0.5,
        le=60.0,
        description="Higher default delay for Gemini (no retry-after header)",
    )
    max_delay: float = Field(
        default=120.0,
        ge=5.0,
        le=600.0,
        description="Longer max delay for daily quota exhaustion",
    )
    parse_quota_details: bool = Field(
        default=True,
        description="Extract quota metric details from RESOURCE_EXHAUSTED errors",
    )
    daily_quota_aware: bool = Field(
        default=True,
        description="Recognize daily quota exhaustion vs per-minute limits",
    )
163
+
164
+
165
class OllamaRateLimitConfig(RateLimitConfig):
    """Rate limit configuration tuned for Ollama.

    A local Ollama server rarely rate-limits, so retries here are few and
    short, aimed mainly at transient connection and server errors.
    """

    max_retries: int = Field(
        default=2, ge=0, le=5, description="Minimal retries for local Ollama server"
    )
    base_delay: float = Field(
        default=0.5, ge=0.1, le=5.0, description="Short delay for local server retry"
    )
    max_delay: float = Field(
        default=5.0, ge=1.0, le=30.0, description="Short max delay for local operations"
    )
    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {500, 502, 503, 504},
        description="Primarily retry server errors (429 unlikely for local)",
    )
194
+
195
+
196
class OpenRouterRateLimitConfig(RateLimitConfig):
    """Rate limit configuration tuned for OpenRouter.

    OpenRouter fronts many providers behind an OpenAI-compatible API with
    credit-based quotas; per-model RPM limits differ, free model variants
    carry daily caps, and a negative account balance produces HTTP 402
    Payment Required.
    """

    retry_on_status_codes: set[int] = Field(
        default_factory=lambda: {402, 429, 500, 502, 503, 504},
        description="HTTP status codes that trigger retry (includes 402 for payment issues)",
    )
    check_credits: bool = Field(
        default=False, description="Monitor credit balance via /api/v1/key endpoint"
    )
213
+
214
+
215
def get_default_rate_limit_config(provider: str) -> RateLimitConfig:
    """Get the default rate limit configuration for a provider.

    Args:
        provider: Provider name (openai, anthropic, gemini, ollama, openrouter)

    Returns:
        Provider-specific rate limit configuration with sensible defaults;
        a plain RateLimitConfig for unrecognized providers.
    """
    # Map providers to classes and instantiate only the one requested —
    # the previous version eagerly constructed (and pydantic-validated)
    # all five provider configs on every call just to return one of them.
    config_classes: dict[str, type[RateLimitConfig]] = {
        "openai": OpenAIRateLimitConfig,
        "anthropic": AnthropicRateLimitConfig,
        "gemini": GeminiRateLimitConfig,
        "ollama": OllamaRateLimitConfig,
        "openrouter": OpenRouterRateLimitConfig,
    }
    return config_classes.get(provider, RateLimitConfig)()
232
+
233
+
234
def create_rate_limit_config(
    provider: str,
    config_dict: dict[str, Any] | None = None,
) -> RateLimitConfig:
    """Build a rate limit configuration for a provider from a dictionary.

    Args:
        provider: Provider name (openai, anthropic, gemini, ollama, openrouter)
        config_dict: Configuration parameters as dictionary; when None, the
            provider's defaults are returned unchanged.

    Returns:
        Provider-specific rate limit configuration

    Raises:
        ValueError: If configuration validation fails
    """
    # No overrides supplied: fall back to the provider's default config.
    if config_dict is None:
        return get_default_rate_limit_config(provider)

    provider_classes: dict[str, type[RateLimitConfig]] = {
        "openai": OpenAIRateLimitConfig,
        "anthropic": AnthropicRateLimitConfig,
        "gemini": GeminiRateLimitConfig,
        "ollama": OllamaRateLimitConfig,
        "openrouter": OpenRouterRateLimitConfig,
    }
    selected = provider_classes.get(provider, RateLimitConfig)
    # pydantic validates the overrides and raises on bad values.
    return selected(**config_dict)
@@ -0,0 +1,278 @@
1
+ """Rate limit detection and error parsing for different LLM providers."""
2
+
3
+ import logging
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ import anthropic
9
+ import openai
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ @dataclass
15
+ class QuotaInfo:
16
+ """Information extracted from a rate limit error."""
17
+
18
+ is_rate_limit: bool = False
19
+ quota_type: str | None = None
20
+ limit_value: int | None = None
21
+ retry_after: float | None = None
22
+ daily_quota_exhausted: bool = False
23
+ details: dict[str, Any] = field(default_factory=dict)
24
+
25
+ def __repr__(self) -> str:
26
+ parts = [f"QuotaInfo(is_rate_limit={self.is_rate_limit}"]
27
+ if self.quota_type:
28
+ parts.append(f"quota_type={self.quota_type}")
29
+ if self.retry_after:
30
+ parts.append(f"retry_after={self.retry_after}s")
31
+ if self.daily_quota_exhausted:
32
+ parts.append("daily_quota_exhausted=True")
33
+ return ", ".join(parts) + ")"
34
+
35
+
36
class RateLimitDetector:
    """Detect and parse rate limit errors from different LLM providers.

    All methods are static; the class is a namespace for provider-aware
    error classification and quota parsing.
    """

    @staticmethod
    def is_rate_limit_error(exception: Exception, provider: str) -> bool:
        """Check if an exception represents a rate limit error.

        Args:
            exception: The exception to check
            provider: Provider name (openai, anthropic, gemini, ollama)

        Returns:
            True if the exception is a rate limit error
        """
        if provider == "openai":
            return isinstance(exception, openai.RateLimitError)

        if provider == "anthropic":
            return isinstance(exception, anthropic.RateLimitError)

        if provider == "gemini":
            # Gemini quota errors are matched on message text (HTTP 429 plus
            # the RESOURCE_EXHAUSTED status) rather than a dedicated type.
            error_str = str(exception)
            return "429" in error_str and "RESOURCE_EXHAUSTED" in error_str

        if provider == "ollama":
            # Ollama uses OpenAI-compatible API, but rate limits are unlikely
            return isinstance(exception, openai.RateLimitError)

        # Generic fallback: check for 429 in error message
        error_str = str(exception).lower()
        return "429" in error_str or "rate limit" in error_str

    @staticmethod
    def is_retryable_error(exception: Exception, provider: str) -> bool:
        """Check if an exception should trigger a retry.

        This includes rate limits, timeouts, and server errors.

        Args:
            exception: The exception to check
            provider: Provider name

        Returns:
            True if the error is retryable
        """
        # Check if it's a rate limit error first
        if RateLimitDetector.is_rate_limit_error(exception, provider):
            return True

        # Check for server errors and transient failures by scanning the
        # message for status codes and common transport-failure keywords.
        error_str = str(exception).lower()
        retryable_keywords = [
            "timeout",
            "connection",
            "network",
            "503",
            "502",
            "500",
            "504",
            "service unavailable",
            "bad gateway",
            "gateway timeout",
        ]

        return any(keyword in error_str for keyword in retryable_keywords)

    @staticmethod
    def extract_quota_info(exception: Exception, provider: str) -> QuotaInfo:
        """Extract detailed quota information from a rate limit error.

        Args:
            exception: The rate limit exception
            provider: Provider name

        Returns:
            QuotaInfo object with parsed details
        """
        if provider == "openai":
            return RateLimitDetector._parse_openai_error(exception)
        if provider == "anthropic":
            return RateLimitDetector._parse_anthropic_error(exception)
        if provider == "gemini":
            return RateLimitDetector._parse_gemini_error(exception)
        if provider == "ollama":
            # Ollama speaks the OpenAI wire format, so reuse that parser.
            return RateLimitDetector._parse_openai_error(exception)

        # Generic fallback
        return QuotaInfo(is_rate_limit=RateLimitDetector.is_rate_limit_error(exception, provider))

    @staticmethod
    def _parse_openai_error(exception: Exception) -> QuotaInfo:
        """Parse OpenAI rate limit error for quota details.

        OpenAI provides detailed headers:
        - x-ratelimit-limit-requests
        - x-ratelimit-remaining-requests
        - retry-after
        """
        quota_info = QuotaInfo()

        # Anything other than a RateLimitError yields the empty (all-defaults) result.
        if not isinstance(exception, openai.RateLimitError):
            return quota_info

        quota_info.is_rate_limit = True

        # Try to extract retry-after from the exception
        try:
            if hasattr(exception, "response") and exception.response:
                headers = exception.response.headers
                if "retry-after" in headers:
                    quota_info.retry_after = float(headers["retry-after"])

                # Check for remaining capacity in headers
                if "x-ratelimit-remaining-requests" in headers:
                    remaining = int(headers["x-ratelimit-remaining-requests"])
                    quota_info.details["remaining_requests"] = remaining

                if "x-ratelimit-limit-requests" in headers:
                    limit = int(headers["x-ratelimit-limit-requests"])
                    quota_info.limit_value = limit
                    quota_info.quota_type = "requests"

        except (AttributeError, ValueError, KeyError) as e:
            # Header parsing is best-effort: a malformed header must not
            # replace the rate limit error with a parsing crash.
            logger.debug("Could not parse OpenAI rate limit headers: %s", e)

        # Parse error message for quota vs rate limit distinction
        error_msg = str(exception).lower()
        if "quota" in error_msg:
            quota_info.daily_quota_exhausted = True
            quota_info.quota_type = "quota"

        return quota_info

    @staticmethod
    def _parse_anthropic_error(exception: Exception) -> QuotaInfo:
        """Parse Anthropic rate limit error for quota details.

        Anthropic provides:
        - retry-after header
        - anthropic-ratelimit-requests-remaining
        - anthropic-ratelimit-tokens-remaining
        """
        quota_info = QuotaInfo()

        if not isinstance(exception, anthropic.RateLimitError):
            return quota_info

        quota_info.is_rate_limit = True

        try:
            if hasattr(exception, "response") and exception.response:
                headers = exception.response.headers
                if "retry-after" in headers:
                    quota_info.retry_after = float(headers["retry-after"])

                # Extract remaining capacity
                if "anthropic-ratelimit-requests-remaining" in headers:
                    remaining = int(headers["anthropic-ratelimit-requests-remaining"])
                    quota_info.details["remaining_requests"] = remaining

                if "anthropic-ratelimit-tokens-remaining" in headers:
                    remaining_tokens = int(headers["anthropic-ratelimit-tokens-remaining"])
                    quota_info.details["remaining_tokens"] = remaining_tokens

        except (AttributeError, ValueError, KeyError) as e:
            # Best-effort header parsing; failures are logged, not raised.
            logger.debug("Could not parse Anthropic rate limit headers: %s", e)

        # Determine quota type from error message
        # NOTE(review): "request" is checked before "token", so a message
        # mentioning both is classified as requests_per_minute.
        error_msg = str(exception).lower()
        if "request" in error_msg:
            quota_info.quota_type = "requests_per_minute"
        elif "token" in error_msg:
            quota_info.quota_type = "tokens_per_minute"

        return quota_info

    @staticmethod
    def _parse_gemini_error(exception: Exception) -> QuotaInfo:
        """Parse Gemini RESOURCE_EXHAUSTED error for quota details.

        Gemini errors include detailed quota violation information:
        - quotaMetric (e.g., generate_requests_per_model_per_day)
        - quotaId
        - No explicit retry-after header
        """
        quota_info = QuotaInfo()

        # Only messages that look like Gemini quota errors are parsed further.
        error_str = str(exception)
        if "429" not in error_str or "RESOURCE_EXHAUSTED" not in error_str:
            return quota_info

        quota_info.is_rate_limit = True

        # Try to parse the error response JSON if available
        try:
            # Look for quota metric in error string
            if "quotaMetric" in error_str:
                # Extract quota metric type
                if "per_day" in error_str:
                    quota_info.quota_type = "requests_per_day"
                    quota_info.daily_quota_exhausted = True
                elif "per_minute" in error_str:
                    quota_info.quota_type = "requests_per_minute"

            # Check for limit value in error message
            # "limit: 0" indicates the account has no quota at all for this
            # metric (setup/billing problem rather than transient pressure).
            if "limit: 0" in error_str:
                quota_info.limit_value = 0
                quota_info.details["limit_exceeded"] = True

            # Extract specific quota details if present
            if "generate_requests_per_model_per_day" in error_str:
                quota_info.details["metric"] = "generate_requests_per_model_per_day"
            elif "generate_requests_per_model_per_minute" in error_str:
                quota_info.details["metric"] = "generate_requests_per_model_per_minute"

        except Exception as e:  # noqa: BLE001
            # Deliberately broad: quota details are informational only and
            # must never turn a rate limit error into a parsing crash.
            logger.debug("Could not parse Gemini quota details: %s", e)

        # Gemini doesn't provide retry-after header, use None
        quota_info.retry_after = None

        return quota_info

    @staticmethod
    def should_fail_fast(quota_info: QuotaInfo) -> bool:
        """Determine if we should fail fast instead of retrying.

        Some quota errors are not worth retrying:
        - Daily quota exhausted (won't reset for hours)
        - Quota limit is 0 (account not set up properly)

        Args:
            quota_info: Parsed quota information

        Returns:
            True if we should fail fast and not retry
        """
        # Daily quota exhausted - won't reset until midnight
        if quota_info.daily_quota_exhausted:
            return True

        # Quota limit is 0 - account issue, not transient
        # (limit_value is None when unknown, which compares unequal to 0.)
        return quota_info.limit_value == 0