DeepFabric 4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +70 -0
- deepfabric/__main__.py +6 -0
- deepfabric/auth.py +382 -0
- deepfabric/builders.py +303 -0
- deepfabric/builders_agent.py +1304 -0
- deepfabric/cli.py +1288 -0
- deepfabric/config.py +899 -0
- deepfabric/config_manager.py +251 -0
- deepfabric/constants.py +94 -0
- deepfabric/dataset_manager.py +534 -0
- deepfabric/error_codes.py +581 -0
- deepfabric/evaluation/__init__.py +47 -0
- deepfabric/evaluation/backends/__init__.py +32 -0
- deepfabric/evaluation/backends/ollama_backend.py +137 -0
- deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
- deepfabric/evaluation/backends/transformers_backend.py +326 -0
- deepfabric/evaluation/evaluator.py +845 -0
- deepfabric/evaluation/evaluators/__init__.py +13 -0
- deepfabric/evaluation/evaluators/base.py +104 -0
- deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
- deepfabric/evaluation/evaluators/registry.py +66 -0
- deepfabric/evaluation/inference.py +155 -0
- deepfabric/evaluation/metrics.py +397 -0
- deepfabric/evaluation/parser.py +304 -0
- deepfabric/evaluation/reporters/__init__.py +13 -0
- deepfabric/evaluation/reporters/base.py +56 -0
- deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
- deepfabric/evaluation/reporters/file_reporter.py +61 -0
- deepfabric/evaluation/reporters/multi_reporter.py +56 -0
- deepfabric/exceptions.py +67 -0
- deepfabric/factory.py +26 -0
- deepfabric/generator.py +1084 -0
- deepfabric/graph.py +545 -0
- deepfabric/hf_hub.py +214 -0
- deepfabric/kaggle_hub.py +219 -0
- deepfabric/llm/__init__.py +41 -0
- deepfabric/llm/api_key_verifier.py +534 -0
- deepfabric/llm/client.py +1206 -0
- deepfabric/llm/errors.py +105 -0
- deepfabric/llm/rate_limit_config.py +262 -0
- deepfabric/llm/rate_limit_detector.py +278 -0
- deepfabric/llm/retry_handler.py +270 -0
- deepfabric/metrics.py +212 -0
- deepfabric/progress.py +262 -0
- deepfabric/prompts.py +290 -0
- deepfabric/schemas.py +1000 -0
- deepfabric/spin/__init__.py +6 -0
- deepfabric/spin/client.py +263 -0
- deepfabric/spin/models.py +26 -0
- deepfabric/stream_simulator.py +90 -0
- deepfabric/tools/__init__.py +5 -0
- deepfabric/tools/defaults.py +85 -0
- deepfabric/tools/loader.py +87 -0
- deepfabric/tools/mcp_client.py +677 -0
- deepfabric/topic_manager.py +303 -0
- deepfabric/topic_model.py +20 -0
- deepfabric/training/__init__.py +35 -0
- deepfabric/training/api_key_prompt.py +302 -0
- deepfabric/training/callback.py +363 -0
- deepfabric/training/metrics_sender.py +301 -0
- deepfabric/tree.py +438 -0
- deepfabric/tui.py +1267 -0
- deepfabric/update_checker.py +166 -0
- deepfabric/utils.py +150 -0
- deepfabric/validation.py +143 -0
- deepfabric-4.4.0.dist-info/METADATA +702 -0
- deepfabric-4.4.0.dist-info/RECORD +71 -0
- deepfabric-4.4.0.dist-info/WHEEL +4 -0
- deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
- deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
"""DeepFabric error codes for standardized error reporting.
|
|
2
|
+
|
|
3
|
+
Error codes provide consistent, documentable error identification across the CLI and TUI.
|
|
4
|
+
Each code maps to a short message suitable for display and a longer description for docs.
|
|
5
|
+
|
|
6
|
+
Error Code Format: DF-XNN
|
|
7
|
+
- DF: DeepFabric prefix
|
|
8
|
+
- X: Category letter (R=Rate limit, A=Auth/API, N=Network, P=Parse, T=Tool, X=Unknown)
|
|
9
|
+
- NN: Number within category
|
|
10
|
+
|
|
11
|
+
Sample-level errors occur during generation and allow processing to continue.
|
|
12
|
+
Fatal errors cause the CLI to exit immediately.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
# Longest slice of an unrecognized error's text that is kept as its short
# display detail; anything beyond this many characters is cut off.
ERROR_DETAIL_MAX_LENGTH: int = 50
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ErrorCategory(str, Enum):
    """Family an error code belongs to.

    Members are also plain strings, so they compare equal to their values.
    """

    RATE_LIMIT = "rate_limit"  # DF-R codes
    AUTH_API = "auth_api"  # DF-A codes
    NETWORK = "network"  # DF-N codes
    PARSE = "parse"  # DF-P codes
    TOOL = "tool"  # DF-T codes
    UNKNOWN = "unknown"  # DF-X codes
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ErrorSeverity(str, Enum):
    """How serious an error is: confined to one sample, or fatal to the run.

    Members are also plain strings, so they compare equal to their values.
    """

    # Per-sample error, generation continues
    SAMPLE = "sample"
    # CLI exits
    FATAL = "fatal"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class ErrorCode:
    """Immutable definition of a single DeepFabric error code."""

    code: str  # identifier such as "DF-R01"
    short_message: str  # brief text used by both formatters below
    description: str  # longer explanation; not emitted by either formatter
    category: ErrorCategory
    severity: ErrorSeverity

    def format_event(self, detail: str | None = None) -> str:
        """Render the compact one-line form used by the TUI Events panel.

        Args:
            detail: Optional short detail (e.g., retry time, quota type).
                ``None`` and the empty string are both treated as "no detail".

        Returns:
            ``"<code> <short_message>"``, with ``" - <detail>"`` appended when a
            detail is given, e.g. "DF-R01 Rate limit (RPM) - retry 3s".
        """
        suffix = f" - {detail}" if detail else ""
        return f"{self.code} {self.short_message}{suffix}"

    def format_full(self, detail: str | None = None) -> str:
        """Render the bracketed form used for detailed output (debug mode, logs).

        Args:
            detail: Optional detail about the specific error. ``None`` and the
                empty string are both treated as "no detail".

        Returns:
            ``"[<code>] <short_message>"``, with ``": <detail>"`` appended when a
            detail is given. The ``description`` field is not included.
        """
        headline = f"[{self.code}] {self.short_message}"
        return f"{headline}: {detail}" if detail else headline
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# =============================================================================
|
|
80
|
+
# Error Code Definitions
|
|
81
|
+
# =============================================================================
|
|
82
|
+
|
|
83
|
+
# Rate-limit errors (DF-R0x). All are per-sample severity.
DF_R01 = ErrorCode(
    code="DF-R01",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (RPM)",
    description="Requests per minute limit exceeded. The provider is throttling requests.",
)

DF_R02 = ErrorCode(
    code="DF-R02",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (daily)",
    description="Daily quota exhausted. Resets at midnight (provider timezone).",
)

DF_R03 = ErrorCode(
    code="DF-R03",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (tokens)",
    description="Token per minute limit exceeded.",
)

# Fallback when no more specific rate-limit kind applies.
DF_R04 = ErrorCode(
    code="DF-R04",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit",
    description="Generic rate limit error from provider.",
)
|
|
115
|
+
|
|
116
|
+
# Auth/API errors (DF-A0x). The first two are fatal; the generic one is per-sample.
DF_A01 = ErrorCode(
    code="DF-A01",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.FATAL,
    short_message="Auth failed",
    description="Authentication failed. Check your API key environment variable.",
)

DF_A02 = ErrorCode(
    code="DF-A02",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.FATAL,
    short_message="Model not found",
    description="The specified model does not exist or is not accessible.",
)

DF_A03 = ErrorCode(
    code="DF-A03",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.SAMPLE,
    short_message="API error",
    description="Generic API error from the provider.",
)
|
|
140
|
+
|
|
141
|
+
# Network errors (DF-N0x). All are per-sample severity.
DF_N01 = ErrorCode(
    code="DF-N01",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Network error",
    description="Connection failed. Check your internet connection.",
)

DF_N02 = ErrorCode(
    code="DF-N02",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Timeout",
    description="Request timed out waiting for provider response.",
)

DF_N03 = ErrorCode(
    code="DF-N03",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Service unavailable",
    description="Provider service temporarily unavailable (503/502).",
)
|
|
165
|
+
|
|
166
|
+
# Parse errors (DF-P0x). All are per-sample severity.
DF_P01 = ErrorCode(
    code="DF-P01",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="JSON parse error",
    description="Failed to parse JSON from LLM response.",
)

DF_P02 = ErrorCode(
    code="DF-P02",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Schema validation",
    description="Response does not match expected schema structure.",
)

DF_P03 = ErrorCode(
    code="DF-P03",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Empty response",
    description="LLM returned an empty or whitespace-only response.",
)

DF_P04 = ErrorCode(
    code="DF-P04",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Malformed response",
    description="Response structure is malformed or incomplete.",
)
|
|
198
|
+
|
|
199
|
+
# Tool errors (DF-T0x). All are per-sample severity.
DF_T01 = ErrorCode(
    code="DF-T01",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="Tool validation",
    description="Tool call format is invalid or missing required fields.",
)

DF_T02 = ErrorCode(
    code="DF-T02",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="Tool limit exceeded",
    description="Sample exceeded maximum tool calls per query.",
)

DF_T03 = ErrorCode(
    code="DF-T03",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="No tool execution",
    description="Agent mode requires at least one tool execution.",
)
|
|
223
|
+
|
|
224
|
+
# Unknown errors (DF-X0x): the catch-all code, per-sample severity.
DF_X01 = ErrorCode(
    code="DF-X01",
    category=ErrorCategory.UNKNOWN,
    severity=ErrorSeverity.SAMPLE,
    short_message="Unknown error",
    description="An unexpected error occurred.",
)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# =============================================================================
|
|
235
|
+
# Error Code Registry
|
|
236
|
+
# =============================================================================
|
|
237
|
+
|
|
238
|
+
# Lookup table from code string (e.g. "DF-R01") to its definition. Keys are taken
# from each entry's own ``code`` field, in the order the codes are listed here.
ALL_ERROR_CODES: dict[str, ErrorCode] = {
    entry.code: entry
    for entry in (
        DF_R01,
        DF_R02,
        DF_R03,
        DF_R04,
        DF_A01,
        DF_A02,
        DF_A03,
        DF_N01,
        DF_N02,
        DF_N03,
        DF_P01,
        DF_P02,
        DF_P03,
        DF_P04,
        DF_T01,
        DF_T02,
        DF_T03,
        DF_X01,
    )
}
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@dataclass
class ClassifiedError:
    """Outcome of classifying an error: the matched code plus optional extras."""

    error_code: ErrorCode  # the matched DeepFabric error code
    detail: str | None = None  # short text shown after the code in the event line
    original_error: str | None = None  # text of the error this result was built from
    retry_after: float | None = None  # retry delay, rendered as whole seconds

    def to_event(self) -> str:
        """Render this result as a one-line entry for the TUI Events panel.

        A truthy ``retry_after`` takes precedence and is shown as
        ``"retry <N>s"``; otherwise ``detail`` (possibly ``None``) is used.
        """
        delay = self.retry_after
        suffix = f"retry {delay:.0f}s" if delay else self.detail
        return self.error_code.format_event(suffix)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class ErrorClassifier:
    """Classifies exceptions and error strings into DeepFabric error codes.

    Classification is a substring heuristic over the lower-cased ``str(error)``,
    optionally assisted by hints in a ``context`` dict. Error families are
    tested in a fixed order (rate limit, auth, model-not-found, network, parse,
    tool, generic API) and the first family that matches wins; anything left
    over is reported as ``DF_X01``.
    """

    # Substring markers, all matched against the lower-cased error text.
    _RATE_LIMIT_INDICATORS = (
        "rate limit",
        "rate_limit",
        "ratelimit",
        "429",
        "resource_exhausted",
        "quota",
        "too many requests",
    )
    _AUTH_INDICATORS = (
        "authentication",
        "unauthorized",
        "api_key",
        "api key",
        "invalid key",
        "permission denied",
        "403",
        "401",
    )
    _NETWORK_INDICATORS = (
        "connection",
        "network",
        "timeout",
        "timed out",
        "503",
        "502",
        "504",
        "service unavailable",
        "bad gateway",
        "gateway timeout",
    )
    # Subset of network markers meaning "provider side is down" (DF_N03).
    # "bad gateway" is the reason phrase of 502, so it belongs here even when
    # the message carries no numeric status code.
    _UNAVAILABLE_INDICATORS = (
        "503",
        "502",
        "504",
        "service unavailable",
        "bad gateway",
    )
    _PARSE_INDICATORS = (
        "json",
        "parse",
        "schema",
        "validation",
        "empty",
        "malformed",
        "invalid format",
    )
    # Values of context["error_type"] that mark a parse error.
    _PARSE_ERROR_TYPES = (
        "json_parsing_errors",
        "invalid_schema",
        "empty_responses",
    )
    _TOOL_INDICATORS = ("tool", "execution", "agent mode")
    _API_INDICATORS = ("api error", "api_error", "500", "internal server error")

    def __init__(self, provider: str | None = None):
        """Initialize classifier.

        Args:
            provider: LLM provider name. It is stored on the instance; the
                heuristics in this class do not consult it.
        """
        self.provider = provider

    @staticmethod
    def _contains_any(text: str, indicators: tuple[str, ...]) -> bool:
        """Return True when any of ``indicators`` occurs as a substring of ``text``."""
        return any(indicator in text for indicator in indicators)

    def classify(  # noqa: PLR0911
        self,
        error: Exception | str,
        context: dict[str, Any] | None = None,
    ) -> ClassifiedError:
        """Classify an error into a DeepFabric error code.

        Args:
            error: The exception or error string to classify
            context: Optional hints. Keys read by this class are
                ``is_rate_limit``, ``retry_after``, ``daily_quota_exhausted``,
                ``quota_type`` and ``error_type``.

        Returns:
            ClassifiedError with appropriate error code and details
        """
        original = str(error)
        error_str = original.lower()
        context = context or {}

        # Rate limits are checked first (most common during generation).
        if self._is_rate_limit(error_str, context):
            return self._classify_rate_limit(error_str, context, error)

        if self._is_auth_error(error_str):
            return ClassifiedError(error_code=DF_A01, original_error=original)

        if self._is_model_not_found(error_str):
            return ClassifiedError(error_code=DF_A02, original_error=original)

        if self._is_network_error(error_str):
            return self._classify_network_error(error_str, error)

        if self._is_parse_error(error_str, context):
            return self._classify_parse_error(error_str, context, error)

        if self._is_tool_error(error_str, context):
            return self._classify_tool_error(error_str, context, error)

        if self._is_api_error(error_str):
            return ClassifiedError(error_code=DF_A03, original_error=original)

        # Nothing matched: report as unknown, keeping only a short prefix of
        # the message as the display detail.
        return ClassifiedError(
            error_code=DF_X01,
            detail=original[:ERROR_DETAIL_MAX_LENGTH],
            original_error=original,
        )

    def _is_rate_limit(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is a rate limit error.

        True when the text contains a rate-limit marker, or when the caller
        set a truthy ``is_rate_limit`` hint in ``context``.
        """
        if self._contains_any(error_str, self._RATE_LIMIT_INDICATORS):
            return True
        # Coerce so the declared bool return type holds for any hint value.
        return bool(context.get("is_rate_limit", False))

    def _classify_rate_limit(
        self, error_str: str, context: dict[str, Any], error: Exception | str
    ) -> ClassifiedError:
        """Classify rate limit error into specific type.

        Precedence: daily quota (DF_R02), token limit (DF_R03), requests per
        minute (DF_R01), then the generic rate limit (DF_R04). Every result
        carries ``context["retry_after"]`` when present.
        """
        retry_after = context.get("retry_after")
        original = str(error)

        # Daily quota exhausted
        if context.get("daily_quota_exhausted") or "per_day" in error_str:
            return ClassifiedError(
                error_code=DF_R02,
                detail="daily quota",
                retry_after=retry_after,
                original_error=original,
            )

        # Token limit. ``or ""`` guards against an explicit None in the
        # context, which would otherwise make the ``in`` test raise TypeError.
        quota_type = context.get("quota_type") or ""
        if "token" in error_str or "token" in quota_type:
            return ClassifiedError(
                error_code=DF_R03,
                retry_after=retry_after,
                original_error=original,
            )

        # RPM (requests per minute) - most common
        if "per_minute" in error_str or "rpm" in error_str:
            return ClassifiedError(
                error_code=DF_R01,
                retry_after=retry_after,
                original_error=original,
            )

        # Generic rate limit
        return ClassifiedError(
            error_code=DF_R04,
            retry_after=retry_after,
            original_error=original,
        )

    def _is_auth_error(self, error_str: str) -> bool:
        """Check if error is authentication-related."""
        return self._contains_any(error_str, self._AUTH_INDICATORS)

    def _is_model_not_found(self, error_str: str) -> bool:
        """Check if error is model not found.

        Requires the word "model" together with "not found" or "404".
        """
        return ("not found" in error_str or "404" in error_str) and "model" in error_str

    def _is_network_error(self, error_str: str) -> bool:
        """Check if error is network-related."""
        return self._contains_any(error_str, self._NETWORK_INDICATORS)

    def _classify_network_error(self, error_str: str, error: Exception | str) -> ClassifiedError:
        """Classify network error into specific type.

        Timeouts (DF_N02) are tested first, so "gateway timeout" is reported as
        a timeout; provider-unavailable markers give DF_N03; everything else is
        the generic DF_N01.
        """
        original = str(error)

        if "timeout" in error_str or "timed out" in error_str:
            return ClassifiedError(error_code=DF_N02, original_error=original)

        if self._contains_any(error_str, self._UNAVAILABLE_INDICATORS):
            return ClassifiedError(error_code=DF_N03, original_error=original)

        return ClassifiedError(error_code=DF_N01, original_error=original)

    def _is_parse_error(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is a parsing/schema error.

        True when the text contains a parse marker, or when
        ``context["error_type"]`` is one of the known parse error types.
        """
        if self._contains_any(error_str, self._PARSE_INDICATORS):
            return True
        return context.get("error_type") in self._PARSE_ERROR_TYPES

    def _classify_parse_error(
        self, error_str: str, context: dict[str, Any], error: Exception | str
    ) -> ClassifiedError:
        """Classify parse error into specific type.

        Precedence: empty response (DF_P03), schema mismatch (DF_P02), JSON or
        parse failure (DF_P01), then malformed response (DF_P04).
        """
        error_type = context.get("error_type", "")
        original = str(error)

        if "empty" in error_str or error_type == "empty_responses":
            return ClassifiedError(error_code=DF_P03, original_error=original)

        if "schema" in error_str or error_type == "invalid_schema":
            return ClassifiedError(error_code=DF_P02, original_error=original)

        if "json" in error_str or "parse" in error_str or error_type == "json_parsing_errors":
            return ClassifiedError(error_code=DF_P01, original_error=original)

        return ClassifiedError(error_code=DF_P04, original_error=original)

    def _is_tool_error(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is tool-related.

        True when the text contains a tool marker, or when
        ``context["error_type"]`` equals ``"tool_error"``.
        """
        if self._contains_any(error_str, self._TOOL_INDICATORS):
            return True
        return context.get("error_type") == "tool_error"

    def _classify_tool_error(
        self,
        error_str: str,
        context: dict[str, Any],  # noqa: ARG002
        error: Exception | str,
    ) -> ClassifiedError:
        """Classify tool error into specific type.

        Precedence: tool-call limit exceeded (DF_T02), no tool executed
        (DF_T03), then generic tool validation (DF_T01). ``context`` is
        accepted for signature parity with the other classifiers but unused.
        """
        original = str(error)

        if "exceeds limit" in error_str or "max_tools" in error_str:
            return ClassifiedError(error_code=DF_T02, original_error=original)

        if "requires at least one" in error_str or "no tool" in error_str:
            return ClassifiedError(error_code=DF_T03, original_error=original)

        return ClassifiedError(error_code=DF_T01, original_error=original)

    def _is_api_error(self, error_str: str) -> bool:
        """Check if error is a generic API error."""
        return self._contains_any(error_str, self._API_INDICATORS)
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
# Lazily created, provider-less classifier shared by all callers that do not
# name a provider.
_default_classifier: ErrorClassifier | None = None


def get_classifier(provider: str | None = None) -> ErrorClassifier:
    """Return an error classifier for the given provider.

    Args:
        provider: Optional provider name. When truthy, a new classifier is
            built for it on every call.

    Returns:
        A fresh ``ErrorClassifier(provider)`` when a provider is given;
        otherwise the shared module-level instance, created on first use.
    """
    global _default_classifier  # noqa: PLW0603
    if not provider:
        if _default_classifier is None:
            _default_classifier = ErrorClassifier()
        return _default_classifier
    return ErrorClassifier(provider)
|
|
563
|
+
|
|
564
|
+
|
|
565
|
+
def classify_error(
    error: Exception | str,
    provider: str | None = None,
    context: dict[str, Any] | None = None,
) -> ClassifiedError:
    """Classify an error in one call, without managing a classifier yourself.

    Args:
        error: The exception or error string to classify
        provider: Optional provider name, used to select the classifier
        context: Optional context dictionary, passed through to the classifier

    Returns:
        ClassifiedError with appropriate error code
    """
    return get_classifier(provider).classify(error, context)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Evaluation module for DeepFabric.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to evaluate fine-tuned models on tool-calling tasks.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .evaluator import EvaluationResult, Evaluator, EvaluatorConfig
|
|
7
|
+
from .evaluators import (
|
|
8
|
+
BaseEvaluator,
|
|
9
|
+
EvaluationContext,
|
|
10
|
+
EvaluatorRegistry,
|
|
11
|
+
EvaluatorResult,
|
|
12
|
+
ToolCallingEvaluator,
|
|
13
|
+
)
|
|
14
|
+
from .inference import InferenceConfig, ModelResponse, create_inference_backend
|
|
15
|
+
from .metrics import EvaluationMetrics, SampleEvaluation, compute_metrics
|
|
16
|
+
from .parser import GroundTruth, GroundTruthParser, parse_batch
|
|
17
|
+
from .reporters import BaseReporter, CloudReporter, FileReporter, MultiReporter
|
|
18
|
+
|
|
19
|
+
# Public API of the evaluation package, grouped by the submodule each name
# is imported from.
__all__ = [
    # .parser
    "GroundTruth",
    "GroundTruthParser",
    "parse_batch",
    # .inference
    "InferenceConfig",
    "ModelResponse",
    "create_inference_backend",
    # .metrics
    "EvaluationMetrics",
    "SampleEvaluation",
    "compute_metrics",
    # .evaluator
    "Evaluator",
    "EvaluatorConfig",
    "EvaluationResult",
    # .evaluators
    "BaseEvaluator",
    "EvaluationContext",
    "EvaluatorRegistry",
    "EvaluatorResult",
    "ToolCallingEvaluator",
    # .reporters
    "BaseReporter",
    "FileReporter",
    "CloudReporter",
    "MultiReporter",
]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Inference backend implementations."""
|
|
2
|
+
|
|
3
|
+
from .ollama_backend import OllamaBackend
|
|
4
|
+
from .tool_call_parsers import (
|
|
5
|
+
GenericToolCallParser,
|
|
6
|
+
HermesToolCallParser,
|
|
7
|
+
LlamaToolCallParser,
|
|
8
|
+
MistralToolCallParser,
|
|
9
|
+
QwenToolCallParser,
|
|
10
|
+
ToolCallParser,
|
|
11
|
+
ToolCallParserRegistry,
|
|
12
|
+
get_parser,
|
|
13
|
+
get_parser_for_model,
|
|
14
|
+
register_parser,
|
|
15
|
+
)
|
|
16
|
+
from .transformers_backend import TransformersBackend
|
|
17
|
+
|
|
18
|
+
# Public API of the backends package.
__all__ = [
    # Inference backends
    "TransformersBackend",
    "OllamaBackend",
    # Tool call parsers (from .tool_call_parsers)
    "ToolCallParser",
    "ToolCallParserRegistry",
    "QwenToolCallParser",
    "LlamaToolCallParser",
    "MistralToolCallParser",
    "HermesToolCallParser",
    "GenericToolCallParser",
    # Parser lookup and registration helpers
    "get_parser",
    "get_parser_for_model",
    "register_parser",
]
|