DeepFabric 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. deepfabric/__init__.py +70 -0
  2. deepfabric/__main__.py +6 -0
  3. deepfabric/auth.py +382 -0
  4. deepfabric/builders.py +303 -0
  5. deepfabric/builders_agent.py +1304 -0
  6. deepfabric/cli.py +1288 -0
  7. deepfabric/config.py +899 -0
  8. deepfabric/config_manager.py +251 -0
  9. deepfabric/constants.py +94 -0
  10. deepfabric/dataset_manager.py +534 -0
  11. deepfabric/error_codes.py +581 -0
  12. deepfabric/evaluation/__init__.py +47 -0
  13. deepfabric/evaluation/backends/__init__.py +32 -0
  14. deepfabric/evaluation/backends/ollama_backend.py +137 -0
  15. deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
  16. deepfabric/evaluation/backends/transformers_backend.py +326 -0
  17. deepfabric/evaluation/evaluator.py +845 -0
  18. deepfabric/evaluation/evaluators/__init__.py +13 -0
  19. deepfabric/evaluation/evaluators/base.py +104 -0
  20. deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
  21. deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
  22. deepfabric/evaluation/evaluators/registry.py +66 -0
  23. deepfabric/evaluation/inference.py +155 -0
  24. deepfabric/evaluation/metrics.py +397 -0
  25. deepfabric/evaluation/parser.py +304 -0
  26. deepfabric/evaluation/reporters/__init__.py +13 -0
  27. deepfabric/evaluation/reporters/base.py +56 -0
  28. deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
  29. deepfabric/evaluation/reporters/file_reporter.py +61 -0
  30. deepfabric/evaluation/reporters/multi_reporter.py +56 -0
  31. deepfabric/exceptions.py +67 -0
  32. deepfabric/factory.py +26 -0
  33. deepfabric/generator.py +1084 -0
  34. deepfabric/graph.py +545 -0
  35. deepfabric/hf_hub.py +214 -0
  36. deepfabric/kaggle_hub.py +219 -0
  37. deepfabric/llm/__init__.py +41 -0
  38. deepfabric/llm/api_key_verifier.py +534 -0
  39. deepfabric/llm/client.py +1206 -0
  40. deepfabric/llm/errors.py +105 -0
  41. deepfabric/llm/rate_limit_config.py +262 -0
  42. deepfabric/llm/rate_limit_detector.py +278 -0
  43. deepfabric/llm/retry_handler.py +270 -0
  44. deepfabric/metrics.py +212 -0
  45. deepfabric/progress.py +262 -0
  46. deepfabric/prompts.py +290 -0
  47. deepfabric/schemas.py +1000 -0
  48. deepfabric/spin/__init__.py +6 -0
  49. deepfabric/spin/client.py +263 -0
  50. deepfabric/spin/models.py +26 -0
  51. deepfabric/stream_simulator.py +90 -0
  52. deepfabric/tools/__init__.py +5 -0
  53. deepfabric/tools/defaults.py +85 -0
  54. deepfabric/tools/loader.py +87 -0
  55. deepfabric/tools/mcp_client.py +677 -0
  56. deepfabric/topic_manager.py +303 -0
  57. deepfabric/topic_model.py +20 -0
  58. deepfabric/training/__init__.py +35 -0
  59. deepfabric/training/api_key_prompt.py +302 -0
  60. deepfabric/training/callback.py +363 -0
  61. deepfabric/training/metrics_sender.py +301 -0
  62. deepfabric/tree.py +438 -0
  63. deepfabric/tui.py +1267 -0
  64. deepfabric/update_checker.py +166 -0
  65. deepfabric/utils.py +150 -0
  66. deepfabric/validation.py +143 -0
  67. deepfabric-4.4.0.dist-info/METADATA +702 -0
  68. deepfabric-4.4.0.dist-info/RECORD +71 -0
  69. deepfabric-4.4.0.dist-info/WHEEL +4 -0
  70. deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
  71. deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,581 @@
1
+ """DeepFabric error codes for standardized error reporting.
2
+
3
+ Error codes provide consistent, documentable error identification across the CLI and TUI.
4
+ Each code maps to a short message suitable for display and a longer description for docs.
5
+
6
+ Error Code Format: DF-XNN
7
+ - DF: DeepFabric prefix
8
+ - X: Category letter (R=Rate limit, A=Auth/API, N=Network, P=Parse, T=Tool, X=Unknown)
9
+ - NN: Number within category
10
+
11
+ Sample-level errors occur during generation and allow processing to continue.
12
+ Fatal errors cause the CLI to exit immediately.
13
+ """
14
+
15
+ from dataclasses import dataclass
16
+ from enum import Enum
17
+ from typing import Any
18
+
19
+ # Maximum number of characters of the raw error text kept as the detail of an unknown (DF-X01) error
20
+ ERROR_DETAIL_MAX_LENGTH = 50
21
+
22
+
23
class ErrorCategory(str, Enum):
    """Coarse grouping shared by related DeepFabric error codes.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Provider throttling and quota errors
    RATE_LIMIT = "rate_limit"
    # Authentication and provider API errors
    AUTH_API = "auth_api"
    # Connectivity, timeout, and availability errors
    NETWORK = "network"
    # Errors parsing or validating a response
    PARSE = "parse"
    # Tool-related errors
    TOOL = "tool"
    # Anything that fits no other category
    UNKNOWN = "unknown"
32
+
33
+
34
class ErrorSeverity(str, Enum):
    """Distinguishes errors that can be survived from errors that end the run.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Per-sample error: generation continues with the next sample
    SAMPLE = "sample"
    # Fatal error: the CLI exits
    FATAL = "fatal"
39
+
40
+
41
@dataclass(frozen=True)
class ErrorCode:
    """Immutable definition of a single DeepFabric error code."""

    code: str  # identifier such as "DF-R01"
    short_message: str  # brief text used by both formatters below
    description: str  # longer explanation; not included by either formatter
    category: ErrorCategory
    severity: ErrorSeverity

    def format_event(self, detail: str | None = None) -> str:
        """Build the one-line form shown in the TUI Events panel.

        Args:
            detail: Optional short detail (e.g., retry time, quota type).
                ``None`` and the empty string are both treated as "no detail".

        Returns:
            ``"<code> <short_message>"``, followed by ``" - <detail>"`` when a
            detail is given, e.g. "DF-R01 Rate limit (RPM) - retry 3s".
        """
        summary = f"{self.code} {self.short_message}"
        return f"{summary} - {detail}" if detail else summary

    def format_full(self, detail: str | None = None) -> str:
        """Build the bracketed form used for detailed output (debug mode, logs).

        Args:
            detail: Optional detail about the specific error. ``None`` and the
                empty string are both treated as "no detail".

        Returns:
            ``"[<code>] <short_message>"``, followed by ``": <detail>"`` when a
            detail is given. The long ``description`` is not part of the output.
        """
        headline = f"[{self.code}] {self.short_message}"
        return headline if not detail else f"{headline}: {detail}"
77
+
78
+
79
+ # =============================================================================
80
+ # Error Code Definitions
81
+ # =============================================================================
82
+
83
+ # Rate Limit Errors (DF-R0x)
84
DF_R01 = ErrorCode(
    code="DF-R01",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (RPM)",
    description="Requests per minute limit exceeded. The provider is throttling requests.",
)

DF_R02 = ErrorCode(
    code="DF-R02",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (daily)",
    description="Daily quota exhausted. Resets at midnight (provider timezone).",
)

DF_R03 = ErrorCode(
    code="DF-R03",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit (tokens)",
    description="Token per minute limit exceeded.",
)

DF_R04 = ErrorCode(
    code="DF-R04",
    category=ErrorCategory.RATE_LIMIT,
    severity=ErrorSeverity.SAMPLE,
    short_message="Rate limit",
    description="Generic rate limit error from provider.",
)

# Auth/API errors (DF-A0x); the first two are fatal, the generic one is per-sample
DF_A01 = ErrorCode(
    code="DF-A01",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.FATAL,
    short_message="Auth failed",
    description="Authentication failed. Check your API key environment variable.",
)

DF_A02 = ErrorCode(
    code="DF-A02",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.FATAL,
    short_message="Model not found",
    description="The specified model does not exist or is not accessible.",
)

DF_A03 = ErrorCode(
    code="DF-A03",
    category=ErrorCategory.AUTH_API,
    severity=ErrorSeverity.SAMPLE,
    short_message="API error",
    description="Generic API error from the provider.",
)

# Network errors (DF-N0x)
DF_N01 = ErrorCode(
    code="DF-N01",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Network error",
    description="Connection failed. Check your internet connection.",
)

DF_N02 = ErrorCode(
    code="DF-N02",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Timeout",
    description="Request timed out waiting for provider response.",
)

DF_N03 = ErrorCode(
    code="DF-N03",
    category=ErrorCategory.NETWORK,
    severity=ErrorSeverity.SAMPLE,
    short_message="Service unavailable",
    description="Provider service temporarily unavailable (503/502).",
)

# Parse errors (DF-P0x)
DF_P01 = ErrorCode(
    code="DF-P01",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="JSON parse error",
    description="Failed to parse JSON from LLM response.",
)

DF_P02 = ErrorCode(
    code="DF-P02",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Schema validation",
    description="Response does not match expected schema structure.",
)

DF_P03 = ErrorCode(
    code="DF-P03",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Empty response",
    description="LLM returned an empty or whitespace-only response.",
)

DF_P04 = ErrorCode(
    code="DF-P04",
    category=ErrorCategory.PARSE,
    severity=ErrorSeverity.SAMPLE,
    short_message="Malformed response",
    description="Response structure is malformed or incomplete.",
)

# Tool errors (DF-T0x)
DF_T01 = ErrorCode(
    code="DF-T01",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="Tool validation",
    description="Tool call format is invalid or missing required fields.",
)

DF_T02 = ErrorCode(
    code="DF-T02",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="Tool limit exceeded",
    description="Sample exceeded maximum tool calls per query.",
)

DF_T03 = ErrorCode(
    code="DF-T03",
    category=ErrorCategory.TOOL,
    severity=ErrorSeverity.SAMPLE,
    short_message="No tool execution",
    description="Agent mode requires at least one tool execution.",
)

# Unknown errors (DF-X0x): the fallback when nothing else matches
DF_X01 = ErrorCode(
    code="DF-X01",
    category=ErrorCategory.UNKNOWN,
    severity=ErrorSeverity.SAMPLE,
    short_message="Unknown error",
    description="An unexpected error occurred.",
)
232
+
233
+
234
+ # =============================================================================
235
+ # Error Code Registry
236
+ # =============================================================================
237
+
238
# Lookup table from code string (e.g. "DF-R01") to its definition. Keys are
# taken from each definition's own ``code`` field, in definition order.
ALL_ERROR_CODES: dict[str, ErrorCode] = {
    definition.code: definition
    for definition in (
        DF_R01,
        DF_R02,
        DF_R03,
        DF_R04,
        DF_A01,
        DF_A02,
        DF_A03,
        DF_N01,
        DF_N02,
        DF_N03,
        DF_P01,
        DF_P02,
        DF_P03,
        DF_P04,
        DF_T01,
        DF_T02,
        DF_T03,
        DF_X01,
    )
}
258
+
259
+
260
@dataclass
class ClassifiedError:
    """Outcome of mapping a raw error onto a DeepFabric error code."""

    error_code: ErrorCode
    detail: str | None = None  # optional short text shown next to the code
    original_error: str | None = None  # full text of the error that was classified
    retry_after: float | None = None  # rendered as whole seconds by to_event

    def to_event(self) -> str:
        """Render this error as one line for the TUI Events panel.

        A truthy ``retry_after`` takes precedence over ``detail`` and is shown
        as ``"retry <N>s"`` with no decimals; otherwise ``detail`` (possibly
        ``None``) is passed through to the error code's event formatter.
        """
        shown = f"retry {self.retry_after:.0f}s" if self.retry_after else self.detail
        return self.error_code.format_event(shown)
274
+
275
+
276
class ErrorClassifier:
    """Classifies exceptions and error strings into DeepFabric error codes.

    Classification is heuristic: the lower-cased error text is scanned for
    known phrases and HTTP status codes, optionally helped by hints in a
    ``context`` dict. Checks run in a fixed order (rate limit, auth, model not
    found, network, parse, tool, generic API) and the first match wins; an
    error that matches nothing becomes DF-X01.

    Phrases are matched as plain substrings. Status codes are matched only as
    standalone numbers (not adjacent to another digit), so that digits inside
    request ids, timestamps, or byte counts do not trigger a classification.
    """

    _RATE_LIMIT_PHRASES = (
        "rate limit",
        "rate_limit",
        "ratelimit",
        "resource_exhausted",
        "quota",
        "too many requests",
    )
    _RATE_LIMIT_STATUS = ("429",)

    _AUTH_PHRASES = (
        "authentication",
        "unauthorized",
        "api_key",
        "api key",
        "invalid key",
        "permission denied",
    )
    _AUTH_STATUS = ("403", "401")

    _NOT_FOUND_STATUS = ("404",)

    _NETWORK_PHRASES = (
        "connection",
        "network",
        "timeout",
        "timed out",
        "service unavailable",
        "bad gateway",
        "gateway timeout",
    )
    # Gateway/availability signals that map to DF-N03 rather than plain DF-N01
    _UNAVAILABLE_PHRASES = ("service unavailable", "bad gateway")
    _UNAVAILABLE_STATUS = ("503", "502", "504")

    _PARSE_PHRASES = (
        "json",
        "parse",
        "schema",
        "validation",
        "empty",
        "malformed",
        "invalid format",
    )
    _PARSE_ERROR_TYPES = ("json_parsing_errors", "invalid_schema", "empty_responses")

    _TOOL_PHRASES = ("tool", "execution", "agent mode")

    _API_PHRASES = ("api error", "api_error", "internal server error")
    _API_STATUS = ("500",)

    def __init__(self, provider: str | None = None):
        """Initialize classifier.

        Args:
            provider: LLM provider name. It is stored on the instance; the
                heuristics in this class do not currently consult it.
        """
        self.provider = provider

    @staticmethod
    def _has_status_code(error_str: str, codes: tuple[str, ...]) -> bool:
        """Return True if any of ``codes`` occurs in ``error_str`` as a standalone number.

        An occurrence counts only when the characters immediately before and
        after it are not digits, so "429" matches "http 429:" but not "14290".
        """
        for code in codes:
            start = error_str.find(code)
            while start != -1:
                end = start + len(code)
                digit_before = start > 0 and error_str[start - 1].isdigit()
                digit_after = end < len(error_str) and error_str[end].isdigit()
                if not digit_before and not digit_after:
                    return True
                # Keep scanning: a later occurrence may be standalone
                start = error_str.find(code, start + 1)
        return False

    def classify(  # noqa: PLR0911
        self,
        error: Exception | str,
        context: dict[str, Any] | None = None,
    ) -> ClassifiedError:
        """Classify an error into a DeepFabric error code.

        Args:
            error: The exception or error string to classify
            context: Optional context (e.g., quota_info from rate limit detector)

        Returns:
            ClassifiedError with appropriate error code and details. For an
            unrecognized error the detail is the error text truncated to
            ``ERROR_DETAIL_MAX_LENGTH`` characters.
        """
        original = str(error)
        error_str = original.lower()
        context = context or {}

        # Rate limits are checked first (most common during generation)
        if self._is_rate_limit(error_str, context):
            return self._classify_rate_limit(error_str, context, error)

        if self._is_auth_error(error_str):
            return ClassifiedError(error_code=DF_A01, original_error=original)

        if self._is_model_not_found(error_str):
            return ClassifiedError(error_code=DF_A02, original_error=original)

        if self._is_network_error(error_str):
            return self._classify_network_error(error_str, error)

        if self._is_parse_error(error_str, context):
            return self._classify_parse_error(error_str, context, error)

        if self._is_tool_error(error_str, context):
            return self._classify_tool_error(error_str, context, error)

        if self._is_api_error(error_str):
            return ClassifiedError(error_code=DF_A03, original_error=original)

        # Nothing matched: report as unknown with a shortened detail
        return ClassifiedError(
            error_code=DF_X01,
            detail=original[:ERROR_DETAIL_MAX_LENGTH],
            original_error=original,
        )

    def _is_rate_limit(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is a rate limit error.

        True when the text contains a rate-limit phrase or a standalone 429,
        or when ``context["is_rate_limit"]`` is truthy.
        """
        if any(phrase in error_str for phrase in self._RATE_LIMIT_PHRASES):
            return True
        if self._has_status_code(error_str, self._RATE_LIMIT_STATUS):
            return True
        # Coerce so the declared bool return type holds for any context value
        return bool(context.get("is_rate_limit", False))

    def _classify_rate_limit(
        self, error_str: str, context: dict[str, Any], error: Exception | str
    ) -> ClassifiedError:
        """Classify rate limit error into specific type.

        Precedence: daily quota (DF-R02), token limit (DF-R03), requests per
        minute (DF-R01), then the generic DF-R04. ``context["retry_after"]``
        is carried through unchanged in every case.
        """
        retry_after = context.get("retry_after")
        original = str(error)

        # Daily quota exhausted
        if context.get("daily_quota_exhausted") or "per_day" in error_str:
            return ClassifiedError(
                error_code=DF_R02,
                detail="daily quota",
                retry_after=retry_after,
                original_error=original,
            )

        # Token limit. The key may be present with a None value, so fall back
        # to "" before the substring test; compare case-insensitively like
        # the error text.
        quota_type = str(context.get("quota_type") or "").lower()
        if "token" in error_str or "token" in quota_type:
            return ClassifiedError(
                error_code=DF_R03,
                retry_after=retry_after,
                original_error=original,
            )

        # RPM (requests per minute) - most common
        if "per_minute" in error_str or "rpm" in error_str:
            return ClassifiedError(
                error_code=DF_R01,
                retry_after=retry_after,
                original_error=original,
            )

        # Generic rate limit
        return ClassifiedError(
            error_code=DF_R04,
            retry_after=retry_after,
            original_error=original,
        )

    def _is_auth_error(self, error_str: str) -> bool:
        """Check if error is authentication-related (phrase, or standalone 401/403)."""
        if any(phrase in error_str for phrase in self._AUTH_PHRASES):
            return True
        return self._has_status_code(error_str, self._AUTH_STATUS)

    def _is_model_not_found(self, error_str: str) -> bool:
        """Check if error is model not found.

        Requires both a not-found signal ("not found" or a standalone 404)
        and the word "model" somewhere in the text.
        """
        missing = "not found" in error_str or self._has_status_code(
            error_str, self._NOT_FOUND_STATUS
        )
        return missing and "model" in error_str

    def _is_network_error(self, error_str: str) -> bool:
        """Check if error is network-related (phrase, or standalone 502/503/504)."""
        if any(phrase in error_str for phrase in self._NETWORK_PHRASES):
            return True
        return self._has_status_code(error_str, self._UNAVAILABLE_STATUS)

    def _classify_network_error(self, error_str: str, error: Exception | str) -> ClassifiedError:
        """Classify network error into specific type.

        Timeouts map to DF-N02 (this includes "gateway timeout"), gateway and
        availability failures to DF-N03, and everything else to DF-N01.
        """
        original = str(error)

        if "timeout" in error_str or "timed out" in error_str:
            return ClassifiedError(error_code=DF_N02, original_error=original)

        unavailable = any(
            phrase in error_str for phrase in self._UNAVAILABLE_PHRASES
        ) or self._has_status_code(error_str, self._UNAVAILABLE_STATUS)
        if unavailable:
            return ClassifiedError(error_code=DF_N03, original_error=original)

        return ClassifiedError(error_code=DF_N01, original_error=original)

    def _is_parse_error(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is a parsing/schema error.

        True when the text contains a parse-related phrase or when
        ``context["error_type"]`` is one of the known parse error types.
        """
        if any(phrase in error_str for phrase in self._PARSE_PHRASES):
            return True
        return context.get("error_type") in self._PARSE_ERROR_TYPES

    def _classify_parse_error(
        self, error_str: str, context: dict[str, Any], error: Exception | str
    ) -> ClassifiedError:
        """Classify parse error into specific type.

        Precedence: empty response (DF-P03), schema (DF-P02), JSON/parse
        (DF-P01), then the malformed-response fallback DF-P04.
        """
        error_type = context.get("error_type", "")
        original = str(error)

        if "empty" in error_str or error_type == "empty_responses":
            return ClassifiedError(error_code=DF_P03, original_error=original)

        if "schema" in error_str or error_type == "invalid_schema":
            return ClassifiedError(error_code=DF_P02, original_error=original)

        if "json" in error_str or "parse" in error_str or error_type == "json_parsing_errors":
            return ClassifiedError(error_code=DF_P01, original_error=original)

        return ClassifiedError(error_code=DF_P04, original_error=original)

    def _is_tool_error(self, error_str: str, context: dict[str, Any]) -> bool:
        """Check if error is tool-related (phrase, or ``error_type == "tool_error"``)."""
        if any(phrase in error_str for phrase in self._TOOL_PHRASES):
            return True
        return context.get("error_type") == "tool_error"

    def _classify_tool_error(
        self,
        error_str: str,
        context: dict[str, Any],  # noqa: ARG002
        error: Exception | str,
    ) -> ClassifiedError:
        """Classify tool error into specific type.

        Limit violations map to DF-T02, a missing tool execution to DF-T03,
        and anything else to the tool-validation code DF-T01. ``context`` is
        accepted for signature parity with the other classifiers but unused.
        """
        original = str(error)

        if "exceeds limit" in error_str or "max_tools" in error_str:
            return ClassifiedError(error_code=DF_T02, original_error=original)

        if "requires at least one" in error_str or "no tool" in error_str:
            return ClassifiedError(error_code=DF_T03, original_error=original)

        return ClassifiedError(error_code=DF_T01, original_error=original)

    def _is_api_error(self, error_str: str) -> bool:
        """Check if error is a generic API error (phrase, or standalone 500)."""
        if any(phrase in error_str for phrase in self._API_PHRASES):
            return True
        return self._has_status_code(error_str, self._API_STATUS)
542
+
543
+
544
# Shared classifier used when no provider is named; created on first use
_default_classifier: ErrorClassifier | None = None


def get_classifier(provider: str | None = None) -> ErrorClassifier:
    """Return an error classifier, reusing a shared one when possible.

    Args:
        provider: Optional provider name for provider-specific classification

    Returns:
        A new ErrorClassifier bound to ``provider`` when one is given;
        otherwise the module-level default instance, which is created on the
        first such call and reused afterwards.
    """
    global _default_classifier  # noqa: PLW0603
    if not provider:
        if _default_classifier is None:
            _default_classifier = ErrorClassifier()
        return _default_classifier
    return ErrorClassifier(provider)
563
+
564
+
565
def classify_error(
    error: Exception | str,
    provider: str | None = None,
    context: dict[str, Any] | None = None,
) -> ClassifiedError:
    """Classify an error in one call, without managing a classifier yourself.

    Args:
        error: The exception or error string to classify
        provider: Optional provider name, forwarded to ``get_classifier``
        context: Optional context dictionary, forwarded to ``classify``

    Returns:
        ClassifiedError with appropriate error code
    """
    return get_classifier(provider).classify(error, context)
@@ -0,0 +1,47 @@
1
+ """Evaluation module for DeepFabric.
2
+
3
+ This module provides functionality to evaluate fine-tuned models on tool-calling tasks.
4
+ """
5
+
6
+ from .evaluator import EvaluationResult, Evaluator, EvaluatorConfig
7
+ from .evaluators import (
8
+ BaseEvaluator,
9
+ EvaluationContext,
10
+ EvaluatorRegistry,
11
+ EvaluatorResult,
12
+ ToolCallingEvaluator,
13
+ )
14
+ from .inference import InferenceConfig, ModelResponse, create_inference_backend
15
+ from .metrics import EvaluationMetrics, SampleEvaluation, compute_metrics
16
+ from .parser import GroundTruth, GroundTruthParser, parse_batch
17
+ from .reporters import BaseReporter, CloudReporter, FileReporter, MultiReporter
18
+
19
# Public API of the evaluation package, grouped by the submodule that
# provides each name.
__all__ = [
    # .parser
    "GroundTruth",
    "GroundTruthParser",
    "parse_batch",
    # .inference
    "InferenceConfig",
    "ModelResponse",
    "create_inference_backend",
    # .metrics
    "EvaluationMetrics",
    "SampleEvaluation",
    "compute_metrics",
    # .evaluator
    "Evaluator",
    "EvaluatorConfig",
    "EvaluationResult",
    # .evaluators
    "BaseEvaluator",
    "EvaluationContext",
    "EvaluatorRegistry",
    "EvaluatorResult",
    "ToolCallingEvaluator",
    # .reporters
    "BaseReporter",
    "FileReporter",
    "CloudReporter",
    "MultiReporter",
]
@@ -0,0 +1,32 @@
1
+ """Inference backend implementations."""
2
+
3
+ from .ollama_backend import OllamaBackend
4
+ from .tool_call_parsers import (
5
+ GenericToolCallParser,
6
+ HermesToolCallParser,
7
+ LlamaToolCallParser,
8
+ MistralToolCallParser,
9
+ QwenToolCallParser,
10
+ ToolCallParser,
11
+ ToolCallParserRegistry,
12
+ get_parser,
13
+ get_parser_for_model,
14
+ register_parser,
15
+ )
16
+ from .transformers_backend import TransformersBackend
17
+
18
# Public API of the backends package, grouped by the submodule that
# provides each name.
__all__ = [
    # .transformers_backend / .ollama_backend
    "TransformersBackend",
    "OllamaBackend",
    # .tool_call_parsers
    "ToolCallParser",
    "ToolCallParserRegistry",
    "QwenToolCallParser",
    "LlamaToolCallParser",
    "MistralToolCallParser",
    "HermesToolCallParser",
    "GenericToolCallParser",
    "get_parser",
    "get_parser_for_model",
    "register_parser",
]