DeepFabric 4.4.1-py3-none-any.whl → 4.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. deepfabric/__init__.py +8 -0
  2. deepfabric/auth.py +8 -2
  3. deepfabric/builders.py +2 -2
  4. deepfabric/builders_agent.py +18 -6
  5. deepfabric/cli.py +292 -13
  6. deepfabric/cloud_upload.py +884 -0
  7. deepfabric/config.py +47 -20
  8. deepfabric/config_manager.py +2 -2
  9. deepfabric/dataset.py +302 -0
  10. deepfabric/evaluation/backends/__init__.py +2 -0
  11. deepfabric/evaluation/backends/llm_eval_backend.py +527 -0
  12. deepfabric/evaluation/backends/ollama_backend.py +3 -3
  13. deepfabric/evaluation/backends/tool_call_parsers.py +7 -7
  14. deepfabric/evaluation/backends/transformers_backend.py +73 -16
  15. deepfabric/evaluation/evaluator.py +41 -7
  16. deepfabric/evaluation/evaluators/builtin/tool_calling.py +13 -8
  17. deepfabric/evaluation/inference.py +77 -5
  18. deepfabric/evaluation/metrics.py +4 -0
  19. deepfabric/evaluation/parser.py +8 -8
  20. deepfabric/evaluation/reporters/cloud_reporter.py +19 -6
  21. deepfabric/exceptions.py +14 -0
  22. deepfabric/generator.py +8 -4
  23. deepfabric/graph.py +38 -0
  24. deepfabric/hf_hub.py +1 -1
  25. deepfabric/loader.py +554 -0
  26. deepfabric/schemas.py +7 -7
  27. deepfabric/topic_manager.py +4 -0
  28. deepfabric/training/__init__.py +24 -5
  29. deepfabric/training/callback.py +43 -1
  30. deepfabric/training/dataset_utils.py +223 -0
  31. deepfabric/training/metrics_sender.py +50 -16
  32. deepfabric/tui.py +9 -1
  33. deepfabric/utils.py +14 -0
  34. deepfabric/validation.py +1 -1
  35. {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/METADATA +84 -177
  36. {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/RECORD +39 -34
  37. {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/WHEEL +0 -0
  38. {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/entry_points.txt +0 -0
  39. {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/evaluation/backends/transformers_backend.py CHANGED
@@ -1,5 +1,6 @@
 import json
 import logging
+import sys
 
 from typing import Any
 
@@ -36,9 +37,15 @@ class TransformersBackend(InferenceBackend):
         """
         super().__init__(config)
 
+        # Check if model is pre-loaded (not a string path)
+        is_preloaded = not isinstance(config.model, str)
+
         # Determine device
         if config.device:
             self.device = config.device
+        elif is_preloaded:
+            # Get device from pre-loaded model
+            self.device = str(next(config.model.parameters()).device)
        # Auto-detect best available device
        elif torch.cuda.is_available():
            self.device = "cuda"
@@ -48,7 +55,7 @@ class TransformersBackend(InferenceBackend):
            self.device = "cpu"
 
        # Determine dtype based on device
-        if self.device == "cuda":
+        if self.device == "cuda" or self.device.startswith("cuda:"):
            dtype = torch.float16
            device_map = "auto"
        elif self.device == "mps":
@@ -58,11 +65,36 @@ class TransformersBackend(InferenceBackend):
            dtype = torch.float32
            device_map = None
 
+        # Handle pre-loaded model case - skip all loading logic
+        if is_preloaded:
+            self.model = config.model
+            self.tokenizer = config.tokenizer
+            self.loaded_with_unsloth = False
+
+            # Detect architecture from pre-loaded model's config
+            self._architectures = []
+            if hasattr(self.model, "config"):
+                self._architectures = getattr(self.model.config, "architectures", []) or []
+
+            # Initialize tool call parser
+            self._tool_call_parser: ToolCallParser = get_parser(self._architectures)
+            logger.info(
+                "Using pre-loaded model with %s parser for architectures: %s",
+                type(self._tool_call_parser).__name__,
+                self._architectures or ["unknown"],
+            )
+
+            # Set padding token if not set
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            return  # Skip remaining initialization
+
        # Detect model architecture for parser selection and tokenizer config
-        self._architectures: list[str] = []
+        self._architectures = []
        tokenizer_kwargs: dict[str, Any] = {}
        try:
-            model_config = AutoConfig.from_pretrained(config.model_path)  # nosec
+            model_config = AutoConfig.from_pretrained(config.model)  # nosec
            self._architectures = getattr(model_config, "architectures", []) or []
            if any(arch in MISTRAL_ARCHITECTURES for arch in self._architectures):
                tokenizer_kwargs["fix_mistral_regex"] = True
@@ -71,7 +103,7 @@ class TransformersBackend(InferenceBackend):
            logger.warning("Could not detect model architecture: %s", e)
 
        # Initialize tool call parser based on detected architecture
-        self._tool_call_parser: ToolCallParser = get_parser(self._architectures)
+        self._tool_call_parser = get_parser(self._architectures)
        logger.info(
            "Using %s for model architectures: %s",
            type(self._tool_call_parser).__name__,
@@ -79,19 +111,44 @@ class TransformersBackend(InferenceBackend):
        )
 
        self.loaded_with_unsloth = False
-        # Load with Unsloth if requested
-        if config.use_unsloth:
+
+        # Detect if Unsloth has already patched the environment
+        # This happens when user imports unsloth in the same runtime
+        unsloth_patched = "unsloth" in sys.modules
+
+        # Use Unsloth if explicitly requested OR if Unsloth has patched the environment
+        # (to avoid "apply_qkv" errors from patched attention classes)
+        use_unsloth_loading = config.use_unsloth or unsloth_patched
+
+        if use_unsloth_loading:
            try:
                from unsloth import FastLanguageModel  # type: ignore # noqa: PLC0415
 
-                # Load from adapter path if provided, otherwise from model_path
-                load_path = config.adapter_path if config.adapter_path else config.model_path
-                self.model, self.tokenizer = FastLanguageModel.from_pretrained(
-                    model_name=load_path,
-                    max_seq_length=config.max_seq_length,
-                    dtype=dtype,
-                    load_in_4bit=config.load_in_4bit,
-                )
+                if unsloth_patched and not config.use_unsloth:
+                    logger.info(
+                        "Unsloth detected in environment, using Unsloth loader for compatibility"
+                    )
+
+                if config.adapter_path:
+                    # Load base model first, then apply adapter
+                    self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+                        model_name=config.model,
+                        max_seq_length=config.max_seq_length,
+                        dtype=dtype,
+                        load_in_4bit=config.load_in_4bit,
+                    )
+                    # Load LoRA adapter using PEFT
+                    from peft import PeftModel  # noqa: PLC0415
+
+                    self.model = PeftModel.from_pretrained(self.model, config.adapter_path)
+                else:
+                    # Load merged model or base model directly
+                    self.model, self.tokenizer = FastLanguageModel.from_pretrained(
+                        model_name=config.model,
+                        max_seq_length=config.max_seq_length,
+                        dtype=dtype,
+                        load_in_4bit=config.load_in_4bit,
+                    )
                FastLanguageModel.for_inference(self.model)
                self.loaded_with_unsloth = True
            except ImportError:
@@ -104,11 +161,11 @@ class TransformersBackend(InferenceBackend):
        # Standard transformers/PEFT loading
        if not self.loaded_with_unsloth:
            self.tokenizer = AutoTokenizer.from_pretrained(  # nosec
-                config.model_path, **tokenizer_kwargs
+                config.model, **tokenizer_kwargs
            )
 
            self.model = AutoModelForCausalLM.from_pretrained(  # nosec
-                config.model_path,
+                config.model,
                device_map=device_map,
                dtype=dtype,
            )
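
The pre-loaded path above can be exercised by handing an already-instantiated model and tokenizer to the inference config instead of a string path. The following is a minimal sketch, not taken from the package documentation: the checkpoint name is a placeholder, and it assumes transformers and torch are installed. InferenceConfig and create_inference_backend live in deepfabric/evaluation/inference.py, as shown later in this diff.

# Illustrative sketch only; checkpoint id is a placeholder.
from transformers import AutoModelForCausalLM, AutoTokenizer

from deepfabric.evaluation.inference import InferenceConfig, create_inference_backend

checkpoint = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder model id
model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

config = InferenceConfig(
    model=model,          # pre-loaded object instead of a path string
    tokenizer=tokenizer,  # required whenever model is not a string
    backend="transformers",
)
backend = create_inference_backend(config)  # reuses the in-memory weights, no reload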
deepfabric/evaluation/evaluator.py CHANGED
@@ -36,12 +36,12 @@ class EvaluatorConfig(BaseModel):
        default=None,
        description="Path to save evaluation results",
    )
-    model_path: str | None = Field(
+    model: str | None = Field(
        default=None,
-        description="Path to model to evaluate (overrides inference_config.model_path)",
+        description="Model to evaluate (overrides inference_config.model)",
    )
    inference_config: InferenceConfig = Field(
-        description="Inference backend configuration (includes model_path)",
+        description="Inference backend configuration (includes model)",
    )
    batch_size: int = Field(
        default=1,
@@ -119,7 +119,7 @@ class Evaluator:
            "evaluator_created",
            {
                "backend": self.config.inference_config.backend,
-                "model_path": self.config.inference_config.model_path,
+                "model": self.config.inference_config.model,
                "has_adapter": self.config.inference_config.adapter_path is not None,
                "evaluators": (
                    list(self.config.evaluators)
@@ -434,6 +434,7 @@ class Evaluator:
                ground_truth=ground_truth,
                response=response,
                evaluator_results=evaluator_results,
+                tools=tools,
            )
 
        except Exception as e:  # noqa: BLE001
@@ -442,8 +443,9 @@ class Evaluator:
            expected_tool = None
            expected_params: dict[str, Any] = {}
            expected_answer = None
+            available_tool_names: list[str] = []
 
-            # Try to extract ground truth if available
+            # Try to extract ground truth and tools if available
            try:
                gt = self.extract_ground_truth(sample)
                query = gt.query
@@ -453,9 +455,16 @@ class Evaluator:
            except (KeyError, AttributeError, ValidationError):
                pass
 
+            try:
+                tools = self.prepare_tools(sample)
+                available_tool_names = [t.name for t in tools]
+            except (KeyError, AttributeError, ValidationError):
+                pass
+
            return SampleEvaluation(
                sample_id=sample_id,
                query=query,
+                available_tools=available_tool_names,
                expected_tool=expected_tool,
                predicted_tool=None,
                expected_parameters=expected_params,
@@ -560,6 +569,7 @@ class Evaluator:
                ground_truth=ground_truth,
                predicted_tool_calls=all_predicted_tool_calls,
                final_content=final_content,
+                tools=tools,
            )
 
        except Exception as e:  # noqa: BLE001
@@ -568,6 +578,7 @@ class Evaluator:
            expected_tool = None
            expected_params: dict[str, Any] = {}
            expected_answer = None
+            available_tool_names: list[str] = []
 
            try:
                gt = self.extract_ground_truth(sample)
@@ -578,9 +589,16 @@ class Evaluator:
            except (KeyError, AttributeError, ValidationError):
                pass
 
+            try:
+                tools = self.prepare_tools(sample)
+                available_tool_names = [t.name for t in tools]
+            except (KeyError, AttributeError, ValidationError):
+                pass
+
            return SampleEvaluation(
                sample_id=sample_id,
                query=query,
+                available_tools=available_tool_names,
                expected_tool=expected_tool,
                predicted_tool=None,
                expected_parameters=expected_params,
@@ -600,6 +618,7 @@ class Evaluator:
        ground_truth: GroundTruth,
        predicted_tool_calls: list[dict],
        final_content: str,
+        tools: list[ToolDefinition] | None = None,
    ) -> SampleEvaluation:
        """Compute metrics for multi-turn evaluation.
 
@@ -610,6 +629,7 @@ class Evaluator:
            ground_truth: Expected values including all expected tools
            predicted_tool_calls: All tool calls made by model across turns
            final_content: Final model response content
+            tools: List of available tools for this sample
 
        Returns:
            SampleEvaluation with computed metrics
@@ -652,9 +672,13 @@ class Evaluator:
        # Execution valid if we got through the conversation
        execution_valid = len(predicted_tool_calls) > 0 or final_content != ""
 
+        # Extract tool names for available_tools field
+        available_tool_names = [t.name for t in tools] if tools else []
+
        return SampleEvaluation(
            sample_id=sample_id,
            query=ground_truth.query,
+            available_tools=available_tool_names,
            expected_tool=ground_truth.expected_tool,
            predicted_tool=first_predicted_tool,
            expected_parameters=ground_truth.expected_parameters,
@@ -714,6 +738,7 @@ class Evaluator:
        ground_truth: GroundTruth,
        response: ModelResponse,
        evaluator_results: list[EvaluatorResult],
+        tools: list[ToolDefinition] | None = None,
    ) -> SampleEvaluation:
        """Aggregate evaluator results into SampleEvaluation.
 
@@ -722,6 +747,7 @@ class Evaluator:
            ground_truth: Expected values
            response: Model response
            evaluator_results: Results from all evaluators
+            tools: List of available tools for this sample
 
        Returns:
            SampleEvaluation with aggregated metrics
@@ -746,10 +772,14 @@ class Evaluator:
        params_correct = metrics.get("parameter_accuracy", 0.0) == 1.0
        execution_valid = metrics.get("execution_valid", 0.0) == 1.0
 
+        # Extract tool names for available_tools field
+        available_tool_names = [t.name for t in tools] if tools else []
+
        # Return backwards-compatible SampleEvaluation
        return SampleEvaluation(
            sample_id=sample_id,
            query=ground_truth.query,
+            available_tools=available_tool_names,
            expected_tool=ground_truth.expected_tool,
            predicted_tool=predicted_tool,
            expected_parameters=ground_truth.expected_parameters,
@@ -780,13 +810,17 @@ class Evaluator:
        console.print("[bold blue]Running evaluation...[/bold blue]")
        evaluations = []
 
-        for idx, sample in tqdm(enumerate(samples), total=len(samples), desc="Evaluating"):
+        pbar = tqdm(enumerate(samples), total=len(samples), desc="Evaluating")
+        for idx, sample in pbar:
            eval_result = self.evaluate_sample(sample, idx)
            evaluations.append(eval_result)
 
            # Stream sample to reporters (for cloud real-time tracking)
            self.reporter.report_sample(eval_result)
 
+            # Force refresh for notebook compatibility
+            pbar.refresh()
+
        console.print("[bold green]Evaluation complete![/bold green]")
 
        # Compute metrics
@@ -804,7 +838,7 @@ class Evaluator:
            "evaluation_completed",
            {
                "backend": self.config.inference_config.backend,
-                "model_path": self.config.inference_config.model_path,
+                "model": self.config.inference_config.model,
                "has_adapter": self.config.inference_config.adapter_path is not None,
                "samples_evaluated": metrics.samples_evaluated,
                "samples_processed": metrics.samples_processed,
deepfabric/evaluation/evaluators/builtin/tool_calling.py CHANGED
@@ -63,14 +63,19 @@ class ToolCallingEvaluator(BaseEvaluator):
        # Compute metrics
        tool_correct = predicted_tool == ground_truth.expected_tool
 
-        # Validate parameters against the PREDICTED tool (not expected)
-        # This measures parameter extraction capability independently of tool selection
-        params_correct = compare_parameters(
-            ground_truth.expected_parameters,
-            predicted_params,
-            tool_name=predicted_tool,  # Use predicted tool for schema validation
-            tool_definitions=context.tools,
-        )
+        # Parameter accuracy requires a tool to have been called
+        # If no tool was predicted but one was expected, params cannot be correct
+        if predicted_tool is None and ground_truth.expected_tool is not None:
+            params_correct = False
+        else:
+            # Validate parameters against the PREDICTED tool (not expected)
+            # This measures parameter extraction capability independently of tool selection
+            params_correct = compare_parameters(
+                ground_truth.expected_parameters,
+                predicted_params,
+                tool_name=predicted_tool,  # Use predicted tool for schema validation
+                tool_definitions=context.tools,
+            )
 
        # Execution valid requires BOTH correct tool AND correct params
        execution_valid = tool_correct and params_correct
deepfabric/evaluation/inference.py CHANGED
@@ -1,9 +1,9 @@
 """Model inference interfaces and implementations for evaluation."""
 
 from abc import ABC, abstractmethod
-from typing import Literal
+from typing import Any, Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator
 
 from ..schemas import ToolDefinition
 
@@ -11,17 +11,40 @@ from ..schemas import ToolDefinition
 class InferenceConfig(BaseModel):
    """Configuration for model inference."""
 
-    model_path: str = Field(
-        description="Path to model (local path or HuggingFace Hub ID)",
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    model: str | Any = Field(
+        description="Model identifier (local path, HuggingFace Hub ID, or model name for cloud providers). "
+        "Can also be a pre-loaded model object to avoid reloading.",
+    )
+    tokenizer: Any | None = Field(
+        default=None,
+        description="Pre-loaded tokenizer object. Required when model is a pre-loaded model object.",
    )
    adapter_path: str | None = Field(
        default=None,
        description="Path to PEFT/LoRA adapter (if using adapter-based fine-tuning)",
    )
-    backend: Literal["transformers", "ollama"] = Field(
+    backend: Literal["transformers", "ollama", "llm"] = Field(
        default="transformers",
        description="Inference backend to use",
    )
+    provider: Literal["openai", "anthropic", "gemini", "openrouter"] | None = Field(
+        default=None,
+        description="Cloud LLM provider (required when backend='llm')",
+    )
+    api_key: str | None = Field(
+        default=None,
+        description="API key for the provider (falls back to environment variable if not set)",
+    )
+    base_url: str | None = Field(
+        default=None,
+        description="Custom base URL for the API (e.g., for OpenRouter or proxies)",
+    )
+    rate_limit_config: dict | None = Field(
+        default=None,
+        description="Rate limiting configuration overrides",
+    )
    use_unsloth: bool = Field(
        default=False,
        description="Use Unsloth for loading adapter (for adapters trained with Unsloth)",
@@ -62,6 +85,51 @@ class InferenceConfig(BaseModel):
        description="Batch size for inference",
    )
 
+    @field_serializer("model")
+    def serialize_model(self, value: str | Any) -> str:
+        """Serialize model field - convert objects to descriptive string."""
+        if isinstance(value, str):
+            return value
+        # For in-memory model objects, return a descriptive string
+        model_class = type(value).__name__
+        model_name = getattr(getattr(value, "config", None), "name_or_path", "unknown")
+        return f"<in-memory:{model_class}:{model_name}>"
+
+    @field_serializer("tokenizer")
+    def serialize_tokenizer(self, value: Any | None) -> str | None:
+        """Serialize tokenizer field - convert objects to descriptive string."""
+        if value is None:
+            return None
+        if isinstance(value, str):
+            return value
+        # For in-memory tokenizer objects, return a descriptive string
+        tokenizer_class = type(value).__name__
+        tokenizer_name = getattr(value, "name_or_path", "unknown")
+        return f"<in-memory:{tokenizer_class}:{tokenizer_name}>"
+
+    @model_validator(mode="after")
+    def validate_config(self) -> "InferenceConfig":
+        """Validate configuration consistency."""
+        # Ensure provider is set when using LLM backend
+        if self.backend == "llm" and self.provider is None:
+            msg = "provider must be specified when backend='llm'"
+            raise ValueError(msg)
+
+        # Check if model is a pre-loaded object (not a string path)
+        is_preloaded_model = not isinstance(self.model, str)
+
+        # If model is pre-loaded, tokenizer must also be provided
+        if is_preloaded_model and self.tokenizer is None:
+            msg = "tokenizer must be provided when using a pre-loaded model object"
+            raise ValueError(msg)
+
+        # Pre-loaded models only work with transformers backend
+        if is_preloaded_model and self.backend != "transformers":
+            msg = "pre-loaded model objects are only supported with backend='transformers'"
+            raise ValueError(msg)
+
+        return self
+
 
 class ModelResponse(BaseModel):
    """Model inference response."""
@@ -150,6 +218,10 @@ def create_inference_backend(config: InferenceConfig) -> InferenceBackend:
        from .backends.ollama_backend import OllamaBackend  # noqa: PLC0415
 
        return OllamaBackend(config)
+    if config.backend == "llm":
+        from .backends.llm_eval_backend import LLMEvalBackend  # noqa: PLC0415
+
+        return LLMEvalBackend(config)
 
    msg = f"Unsupported backend: {config.backend}"
    raise ValueError(msg)
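
As a rough usage sketch of the new fields (not taken from the package documentation): a cloud-hosted model is selected by setting backend="llm" together with a provider, and the validate_config validator above rejects the config if the provider is missing. The model name and environment variable below are placeholders.

import os

from deepfabric.evaluation.inference import InferenceConfig, create_inference_backend

config = InferenceConfig(
    model="gpt-4o-mini",                    # provider-side model name (placeholder)
    backend="llm",                          # new in 4.6.0 alongside transformers/ollama
    provider="openai",                      # required when backend="llm"
    api_key=os.getenv("OPENAI_API_KEY"),    # falls back to environment variables if omitted
)
backend = create_inference_backend(config)  # returns an LLMEvalBackend per the factory above

# Omitting provider with backend="llm" raises a validation error:
# InferenceConfig(model="gpt-4o-mini", backend="llm")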
deepfabric/evaluation/metrics.py CHANGED
@@ -107,6 +107,10 @@ class SampleEvaluation(BaseModel):
 
    sample_id: int = Field(description="Sample index")
    query: str = Field(description="Input query")
+    available_tools: list[str] = Field(
+        default_factory=list,
+        description="List of tool names available for this sample",
+    )
    expected_tool: str | None = Field(
        default=None,
        description="Expected tool name",
deepfabric/evaluation/parser.py CHANGED
@@ -49,12 +49,12 @@ class GroundTruth(BaseModel):
        default=None,
        description="Expected final answer if available",
    )
-    conversation_type: Literal["basic", "chain_of_thought"] = Field(
+    conversation_type: Literal["basic", "cot"] = Field(
        description="Type of conversation",
    )
    reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
        default=None,
-        description="Reasoning style if chain_of_thought",
+        description="Reasoning style if cot",
    )
    agent_mode: Literal["single_turn", "multi_turn"] | None = Field(
        default=None,
@@ -75,18 +75,18 @@ class GroundTruthParser:
 
    def __init__(
        self,
-        conversation_type: Literal["basic", "chain_of_thought"],
+        conversation_type: Literal["basic", "cot"],
        reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None,
        agent_mode: Literal["single_turn", "multi_turn"] | None = None,
    ):
        """Initialize parser with conversation configuration.
 
        Args:
-            conversation_type: Type of conversation (basic, chain_of_thought)
-            reasoning_style: Reasoning style for chain_of_thought
+            conversation_type: Type of conversation (basic, cot)
+            reasoning_style: Reasoning style for cot
            agent_mode: Agent mode if tools are used
        """
-        self.conversation_type: Literal["basic", "chain_of_thought"] = conversation_type
+        self.conversation_type: Literal["basic", "cot"] = conversation_type
        self.reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = (
            reasoning_style
        )
@@ -270,7 +270,7 @@ class GroundTruthParser:
 
 def parse_batch(
    conversations: list[Conversation],
-    conversation_type: Literal["basic", "chain_of_thought"],
+    conversation_type: Literal["basic", "cot"],
    reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None,
    agent_mode: Literal["single_turn", "multi_turn"] | None = None,
 ) -> list[GroundTruth]:
@@ -279,7 +279,7 @@ def parse_batch(
    Args:
        conversations: List of Conversation objects
        conversation_type: Type of conversation
-        reasoning_style: Reasoning style if chain_of_thought
+        reasoning_style: Reasoning style if cot
        agent_mode: Agent mode if tools are used
 
    Returns:
deepfabric/evaluation/reporters/cloud_reporter.py CHANGED
@@ -13,6 +13,7 @@ import httpx
 
 from rich.console import Console
 
+from ...utils import get_bool_env
 from .base import BaseReporter
 
 if TYPE_CHECKING:
@@ -45,7 +46,7 @@ class CloudReporter(BaseReporter):
 
        Args:
            config: Optional configuration with:
-                - api_url: DeepFabric API URL (default: https://api.deepfabric.dev")
+                - api_url: DeepFabric API URL (default: https://api.deepfabric.cloud")
                - project_id: Project ID to associate results with
                - auth_token: Authentication token (if not provided, will read from config file)
                - enabled: Whether to enable cloud reporting (default: True if authenticated)
@@ -53,7 +54,7 @@ class CloudReporter(BaseReporter):
        super().__init__(config)
 
        # Get API URL from config or environment
-        self.api_url = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.dev")
+        self.api_url = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.cloud")
        if config and "api_url" in config:
            self.api_url = config["api_url"]
 
@@ -67,8 +68,9 @@ class CloudReporter(BaseReporter):
        # Get project ID from config
        self.project_id = config.get("project_id") if config else None
 
-        # Enable cloud reporting if authenticated
-        self.enabled = (
+        # Enable cloud reporting if authenticated AND experimental flag is set
+        is_experimental = get_bool_env("EXPERIMENTAL_DF")
+        self.enabled = is_experimental and (
            config.get("enabled", bool(self.auth_token)) if config else bool(self.auth_token)
        )
 
@@ -99,11 +101,22 @@ class CloudReporter(BaseReporter):
        try:
            console.print("[cyan]Uploading evaluation results to cloud...[/cyan]")
 
+            # Get model name as string (handle in-memory model objects)
+            model_value = result.config.inference_config.model
+            if isinstance(model_value, str):
+                model_name = model_value
+            else:
+                # For in-memory model objects, extract name from config
+                model_config = getattr(model_value, "config", None)
+                model_name = (
+                    getattr(model_config, "name_or_path", None) or type(model_value).__name__
+                )
+
            # Create evaluation run
            run_data = {
-                "project_id": self.project_id,
+                "pipeline_id": self.project_id,
                "name": f"Evaluation - {datetime.now(UTC).strftime('%Y-%m-%d %H:%M')}",
-                "model_name": result.config.inference_config.model_path,
+                "model_name": model_name,
                "model_provider": result.config.inference_config.backend,
                "config": {
                    "evaluators": getattr(result.config, "evaluators", ["tool_calling"]),
deepfabric/exceptions.py CHANGED
@@ -65,3 +65,17 @@ class RetryExhaustedError(ModelError):
    """Raised when maximum retries are exceeded."""
 
    pass
+
+
+class LoaderError(DeepFabricError):
+    """Raised when dataset loading fails.
+
+    Common causes:
+    - File not found
+    - Invalid file format (malformed JSON/JSONL)
+    - Cloud authentication failure
+    - Network errors
+    - Empty dataset
+    """
+
+    pass
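
Because LoaderError subclasses DeepFabricError, existing broad handlers keep catching loading failures. A minimal sketch; the raise is a stand-in, since the real loading entry points live in the new deepfabric/loader.py, which is not shown in this diff.

from deepfabric.exceptions import DeepFabricError, LoaderError

try:
    raise LoaderError("dataset file not found: data.jsonl")  # stand-in for a failed load
except DeepFabricError as exc:
    print(f"dataset loading failed: {exc}")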
deepfabric/generator.py CHANGED
@@ -127,14 +127,14 @@ class DataSetGeneratorConfig(BaseModel):
    )
 
    # Modular conversation configuration
-    conversation_type: Literal["basic", "chain_of_thought"] = Field(
+    conversation_type: Literal["basic", "cot"] = Field(
        default="basic",
-        description="Base conversation type: basic (simple chat), chain_of_thought (with reasoning traces)",
+        description="Base conversation type: basic (simple chat), cot (with reasoning traces)",
    )
 
    reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
        default=None,
-        description="Reasoning style for chain_of_thought type: freetext (natural language) or agent (structured step-by-step for tool-calling). Note: 'structured' and 'hybrid' are deprecated.",
+        description="Reasoning style for cot type: freetext (natural language) or agent (structured step-by-step for tool-calling). Note: 'structured' and 'hybrid' are deprecated.",
    )
 
    @field_validator("reasoning_style", mode="before")
@@ -213,6 +213,10 @@ class DataSetGeneratorConfig(BaseModel):
        le=20,
        description="Minimum number of tool calls required before allowing early conversation conclusion",
    )
+    tool_inclusion_strategy: Literal["all", "used_only"] = Field(
+        default="used_only",
+        description="Which tools to include in each sample: 'all' includes full catalog, 'used_only' includes only tools actually called (recommended for training)",
+    )
 
 
 class DataSetGenerator:
@@ -1041,7 +1045,7 @@ class DataSetGenerator:
            return CONVERSATION_GENERATION_PROMPT
 
        # Handle chain of thought conversations
-        if self.config.conversation_type == "chain_of_thought":
+        if self.config.conversation_type == "cot":
            # Agent mode with tools - use agent prompts
            if self.config.agent_mode == "single_turn" and self.tool_registry:
                # Use agent prompt for single-turn tool calling
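
A rough sketch of the generator options touched by this release, written as a plain dict of config keys rather than a full DataSetGeneratorConfig, since the remaining required fields are outside this diff.

generator_options = {
    "conversation_type": "cot",              # renamed from "chain_of_thought"
    "reasoning_style": "agent",              # freetext or agent; structured/hybrid are deprecated
    "tool_inclusion_strategy": "used_only",  # new in 4.6.0: "all" or "used_only"
}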