DeepFabric 4.4.1__py3-none-any.whl → 4.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +8 -0
- deepfabric/auth.py +8 -2
- deepfabric/builders.py +2 -2
- deepfabric/builders_agent.py +18 -6
- deepfabric/cli.py +292 -13
- deepfabric/cloud_upload.py +884 -0
- deepfabric/config.py +47 -20
- deepfabric/config_manager.py +2 -2
- deepfabric/dataset.py +302 -0
- deepfabric/evaluation/backends/__init__.py +2 -0
- deepfabric/evaluation/backends/llm_eval_backend.py +527 -0
- deepfabric/evaluation/backends/ollama_backend.py +3 -3
- deepfabric/evaluation/backends/tool_call_parsers.py +7 -7
- deepfabric/evaluation/backends/transformers_backend.py +73 -16
- deepfabric/evaluation/evaluator.py +41 -7
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +13 -8
- deepfabric/evaluation/inference.py +77 -5
- deepfabric/evaluation/metrics.py +4 -0
- deepfabric/evaluation/parser.py +8 -8
- deepfabric/evaluation/reporters/cloud_reporter.py +19 -6
- deepfabric/exceptions.py +14 -0
- deepfabric/generator.py +8 -4
- deepfabric/graph.py +38 -0
- deepfabric/hf_hub.py +1 -1
- deepfabric/loader.py +554 -0
- deepfabric/schemas.py +7 -7
- deepfabric/topic_manager.py +4 -0
- deepfabric/training/__init__.py +24 -5
- deepfabric/training/callback.py +43 -1
- deepfabric/training/dataset_utils.py +223 -0
- deepfabric/training/metrics_sender.py +50 -16
- deepfabric/tui.py +9 -1
- deepfabric/utils.py +14 -0
- deepfabric/validation.py +1 -1
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/METADATA +84 -177
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/RECORD +39 -34
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/topic_manager.py
CHANGED
|
@@ -97,6 +97,8 @@ async def _process_graph_events(graph: Graph, debug: bool = False) -> dict | Non
|
|
|
97
97
|
get_tui().error(f" [{idx}] Node ID: {node_id}, Attempts: {attempts}")
|
|
98
98
|
get_tui().error(f" Error: {last_error}")
|
|
99
99
|
except Exception as e:
|
|
100
|
+
# Stop TUI before printing error to ensure visibility
|
|
101
|
+
tui.stop_live()
|
|
100
102
|
if debug:
|
|
101
103
|
get_tui().error(f"Debug: Full traceback:\n{traceback.format_exc()}")
|
|
102
104
|
get_tui().error(f"Graph build failed: {str(e)}")
|
|
@@ -147,6 +149,8 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
|
|
|
147
149
|
)
|
|
148
150
|
get_tui().error(f" Error: {failure.get('error', 'Unknown error')}")
|
|
149
151
|
except Exception as e:
|
|
152
|
+
# Stop TUI before printing error to ensure visibility
|
|
153
|
+
tui.stop_live()
|
|
150
154
|
if debug:
|
|
151
155
|
get_tui().error(f"Debug: Full traceback:\n{traceback.format_exc()}")
|
|
152
156
|
get_tui().error(f"Tree build failed: {str(e)}")
|
deepfabric/training/__init__.py
CHANGED
|
@@ -1,20 +1,27 @@
|
|
|
1
|
-
"""DeepFabric training
|
|
1
|
+
"""DeepFabric training utilities.
|
|
2
2
|
|
|
3
|
-
This module provides
|
|
4
|
-
|
|
3
|
+
This module provides:
|
|
4
|
+
- Integration with HuggingFace Trainer and TRL trainers for metrics logging
|
|
5
|
+
- Dataset preparation utilities for optimizing training data
|
|
5
6
|
|
|
6
7
|
Features:
|
|
7
8
|
- Non-blocking async metrics sending
|
|
8
9
|
- Notebook-friendly API key prompts (like wandb)
|
|
9
10
|
- Graceful handling of failures without impacting training
|
|
11
|
+
- Tool filtering to reduce sequence lengths and memory usage
|
|
10
12
|
|
|
11
13
|
Usage:
|
|
12
|
-
from deepfabric.training import DeepFabricCallback
|
|
14
|
+
from deepfabric.training import DeepFabricCallback, prepare_dataset_for_training
|
|
13
15
|
|
|
16
|
+
# Prepare dataset (reduces tool overhead)
|
|
17
|
+
dataset = load_dataset("your/dataset", split="train")
|
|
18
|
+
prepared = prepare_dataset_for_training(dataset, tool_strategy="used_only")
|
|
19
|
+
|
|
20
|
+
# Train with metrics logging
|
|
14
21
|
trainer = Trainer(
|
|
15
22
|
model=model,
|
|
16
23
|
args=training_args,
|
|
17
|
-
train_dataset=
|
|
24
|
+
train_dataset=prepared,
|
|
18
25
|
)
|
|
19
26
|
trainer.add_callback(DeepFabricCallback(trainer))
|
|
20
27
|
trainer.train()
|
|
@@ -27,9 +34,21 @@ Environment Variables:
|
|
|
27
34
|
from __future__ import annotations
|
|
28
35
|
|
|
29
36
|
from .callback import DeepFabricCallback
|
|
37
|
+
from .dataset_utils import (
|
|
38
|
+
ToolInclusionStrategy,
|
|
39
|
+
clean_tool_schema,
|
|
40
|
+
filter_tools_for_sample,
|
|
41
|
+
get_used_tool_names,
|
|
42
|
+
prepare_dataset_for_training,
|
|
43
|
+
)
|
|
30
44
|
from .metrics_sender import MetricsSender
|
|
31
45
|
|
|
32
46
|
__all__ = [
|
|
33
47
|
"DeepFabricCallback",
|
|
34
48
|
"MetricsSender",
|
|
49
|
+
"ToolInclusionStrategy",
|
|
50
|
+
"clean_tool_schema",
|
|
51
|
+
"filter_tools_for_sample",
|
|
52
|
+
"get_used_tool_names",
|
|
53
|
+
"prepare_dataset_for_training",
|
|
35
54
|
]
|
deepfabric/training/callback.py
CHANGED
|
@@ -51,6 +51,7 @@ class DeepFabricCallback:
|
|
|
51
51
|
trainer: Any | None = None,
|
|
52
52
|
api_key: str | None = None,
|
|
53
53
|
endpoint: str | None = None,
|
|
54
|
+
pipeline_id: str | None = None,
|
|
54
55
|
enabled: bool = True,
|
|
55
56
|
):
|
|
56
57
|
"""Initialize the DeepFabric callback.
|
|
@@ -60,11 +61,14 @@ class DeepFabricCallback:
|
|
|
60
61
|
api_key: DeepFabric API key (falls back to DEEPFABRIC_API_KEY env var,
|
|
61
62
|
then prompts in interactive environments)
|
|
62
63
|
endpoint: API endpoint URL (falls back to DEEPFABRIC_API_URL env var)
|
|
64
|
+
pipeline_id: Pipeline ID to associate training with (falls back to
|
|
65
|
+
DEEPFABRIC_PIPELINE_ID env var or pipeline_id.txt file)
|
|
63
66
|
enabled: Whether logging is enabled (default: True)
|
|
64
67
|
"""
|
|
65
68
|
# Get API key from arg, env, or prompt
|
|
66
69
|
self.api_key = api_key or get_api_key()
|
|
67
70
|
self.endpoint = endpoint or os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.ai")
|
|
71
|
+
self.pipeline_id = pipeline_id or self._get_pipeline_id()
|
|
68
72
|
self.run_id = str(uuid.uuid4())
|
|
69
73
|
self.enabled = enabled and self.api_key is not None
|
|
70
74
|
|
|
@@ -75,14 +79,26 @@ class DeepFabricCallback:
|
|
|
75
79
|
self.sender = MetricsSender(
|
|
76
80
|
endpoint=self.endpoint,
|
|
77
81
|
api_key=self.api_key if self.enabled else None,
|
|
82
|
+
pipeline_id=self.pipeline_id,
|
|
78
83
|
)
|
|
79
84
|
|
|
80
85
|
self._run_started = False
|
|
81
86
|
self._model_name: str | None = None
|
|
82
87
|
self._training_args_logged = False
|
|
88
|
+
self._start_time: datetime | None = None
|
|
83
89
|
|
|
84
90
|
if self.enabled:
|
|
85
|
-
|
|
91
|
+
if self.pipeline_id:
|
|
92
|
+
logger.debug(
|
|
93
|
+
f"DeepFabric callback initialized (run_id={self.run_id}, "
|
|
94
|
+
f"pipeline_id={self.pipeline_id})"
|
|
95
|
+
)
|
|
96
|
+
else:
|
|
97
|
+
logger.warning(
|
|
98
|
+
"DeepFabric callback initialized but no pipeline_id set. "
|
|
99
|
+
"Metrics will not be sent. Set DEEPFABRIC_PIPELINE_ID env var "
|
|
100
|
+
"or create pipeline_id.txt file."
|
|
101
|
+
)
|
|
86
102
|
else:
|
|
87
103
|
logger.debug("DeepFabric callback disabled (no API key)")
|
|
88
104
|
|
|
@@ -101,6 +117,7 @@ class DeepFabricCallback:
|
|
|
101
117
|
return
|
|
102
118
|
|
|
103
119
|
self._run_started = True
|
|
120
|
+
self._start_time = datetime.now(timezone.utc)
|
|
104
121
|
|
|
105
122
|
# Extract model name from various sources
|
|
106
123
|
model = kwargs.get("model")
|
|
@@ -121,6 +138,7 @@ class DeepFabricCallback:
|
|
|
121
138
|
"num_train_epochs": state.num_train_epochs,
|
|
122
139
|
"is_world_process_zero": getattr(state, "is_world_process_zero", True),
|
|
123
140
|
},
|
|
141
|
+
"started_at": self._start_time.isoformat(),
|
|
124
142
|
}
|
|
125
143
|
)
|
|
126
144
|
|
|
@@ -204,6 +222,8 @@ class DeepFabricCallback:
|
|
|
204
222
|
if not self.enabled or not self._run_started:
|
|
205
223
|
return
|
|
206
224
|
|
|
225
|
+
completed_at = datetime.now(timezone.utc)
|
|
226
|
+
|
|
207
227
|
self.sender.send_run_end(
|
|
208
228
|
{
|
|
209
229
|
"run_id": self.run_id,
|
|
@@ -212,6 +232,7 @@ class DeepFabricCallback:
|
|
|
212
232
|
"total_flos": getattr(state, "total_flos", None),
|
|
213
233
|
"best_metric": getattr(state, "best_metric", None),
|
|
214
234
|
"best_model_checkpoint": getattr(state, "best_model_checkpoint", None),
|
|
235
|
+
"completed_at": completed_at.isoformat(),
|
|
215
236
|
}
|
|
216
237
|
)
|
|
217
238
|
|
|
@@ -246,6 +267,27 @@ class DeepFabricCallback:
|
|
|
246
267
|
}
|
|
247
268
|
)
|
|
248
269
|
|
|
270
|
+
def _get_pipeline_id(self) -> str | None:
    """Resolve the pipeline ID to associate training runs with.

    Resolution order:
      1. ``DEEPFABRIC_PIPELINE_ID`` environment variable
      2. A ``pipeline_id.txt`` file in the current working directory

    Returns:
        The pipeline ID, or None if neither source provides one.
    """
    # Environment variable takes precedence. Strip whitespace so values
    # like "  abc\n" don't produce a bogus ID (the file fallback below
    # already strips, so this keeps the two sources consistent).
    pipeline_id = os.getenv("DEEPFABRIC_PIPELINE_ID", "").strip()
    if pipeline_id:
        return pipeline_id

    # Fall back to a pipeline_id.txt file in the working directory.
    pipeline_file = "pipeline_id.txt"
    if os.path.exists(pipeline_file):
        try:
            with open(pipeline_file) as f:
                pipeline_id = f.read().strip()
        except OSError:
            # An unreadable file must not break callback construction;
            # treat it the same as a missing file.
            return None
        if pipeline_id:
            return pipeline_id

    return None
|
|
290
|
+
|
|
249
291
|
def _extract_model_name(self, args: TrainingArguments, model: Any | None) -> str | None:
|
|
250
292
|
"""Extract model name from various sources.
|
|
251
293
|
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""Dataset preparation utilities for training.
|
|
2
|
+
|
|
3
|
+
This module provides utilities for preparing DeepFabric datasets for training,
|
|
4
|
+
including tool filtering to reduce sequence lengths and memory usage.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from datasets import Dataset
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
ToolInclusionStrategy = Literal["all", "used_only", "used_plus_related"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_used_tool_names(messages: list[dict[str, Any]]) -> set[str]:
    """Collect the names of tools actually invoked within a conversation.

    Recognizes both the OpenAI tool-call shape (``{"function": {"name": ...}}``)
    and the flat shape (``{"name": ...}``).

    Args:
        messages: List of message dicts from the conversation.

    Returns:
        Set of tool names that were called.
    """
    names: set[str] = set()

    for message in messages:
        if message.get("role") != "assistant":
            continue
        for call in message.get("tool_calls") or []:
            if not isinstance(call, dict):
                continue
            spec = call.get("function", {})
            if isinstance(spec, dict) and spec.get("name"):
                names.add(spec["name"])
            elif call.get("name"):
                names.add(call["name"])

    return names
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def clean_tool_schema(tool: dict[str, Any]) -> dict[str, Any]:
    """Produce a copy of *tool* with null entries removed to shrink the schema.

    Nested dicts are cleaned recursively; dict and list values that become
    empty after cleaning are dropped entirely. Non-dict inputs are returned
    unchanged.

    Args:
        tool: Tool definition in OpenAI format.

    Returns:
        Cleaned tool definition with nulls removed.
    """
    if not isinstance(tool, dict):
        return tool

    def _clean_list(items: list[Any]) -> list[Any]:
        # Clean dict elements recursively; drop Nones and emptied dicts.
        kept: list[Any] = []
        for item in items:
            if isinstance(item, dict):
                sub = clean_tool_schema(item)
                if sub:
                    kept.append(sub)
            elif item is not None:
                kept.append(item)
        return kept

    out: dict[str, Any] = {}
    for key, value in tool.items():
        if value is None:
            continue
        if isinstance(value, dict):
            value = clean_tool_schema(value)
            if not value:
                continue
        elif isinstance(value, list):
            value = _clean_list(value)
            if not value:
                continue
        out[key] = value
    return out
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def filter_tools_for_sample(
    sample: dict[str, Any],
    strategy: ToolInclusionStrategy = "used_only",
    min_tools: int = 1,
    clean_schemas: bool = True,
) -> dict[str, Any]:
    """Reduce a sample's tool list to the tools relevant to its conversation.

    Args:
        sample: Dataset sample with 'messages' and 'tools' fields.
        strategy: Tool inclusion strategy:
            - "all": keep every tool (no filtering)
            - "used_only": keep only tools invoked in the conversation
            - "used_plus_related": keep used tools plus related ones
              (not implemented; currently behaves like "used_only")
        min_tools: Minimum number of tools to retain when filtering would
            otherwise drop everything.
        clean_schemas: Whether to strip null values from tool schemas.

    Returns:
        The sample (modified in place) with its tools filtered.
    """
    # Fast path: nothing to filter and nothing to clean.
    if strategy == "all" and not clean_schemas:
        return sample

    tools = sample.get("tools", [])
    if not tools:
        return sample

    if clean_schemas:
        tools = [clean_tool_schema(tool) for tool in tools]

    if strategy == "all":
        sample["tools"] = tools
        return sample

    used = get_used_tool_names(sample.get("messages", []))

    if not used:
        # Conversation calls no tools: retain at most min_tools as context.
        sample["tools"] = tools[:min_tools] if min_tools > 0 else []
        return sample

    # Partition into tools the conversation actually calls vs. the rest.
    selected: list[dict[str, Any]] = []
    leftovers: list[dict[str, Any]] = []
    for tool in tools:
        spec = tool.get("function", {})
        if isinstance(spec, dict) and spec.get("name") in used:
            selected.append(tool)
        else:
            leftovers.append(tool)

    # Top up from the unused tools (in original order) to honor min_tools.
    while len(selected) < min_tools and leftovers:
        selected.append(leftovers.pop(0))

    sample["tools"] = selected
    return sample
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def prepare_dataset_for_training(
    dataset: Dataset,
    tool_strategy: ToolInclusionStrategy = "used_only",
    clean_tool_schemas: bool = True,
    min_tools: int = 1,
    num_proc: int | None = None,
) -> Dataset:
    """Apply size and memory optimizations to a DeepFabric dataset before training.

    Optimizations applied:
    - Tools are filtered per sample to those actually used in the conversation
    - Null values are stripped from tool schemas
    - Can be extended with additional preprocessing steps

    Args:
        dataset: HuggingFace Dataset with DeepFabric conversation format
        tool_strategy: How to filter tools (see filter_tools_for_sample)
        clean_tool_schemas: Whether to remove null values from tool schemas
        min_tools: Minimum tools to keep per sample
        num_proc: Number of processes for parallel processing

    Returns:
        Processed dataset ready for training

    Example:
        >>> from datasets import load_dataset
        >>> from deepfabric.training import prepare_dataset_for_training
        >>>
        >>> dataset = load_dataset("your/dataset", split="train")
        >>> prepared = prepare_dataset_for_training(
        ...     dataset,
        ...     tool_strategy="used_only",
        ...     clean_tool_schemas=True,
        ... )
        >>> # Now use prepared dataset for training
    """
    logger.info(
        "Preparing dataset for training: tool_strategy=%s, clean_schemas=%s",
        tool_strategy,
        clean_tool_schemas,
    )

    def _log_avg_tools(ds: Dataset, message: str) -> None:
        # Full scan of the dataset purely for the logged statistic; skipped
        # entirely when there is no 'tools' column.
        if "tools" not in ds.column_names:
            return
        counts = [len(row.get("tools", []) or []) for row in ds]
        average = sum(counts) / len(counts) if counts else 0
        logger.info(message, average)

    _log_avg_tools(dataset, "Initial average tools per sample: %.1f")

    # Apply per-sample tool filtering (optionally in parallel).
    processed = dataset.map(
        lambda row: filter_tools_for_sample(
            row,
            strategy=tool_strategy,
            min_tools=min_tools,
            clean_schemas=clean_tool_schemas,
        ),
        num_proc=num_proc,
        desc="Filtering tools",
    )

    _log_avg_tools(processed, "Final average tools per sample: %.1f")

    return processed
|
|
@@ -35,6 +35,7 @@ class MetricsSender:
|
|
|
35
35
|
self,
|
|
36
36
|
endpoint: str,
|
|
37
37
|
api_key: str | None,
|
|
38
|
+
pipeline_id: str | None = None,
|
|
38
39
|
batch_size: int = 10,
|
|
39
40
|
flush_interval: float = 5.0,
|
|
40
41
|
max_queue_size: int = 1000,
|
|
@@ -45,6 +46,7 @@ class MetricsSender:
|
|
|
45
46
|
Args:
|
|
46
47
|
endpoint: Base URL for the DeepFabric API
|
|
47
48
|
api_key: API key for authentication (None disables sending)
|
|
49
|
+
pipeline_id: Pipeline ID to associate training runs with (required)
|
|
48
50
|
batch_size: Number of metrics to batch before sending
|
|
49
51
|
flush_interval: Seconds between automatic flushes
|
|
50
52
|
max_queue_size: Maximum queue size (overflow drops metrics)
|
|
@@ -52,12 +54,14 @@ class MetricsSender:
|
|
|
52
54
|
"""
|
|
53
55
|
self.endpoint = endpoint.rstrip("/")
|
|
54
56
|
self.api_key = api_key
|
|
57
|
+
self.pipeline_id = pipeline_id
|
|
55
58
|
self.batch_size = batch_size
|
|
56
59
|
self.flush_interval = flush_interval
|
|
57
60
|
self.timeout = timeout
|
|
58
61
|
|
|
59
62
|
self._queue: queue.Queue[dict[str, Any]] = queue.Queue(maxsize=max_queue_size)
|
|
60
63
|
self._stop_event = threading.Event()
|
|
64
|
+
self._flush_event = threading.Event()
|
|
61
65
|
self._enabled = api_key is not None
|
|
62
66
|
|
|
63
67
|
# Start background sender thread
|
|
@@ -177,19 +181,25 @@ class MetricsSender:
|
|
|
177
181
|
should_flush = (
|
|
178
182
|
len(batch) >= self.batch_size
|
|
179
183
|
or (time.monotonic() - last_flush) >= self.flush_interval
|
|
184
|
+
or self._flush_event.is_set()
|
|
180
185
|
)
|
|
181
186
|
|
|
182
187
|
if should_flush:
|
|
183
188
|
self._flush_batch(batch)
|
|
184
189
|
batch = []
|
|
185
190
|
last_flush = time.monotonic()
|
|
191
|
+
self._flush_event.clear()
|
|
186
192
|
|
|
187
193
|
except queue.Empty:
|
|
188
|
-
# Timeout - flush if we have pending items
|
|
189
|
-
if batch and (
|
|
194
|
+
# Timeout - flush if we have pending items or flush requested
|
|
195
|
+
if batch and (
|
|
196
|
+
(time.monotonic() - last_flush) >= self.flush_interval
|
|
197
|
+
or self._flush_event.is_set()
|
|
198
|
+
):
|
|
190
199
|
self._flush_batch(batch)
|
|
191
200
|
batch = []
|
|
192
201
|
last_flush = time.monotonic()
|
|
202
|
+
self._flush_event.clear()
|
|
193
203
|
|
|
194
204
|
# On shutdown, drain the queue and flush everything
|
|
195
205
|
while not self._queue.empty():
|
|
@@ -209,21 +219,34 @@ class MetricsSender:
|
|
|
209
219
|
if not batch or not self._enabled:
|
|
210
220
|
return
|
|
211
221
|
|
|
222
|
+
if not self.pipeline_id:
|
|
223
|
+
logger.debug("No pipeline_id set, skipping metrics send")
|
|
224
|
+
return
|
|
225
|
+
|
|
212
226
|
# Separate events and metrics
|
|
213
|
-
|
|
227
|
+
run_start_events = [item for item in batch if item["type"] == "run_start"]
|
|
228
|
+
run_end_events = [item for item in batch if item["type"] == "run_end"]
|
|
214
229
|
metrics = [item["data"] for item in batch if item["type"] == "metrics"]
|
|
215
230
|
|
|
216
|
-
#
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
231
|
+
# Build query string with pipeline_id
|
|
232
|
+
query = f"?pipeline_id={self.pipeline_id}"
|
|
233
|
+
|
|
234
|
+
def send_run_events(events: list[dict[str, Any]]) -> None:
|
|
235
|
+
"""Send run start/end events."""
|
|
236
|
+
for event in events:
|
|
237
|
+
self._send_to_api(
|
|
238
|
+
endpoint=f"{self.endpoint}/api/v1/training/runs{query}",
|
|
239
|
+
payload={"event_type": event["type"], **event["data"]},
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
# Send run events, ensuring start events are processed before end events
|
|
243
|
+
send_run_events(run_start_events)
|
|
244
|
+
send_run_events(run_end_events)
|
|
222
245
|
|
|
223
246
|
# Send metrics batch
|
|
224
247
|
if metrics:
|
|
225
248
|
self._send_to_api(
|
|
226
|
-
endpoint=f"{self.endpoint}/v1/training/metrics",
|
|
249
|
+
endpoint=f"{self.endpoint}/api/v1/training/metrics{query}",
|
|
227
250
|
payload={"metrics": metrics},
|
|
228
251
|
)
|
|
229
252
|
self._metrics_sent += len(metrics)
|
|
@@ -252,22 +275,27 @@ class MetricsSender:
|
|
|
252
275
|
|
|
253
276
|
if not response.ok:
|
|
254
277
|
self._send_errors += 1
|
|
255
|
-
logger.
|
|
278
|
+
logger.warning(
|
|
279
|
+
"API error: %s %s (endpoint: %s)",
|
|
280
|
+
response.status_code,
|
|
281
|
+
response.text[:200],
|
|
282
|
+
endpoint,
|
|
283
|
+
)
|
|
256
284
|
return False
|
|
257
285
|
|
|
258
286
|
except requests.exceptions.Timeout:
|
|
259
287
|
self._send_errors += 1
|
|
260
|
-
logger.
|
|
288
|
+
logger.warning("Request timed out: %s", endpoint)
|
|
261
289
|
return False
|
|
262
290
|
|
|
263
|
-
except requests.exceptions.ConnectionError:
|
|
291
|
+
except requests.exceptions.ConnectionError as e:
|
|
264
292
|
self._send_errors += 1
|
|
265
|
-
logger.
|
|
293
|
+
logger.warning("Connection error: %s (endpoint: %s)", e, endpoint)
|
|
266
294
|
return False
|
|
267
295
|
|
|
268
296
|
except requests.exceptions.RequestException as e:
|
|
269
297
|
self._send_errors += 1
|
|
270
|
-
logger.
|
|
298
|
+
logger.warning("Request error: %s (endpoint: %s)", e, endpoint)
|
|
271
299
|
return False
|
|
272
300
|
|
|
273
301
|
else:
|
|
@@ -282,8 +310,14 @@ class MetricsSender:
|
|
|
282
310
|
if not self._enabled:
|
|
283
311
|
return
|
|
284
312
|
|
|
313
|
+
# Signal the background thread to flush its current batch
|
|
314
|
+
self._flush_event.set()
|
|
315
|
+
|
|
285
316
|
start = time.monotonic()
|
|
286
|
-
|
|
317
|
+
# Wait for queue to empty and flush event to be cleared (indicates batch was sent)
|
|
318
|
+
while (time.monotonic() - start) < timeout:
|
|
319
|
+
if self._queue.empty() and not self._flush_event.is_set():
|
|
320
|
+
break
|
|
287
321
|
time.sleep(0.1)
|
|
288
322
|
|
|
289
323
|
def shutdown(self) -> None:
|
deepfabric/tui.py
CHANGED
|
@@ -41,14 +41,22 @@ class TopicBuildingMixin:
|
|
|
41
41
|
|
|
42
42
|
Subclasses must have these attributes:
|
|
43
43
|
- tui: DeepFabricTUI instance
|
|
44
|
+
- live_display: Live | None
|
|
44
45
|
- live_layout: Layout | None
|
|
45
46
|
- events_log: deque
|
|
46
47
|
"""
|
|
47
48
|
|
|
48
49
|
tui: "DeepFabricTUI"
|
|
50
|
+
live_display: "Live | None"
|
|
49
51
|
live_layout: "Layout | None"
|
|
50
52
|
events_log: "deque"
|
|
51
53
|
|
|
54
|
+
def stop_live(self) -> None:
    """Tear down the Live display, if one is currently active."""
    display = self.live_display
    if not display:
        return
    display.stop()
    # Clear the reference so later calls (and _refresh_* helpers) see
    # that no live display is running.
    self.live_display = None
|
|
59
|
+
|
|
52
60
|
def _refresh_left(self) -> None:
|
|
53
61
|
"""Update events panel in left column."""
|
|
54
62
|
if self.live_layout is not None:
|
|
@@ -910,7 +918,7 @@ class DatasetGenerationTUI(StreamObserver):
|
|
|
910
918
|
# Map conversation types to friendly names
|
|
911
919
|
type_map = {
|
|
912
920
|
"basic": "Basic Q&A",
|
|
913
|
-
"
|
|
921
|
+
"cot": "Chain of Thought",
|
|
914
922
|
"single_turn_agent": "Single-Turn Agent (Tool Calling)",
|
|
915
923
|
"multi_turn_agent": "Multi-Turn Agent (Tool Calling)",
|
|
916
924
|
}
|
deepfabric/utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import ast
|
|
2
2
|
import asyncio
|
|
3
3
|
import json
|
|
4
|
+
import os
|
|
4
5
|
import re
|
|
5
6
|
|
|
6
7
|
VALIDATION_ERROR_INDICATORS = [
|
|
@@ -147,4 +148,17 @@ def read_topic_tree_from_jsonl(file_path: str) -> list[dict]:
|
|
|
147
148
|
with open(file_path) as file:
|
|
148
149
|
for line in file:
|
|
149
150
|
topic_tree.append(json.loads(line.strip()))
|
|
151
|
+
|
|
150
152
|
return topic_tree
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def get_bool_env(key: str, default: bool = False) -> bool:
    """Get a boolean environment variable.

    Supports: '1', 'true', 'yes', 'on' (case-insensitive) as True.
    Everything else is False unless default is True and key is missing.

    Args:
        key: Name of the environment variable to read.
        default: Value returned when the variable is unset.

    Returns:
        The parsed boolean value.
    """
    val = os.getenv(key)
    if val is None:
        return default
    # Strip surrounding whitespace so values like "true\n" (common when an
    # env file is produced with echo) still parse as True.
    return val.strip().lower() in ("1", "true", "yes", "on")
|
deepfabric/validation.py
CHANGED
|
@@ -79,7 +79,7 @@ def validate_path_requirements(
|
|
|
79
79
|
for steps, batch in optimal_combinations[:3]: # Show top 3
|
|
80
80
|
total_samples = steps * batch
|
|
81
81
|
recommendations.append(
|
|
82
|
-
f" --num-
|
|
82
|
+
f" --num-samples {steps} --batch-size {batch} (generates {total_samples} samples)"
|
|
83
83
|
)
|
|
84
84
|
|
|
85
85
|
recommendations.extend(
|