DeepFabric 4.10.1__py3-none-any.whl → 4.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/graph.py CHANGED
@@ -26,7 +26,7 @@ from .prompts import (
 )
 from .schemas import GraphSubtopics
 from .stream_simulator import simulate_stream
-from .topic_model import TopicModel, TopicPath
+from .topic_model import Topic, TopicModel, TopicPath
 
 if TYPE_CHECKING:  # only for type hints to avoid runtime cycles
     from .progress import ProgressReporter
@@ -615,6 +615,30 @@ class Graph(TopicModel):
 
         visited.remove(node.id)
 
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns deduplicated topics by UUID.
+
+        Iterates through all nodes in the graph and returns unique topics.
+        Each node has a UUID in its metadata, ensuring uniqueness.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+            Each UUID appears exactly once.
+        """
+        seen_uuids: set[str] = set()
+        result: list[Topic] = []
+
+        for node in self.nodes.values():
+            # Skip root node — it holds the generation seed prompt, not a topic
+            if node.id == self.root.id:
+                continue
+            node_uuid = node.metadata.get("uuid")
+            if node_uuid and node_uuid not in seen_uuids:
+                seen_uuids.add(node_uuid)
+                result.append(Topic(uuid=node_uuid, topic=node.topic))
+
+        return result
+
     def _dfs_paths(
         self, node: Node, current_path: list[str], paths: list[list[str]], visited: set[int]
     ) -> None:
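
The new Graph.get_unique_topics() gives generation a deduplicated view: in a graph, several paths can reach the same node, but each node carries a single UUID in its metadata. A minimal sketch of the same dedup logic, using plain dicts in place of graph nodes (the node structure here is an assumption for illustration):

from typing import NamedTuple

class Topic(NamedTuple):
    uuid: str
    topic: str

# Hypothetical node metadata; in the real graph these come from node.metadata["uuid"]
nodes = [
    {"uuid": "a1", "topic": "Symmetric encryption"},
    {"uuid": "a1", "topic": "Symmetric encryption"},  # same node, reached via a second path
    {"uuid": "b2", "topic": "Key exchange"},
]

seen: set[str] = set()
unique: list[Topic] = []
for n in nodes:
    if n["uuid"] and n["uuid"] not in seen:
        seen.add(n["uuid"])
        unique.append(Topic(uuid=n["uuid"], topic=n["topic"]))

assert [t.uuid for t in unique] == ["a1", "b2"]  # each UUID appears exactly once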
@@ -7,15 +7,21 @@ import time
 
 from collections.abc import Callable, Coroutine
 from functools import wraps
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar
 
 from .rate_limit_config import BackoffStrategy, RateLimitConfig
 from .rate_limit_detector import RateLimitDetector
 
+if TYPE_CHECKING:
+    from deepfabric.progress import ProgressReporter
+
 logger = logging.getLogger(__name__)
 
 T = TypeVar("T")
 
+# Max chars for error summaries emitted through progress reporter
+_ERROR_SUMMARY_MAX_LENGTH = 200
+
 
 class RetryHandler:
     """Intelligent retry handler for LLM API calls with provider-aware backoff."""
@@ -30,6 +36,7 @@ class RetryHandler:
         self.config = config
         self.provider = provider
         self.detector = RateLimitDetector()
+        self.progress_reporter: ProgressReporter | None = None
 
     def should_retry(self, exception: Exception) -> bool:
        """Determine if an exception should trigger a retry.
@@ -126,14 +133,26 @@ class RetryHandler:
         if quota_info.quota_type:
             quota_info_str = f" (quota_type: {quota_info.quota_type})"
 
-        logger.warning(
-            "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
-            self.provider,
-            tries,
-            wait,
-            quota_info_str,
-            exception,
-        )
+        if self.progress_reporter:
+            error_summary = str(exception)
+            if len(error_summary) > _ERROR_SUMMARY_MAX_LENGTH:
+                error_summary = error_summary[:_ERROR_SUMMARY_MAX_LENGTH] + "..."
+            self.progress_reporter.emit_llm_retry(
+                provider=self.provider,
+                attempt=tries,
+                wait=wait,
+                error_summary=error_summary,
+                quota_type=quota_info_str.strip(" ()") if quota_info_str else "",
+            )
+        else:
+            logger.warning(
+                "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
+                self.provider,
+                tries,
+                wait,
+                quota_info_str,
+                exception,
+            )
 
     def on_giveup_handler(self, details: dict[str, Any]) -> None:
         """Callback when giving up after max retries.
deepfabric/progress.py CHANGED
@@ -81,6 +81,25 @@ class StreamObserver(Protocol):
         """
         ...
 
+    def on_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        metadata: dict[str, Any],
+    ) -> None:
+        """Called when an LLM API call is retried due to rate limiting or transient error.
+
+        Args:
+            provider: LLM provider name (e.g., "gemini", "openai")
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            metadata: Additional context (e.g., quota_type)
+        """
+        ...
+
 
 class ProgressReporter:
     """Central progress reporter that notifies observers of generation events.
@@ -184,6 +203,29 @@ class ProgressReporter:
             if hasattr(observer, "on_retry"):
                 observer.on_retry(sample_idx, attempt, max_attempts, error_summary, metadata)
 
+    def emit_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        **metadata,
+    ) -> None:
+        """Emit an LLM retry event to all observers.
+
+        Used to track LLM API rate limits and transient errors.
+
+        Args:
+            provider: LLM provider name
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            **metadata: Additional context as keyword arguments
+        """
+        for observer in self._observers:
+            if hasattr(observer, "on_llm_retry"):
+                observer.on_llm_retry(provider, attempt, wait, error_summary, metadata)
+
     def emit_tool_execution(
         self,
         tool_name: str,
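
Note the signature asymmetry: emitters pass extra context as keyword arguments, while observers receive it as a single dict. A usage sketch with invented values, reusing the LoggingObserver sketch above:

from deepfabric.progress import ProgressReporter

reporter = ProgressReporter()
reporter.attach(LoggingObserver())

# Keyword args beyond the named parameters are collected into **metadata
# and handed to each observer as one dict.
reporter.emit_llm_retry(
    provider="openai",
    attempt=2,
    wait=1.5,
    error_summary="429 Too Many Requests",
    quota_type="requests",
)
# -> on_llm_retry("openai", 2, 1.5, "429 Too Many Requests", {"quota_type": "requests"})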
@@ -45,6 +45,8 @@ async def _process_graph_events(graph: Graph, debug: bool = False) -> dict | None:
     progress_reporter = ProgressReporter()
     progress_reporter.attach(tui)
     graph.progress_reporter = progress_reporter
+    if hasattr(graph, "llm_client"):
+        graph.llm_client.retry_handler.progress_reporter = progress_reporter
 
     tui_started = False
 
@@ -116,6 +118,8 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
     progress_reporter = ProgressReporter()
     progress_reporter.attach(tui)
     tree.progress_reporter = progress_reporter
+    if hasattr(tree, "llm_client"):
+        tree.llm_client.retry_handler.progress_reporter = progress_reporter
 
     final_event = None
     try:
@@ -129,6 +133,8 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
             tui.add_failure()
             if debug and "error" in event:
                 get_tui().error(f"Debug: Tree generation failure - {event['error']}")
+            else:
+                tui.advance_simple_progress()
         elif event["event"] == "build_complete":
             total_paths = (
                 int(event["total_paths"]) if isinstance(event["total_paths"], str | int) else 0
@@ -233,8 +239,22 @@ def load_or_build_topic_model(
     tui = get_tui()
 
     if topics_load:
-        # Determine mode from config or file extension
-        is_graph = config.topics.mode == "graph" or topics_load.endswith(".json")
+        # Config mode takes precedence; file extension is only used to warn on mismatch
+        is_graph = config.topics.mode == "graph"
+
+        # Warn if file extension doesn't match the configured mode
+        if not is_graph and topics_load.endswith(".json"):
+            tui.warning(
+                f"File '{topics_load}' has .json extension (typically a graph) "
+                f"but mode is '{config.topics.mode}'. "
+                "If this is a graph set mode: graph in config."
+            )
+        elif is_graph and topics_load.endswith(".jsonl"):
+            tui.warning(
+                f"File '{topics_load}' has .jsonl extension (typically a tree) "
+                "but mode is 'graph'. "
+                "If this is a tree set mode: tree in config."
+            )
 
         if is_graph:
             tui.info(f"Reading topic graph from JSON file: {topics_load}")
deepfabric/topic_model.py CHANGED
@@ -9,6 +9,18 @@ class TopicPath(NamedTuple):
     topic_id: str
 
 
+class Topic(NamedTuple):
+    """A unique topic with its UUID and content.
+
+    Used for generation where we iterate over unique topics (by UUID)
+    rather than paths. This deduplicated view is essential for graphs
+    where multiple paths can lead to the same topic node.
+    """
+
+    uuid: str
+    topic: str  # The topic text/content
+
+
 class TopicModel(ABC):
     """Abstract base class for topic models like Tree and Graph."""
 
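
Topic is a plain NamedTuple, so fields are accessible positionally or by name and the value unpacks like any tuple. A quick illustration (the UUID and text are invented):

from deepfabric.topic_model import Topic

t = Topic(uuid="3f2a9c1b0d4e5f67", topic="Key exchange")
assert t.uuid == t[0] and t.topic == t[1]
uuid, text = t  # NamedTuple unpacks like a plain tuple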
@@ -37,6 +49,20 @@ class TopicModel(ABC):
         """
         raise NotImplementedError
 
+    @abstractmethod
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns deduplicated topics by UUID.
+
+        For generation, we iterate over unique topics rather than paths.
+        This is important for graphs where multiple paths can lead to the
+        same topic node - we only want to generate one sample per unique topic.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+            Each UUID appears exactly once.
+        """
+        raise NotImplementedError
+
     def get_path_by_id(self, topic_id: str) -> list[str] | None:
         """Look up a path by its topic_id.
 
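
Declaring get_unique_topics abstract on TopicModel lets generation code stay agnostic about whether it holds a Tree or a Graph. A sketch of the intended call pattern (the generate_all function is made up for illustration):

from deepfabric.topic_model import TopicModel

def generate_all(model: TopicModel) -> None:
    # Works for Tree and Graph alike; one sample per unique topic,
    # even when several graph paths reach the same node.
    for topic in model.get_unique_topics():
        print(f"would generate one sample for {topic.uuid}: {topic.topic}")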
deepfabric/tree.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import hashlib
 import json
 import time
 import warnings
@@ -21,7 +22,7 @@ from .metrics import trace
 from .prompts import TreePromptBuilder
 from .schemas import TopicList
 from .stream_simulator import simulate_stream
-from .topic_model import TopicModel, TopicPath
+from .topic_model import Topic, TopicModel, TopicPath
 
 warnings.filterwarnings("ignore", message=".*Pydantic serializer warnings:.*")
 
@@ -242,24 +243,41 @@ class Tree(TopicModel):
         """Returns all the paths in the topic model."""
         return self.tree_paths
 
+    @staticmethod
+    def _path_to_id(path: list[str]) -> str:
+        """Compute a deterministic topic ID from a tree path."""
+        return hashlib.sha256(json.dumps(path).encode()).hexdigest()[:16]
+
+    def _add_path(self, path: list[str]) -> None:
+        """Add a path to the tree.
+
+        Args:
+            path: The topic path to add.
+        """
+        self.tree_paths.append(path)
+
     def get_all_paths_with_ids(self) -> list[TopicPath]:
         """Returns all paths with their unique identifiers.
 
-        For Tree, we generate stable IDs by hashing the path content.
-        This ensures the same path always gets the same ID across runs.
-
         Returns:
             List of TopicPath namedtuples containing (path, topic_id).
+            The topic_id is computed deterministically from the path content.
         """
-        import hashlib  # noqa: PLC0415
+        return [TopicPath(path=path, topic_id=self._path_to_id(path)) for path in self.tree_paths]
 
-        result: list[TopicPath] = []
-        for path in self.tree_paths:
-            # Generate stable ID from path content
-            path_str = "::".join(path)
-            topic_id = hashlib.sha256(path_str.encode()).hexdigest()[:16]
-            result.append(TopicPath(path=path, topic_id=topic_id))
-        return result
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns all leaf topics with computed IDs.
+
+        For Trees, each path is unique by definition, so this returns
+        all leaf topics with deterministic path-based IDs.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+        """
+        return [
+            Topic(uuid=self._path_to_id(path), topic=path[-1] if path else "")
+            for path in self.tree_paths
+        ]
 
     async def get_subtopics(
         self, system_prompt: str, node_path: list[str], num_subtopics: int
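
Worth noting for downstream consumers: the hash input changed from "::".join(path) to json.dumps(path), so the ID for the same path differs between 4.10.1 and 4.11.0 (IDs are computed on the fly and never persisted, per the save docstring below, so only in-flight references are affected). The json.dumps form also removes the ambiguity "::" had when a topic's own text contains "::", which is likely the motivation. The difference is easy to verify:

import hashlib, json

path = ["root", "crypto", "key exchange"]

old_id = hashlib.sha256("::".join(path).encode()).hexdigest()[:16]   # 4.10.1 scheme
new_id = hashlib.sha256(json.dumps(path).encode()).hexdigest()[:16]  # 4.11.0 scheme

assert old_id != new_id  # same path, different ID across versions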
@@ -361,7 +379,7 @@ class Tree(TopicModel):
         yield {"event": "subtree_start", "node_path": node_path, "depth": current_depth}
 
         if current_depth > total_depth:
-            self.tree_paths.append(node_path)
+            self._add_path(node_path)
             yield {"event": "leaf_reached", "path": node_path}
             return
 
@@ -383,7 +401,7 @@ class Tree(TopicModel):
             yield event
 
         if not subtopics:
-            self.tree_paths.append(node_path)
+            self._add_path(node_path)
             yield {"event": "leaf_reached", "path": node_path}
             return
 
@@ -403,7 +421,11 @@ class Tree(TopicModel):
             yield child_event
 
     def save(self, save_path: str) -> None:
-        """Save the topic tree to a file."""
+        """Save the topic tree to a file.
+
+        Format: {"path": [...]}
+        IDs are computed on-the-fly from path content, not persisted.
+        """
         from pathlib import Path  # noqa: PLC0415
 
         Path(save_path).parent.mkdir(parents=True, exist_ok=True)
@@ -446,8 +468,11 @@ class Tree(TopicModel):
     def from_dict_list(self, dict_list: list[dict[str, Any]]) -> None:
         """Construct the topic tree from a list of dictionaries.
 
+        Accepts both the current format (``{"path": [...]}``) and the
+        legacy format that included a ``leaf_uuid`` field (silently ignored).
+
         Args:
-            dict_list (list[dict]): The list of dictionaries representing the topic tree.
+            dict_list: The list of dictionaries representing the topic tree.
         """
         # Clear existing data
         self.tree_paths = []
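
As the updated docstring notes, loading tolerates both on-disk shapes: the current one-object-per-line format and the legacy one that carried a leaf_uuid field, which is dropped on load. An example of the two record shapes (path and UUID values invented):

import json

current = json.loads('{"path": ["root", "crypto", "key exchange"]}')
legacy = json.loads(
    '{"path": ["root", "crypto", "key exchange"], "leaf_uuid": "3f2a9c1b0d4e5f67"}'
)
# Both yield the same path; leaf_uuid is ignored on load.
assert current["path"] == legacy["path"]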