deepfabric-4.10.1-py3-none-any.whl → deepfabric-4.12.0-py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- deepfabric/cli.py +624 -33
- deepfabric/cloud_upload.py +1 -1
- deepfabric/config.py +14 -5
- deepfabric/config_manager.py +6 -1
- deepfabric/constants.py +1 -1
- deepfabric/dataset_manager.py +264 -62
- deepfabric/generator.py +687 -82
- deepfabric/graph.py +202 -2
- deepfabric/graph_pruner.py +122 -0
- deepfabric/llm/retry_handler.py +28 -9
- deepfabric/progress.py +42 -0
- deepfabric/topic_inspector.py +237 -0
- deepfabric/topic_manager.py +54 -2
- deepfabric/topic_model.py +26 -0
- deepfabric/tree.py +81 -41
- deepfabric/tui.py +448 -349
- deepfabric/utils.py +4 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/METADATA +3 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/RECORD +22 -20
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/licenses/LICENSE +1 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/entry_points.txt +0 -0
deepfabric/graph.py
CHANGED
@@ -26,7 +26,7 @@ from .prompts import (
 )
 from .schemas import GraphSubtopics
 from .stream_simulator import simulate_stream
-from .topic_model import TopicModel, TopicPath
+from .topic_model import Topic, TopicModel, TopicPath

 if TYPE_CHECKING:  # only for type hints to avoid runtime cycles
     from .progress import ProgressReporter
@@ -70,6 +70,11 @@ class GraphConfig(BaseModel):
         le=20,
         description="Maximum concurrent LLM calls during graph expansion (helps avoid rate limits)",
     )
+    max_tokens: int = Field(
+        default=DEFAULT_MAX_TOKENS,
+        ge=1,
+        description="Maximum tokens for topic generation LLM calls",
+    )
     base_url: str | None = Field(
         default=None,
         description="Base URL for API endpoint (e.g., custom OpenAI-compatible servers)",
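The new max_tokens knob sits alongside the existing expansion limits. A minimal construction sketch, assuming GraphConfig accepts the same keys Graph.load() uses as placeholder params later in this diff (topic_prompt, model_name, degree, depth, temperature); all values here are illustrative:

from deepfabric.graph import GraphConfig

config = GraphConfig(
    topic_prompt="Kubernetes troubleshooting scenarios",  # hypothetical seed prompt
    model_name="openai/gpt-4o-mini",                      # hypothetical model id
    degree=3,
    depth=2,
    max_tokens=2048,  # new field: must be >= 1, defaults to DEFAULT_MAX_TOKENS
)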
@@ -156,6 +161,7 @@ class Graph(TopicModel):
         self.degree = self.config.degree
         self.depth = self.config.depth
         self.max_concurrent = self.config.max_concurrent
+        self.max_tokens = self.config.max_tokens
         self.prompt_style = self.config.prompt_style

         # Initialize LLM client
@@ -211,6 +217,139 @@ class Graph(TopicModel):
             if parent_node not in child_node.parents:
                 child_node.parents.append(parent_node)

+    def find_node_by_uuid(self, uuid: str) -> Node | None:
+        """Find a node by its UUID.
+
+        Args:
+            uuid: The UUID string to search for.
+
+        Returns:
+            The Node if found, None otherwise.
+        """
+        for node in self.nodes.values():
+            if node.metadata.get("uuid") == uuid:
+                return node
+        return None
+
+    def remove_node(self, node_id: int) -> None:
+        """Remove a single node from the graph, cleaning up bidirectional references.
+
+        Does not remove children — use remove_subtree() for cascading removal.
+
+        Args:
+            node_id: The ID of the node to remove.
+
+        Raises:
+            ValueError: If node_id is the root node or does not exist.
+        """
+        if node_id == self.root.id:
+            raise ValueError("Cannot remove the root node")  # noqa: TRY003
+        node = self.nodes.get(node_id)
+        if node is None:
+            raise ValueError(f"Node {node_id} not found in graph")  # noqa: TRY003
+
+        for parent in node.parents:
+            if node in parent.children:
+                parent.children.remove(node)
+
+        for child in node.children:
+            if node in child.parents:
+                child.parents.remove(node)
+
+        del self.nodes[node_id]
+
+    def remove_subtree(self, node_id: int) -> list[int]:
+        """Remove a node and all its descendants from the graph.
+
+        Args:
+            node_id: The ID of the node to remove (along with all descendants).
+
+        Returns:
+            List of removed node IDs.
+
+        Raises:
+            ValueError: If node_id is the root node or does not exist.
+        """
+        if node_id == self.root.id:
+            raise ValueError("Cannot remove the root node")  # noqa: TRY003
+        node = self.nodes.get(node_id)
+        if node is None:
+            raise ValueError(f"Node {node_id} not found in graph")  # noqa: TRY003
+
+        # BFS to collect all descendant node IDs
+        to_remove: list[int] = []
+        queue = [node]
+        visited: set[int] = set()
+        while queue:
+            current = queue.pop(0)
+            if current.id in visited:
+                continue
+            visited.add(current.id)
+            to_remove.append(current.id)
+            for child in current.children:
+                if child.id not in visited:
+                    queue.append(child)
+
+        # Remove in reverse order (leaves first)
+        for nid in reversed(to_remove):
+            self.remove_node(nid)
+
+        return to_remove
+
+    def prune_at_level(self, max_depth: int) -> list[int]:
+        """Remove all nodes below the given depth level.
+
+        Nodes at exactly max_depth become leaf nodes. Root is depth 0.
+
+        Args:
+            max_depth: Maximum depth to keep (inclusive).
+                0 = keep only root, 1 = root and its children, etc.
+
+        Returns:
+            List of removed node IDs.
+
+        Raises:
+            ValueError: If max_depth is negative.
+        """
+        if max_depth < 0:
+            raise ValueError("max_depth must be non-negative")  # noqa: TRY003
+
+        # BFS from root to compute node depths
+        node_depths: dict[int, int] = {}
+        queue: list[tuple[Node, int]] = [(self.root, 0)]
+        visited: set[int] = set()
+        while queue:
+            current, depth = queue.pop(0)
+            if current.id in visited:
+                continue
+            visited.add(current.id)
+            node_depths[current.id] = depth
+            for child in current.children:
+                if child.id not in visited:
+                    queue.append((child, depth + 1))
+
+        to_remove_set = {nid for nid, d in node_depths.items() if d > max_depth}
+
+        # Sever children links from boundary nodes
+        for nid, d in node_depths.items():
+            if d == max_depth:
+                self.nodes[nid].children = [
+                    c for c in self.nodes[nid].children if c.id not in to_remove_set
+                ]
+
+        # Remove deeper nodes
+        for nid in to_remove_set:
+            node = self.nodes[nid]
+            for parent in node.parents:
+                if node in parent.children:
+                    parent.children.remove(node)
+            for child in node.children:
+                if node in child.parents:
+                    child.parents.remove(node)
+            del self.nodes[nid]
+
+        return list(to_remove_set)
+
     def to_pydantic(self) -> GraphModel:
         """Converts the runtime graph to its Pydantic model representation."""
         return GraphModel(
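Together these give the graph safe in-memory surgery primitives: UUID lookup, single-node removal with bidirectional cleanup, cascading subtree removal, and depth truncation. A usage sketch, assuming graph is an already-built Graph; the UUID is a placeholder:

# Drop one branch: find the node via its metadata UUID, then cascade.
node = graph.find_node_by_uuid("0b1c2d3e-0000-0000-0000-000000000000")
if node is not None:
    removed_ids = graph.remove_subtree(node.id)  # BFS collection, leaves-first removal
    print(f"Removed {len(removed_ids)} nodes")

# Truncate globally: keep root (depth 0) plus two levels, delete the rest.
removed_deeper = graph.prune_at_level(max_depth=2)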
@@ -237,6 +376,13 @@ class Graph(TopicModel):
         with open(save_path, "w") as f:
             f.write(self.to_json())

+        # Save failed generations if any
+        if self.failed_generations:
+            failed_path = save_path.replace(".json", "_failed.jsonl")
+            with open(failed_path, "w") as f:
+                for failed in self.failed_generations:
+                    f.write(json.dumps({"failed_generation": failed}) + "\n")
+
     @classmethod
     def from_json(cls, json_path: str, params: dict) -> "Graph":
         """Load a topic graph from a JSON file."""
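save() now writes a JSONL sidecar next to the graph whenever failures were recorded. Layout sketch; the shape of each failure payload is whatever expansion stored in failed_generations:

graph.save("out/topic_graph.json")
# -> out/topic_graph.json           the serialized graph
# -> out/topic_graph_failed.jsonl   one {"failed_generation": ...} object per line,
#    written only when graph.failed_generations is non-empty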
@@ -268,6 +414,36 @@ class Graph(TopicModel):
         graph._next_node_id = max(graph.nodes.keys()) + 1
         return graph

+    @classmethod
+    def load(cls, json_path: str) -> "Graph":
+        """Load a graph from JSON without initializing LLM client.
+
+        Intended for inspection and manipulation operations that don't
+        require LLM generation capabilities. Restores provider, model,
+        and temperature from the file metadata so saves preserve them.
+        """
+        params = {
+            "topic_prompt": "loaded",
+            "model_name": "placeholder/model",
+            "degree": 3,
+            "depth": 2,
+            "temperature": 0.7,
+        }
+        graph = cls.from_json(json_path, params)
+
+        # Restore original metadata so save() preserves provenance
+        with open(json_path) as f:
+            raw = json.load(f)
+        file_meta = raw.get("metadata") or {}
+        if file_meta.get("provider"):
+            graph.provider = file_meta["provider"]
+        if file_meta.get("model"):
+            graph.model_name = file_meta["model"]
+        if file_meta.get("temperature") is not None:
+            graph.temperature = file_meta["temperature"]
+
+        return graph
+
     def visualize(self, save_path: str) -> None:
         """Visualize the graph and save it to a file."""
         try:
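The new classmethod enables a load/inspect/manipulate/save round trip with no API key configured. A sketch with an illustrative path:

from deepfabric.graph import Graph

graph = Graph.load("topic_graph.json")  # placeholder LLM params; no generation possible
print(f"{len(graph.nodes)} nodes before pruning")
graph.prune_at_level(1)
graph.save("topic_graph_pruned.json")   # provider/model/temperature survive the round trip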
@@ -454,7 +630,7 @@ class Graph(TopicModel):
             prompt=prompt,
             schema=GraphSubtopics,
             max_retries=1,  # Don't retry inside - we handle it here
-            max_tokens=
+            max_tokens=self.max_tokens,
             temperature=self.temperature,
         )

@@ -615,6 +791,30 @@ class Graph(TopicModel):

         visited.remove(node.id)

+    def get_unique_topics(self) -> list[Topic]:
+        """Returns deduplicated topics by UUID.
+
+        Iterates through all nodes in the graph and returns unique topics.
+        Each node has a UUID in its metadata, ensuring uniqueness.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+            Each UUID appears exactly once.
+        """
+        seen_uuids: set[str] = set()
+        result: list[Topic] = []
+
+        for node in self.nodes.values():
+            # Skip root node — it holds the generation seed prompt, not a topic
+            if node.id == self.root.id:
+                continue
+            node_uuid = node.metadata.get("uuid")
+            if node_uuid and node_uuid not in seen_uuids:
+                seen_uuids.add(node_uuid)
+                result.append(Topic(uuid=node_uuid, topic=node.topic))
+
+        return result
+
     def _dfs_paths(
         self, node: Node, current_path: list[str], paths: list[list[str]], visited: set[int]
     ) -> None:
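Downstream consumers get a flat, root-free topic list keyed by UUID. A sketch, assuming Topic is the (uuid, topic) namedtuple pulled in by the changed import at the top of the file:

for t in graph.get_unique_topics():
    print(t.uuid, t.topic)  # each UUID exactly once; the root seed prompt is excluded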
deepfabric/graph_pruner.py
ADDED
@@ -0,0 +1,122 @@
+"""Graph pruning operations for deepfabric CLI."""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+from .graph import Graph
+
+
+@dataclass
+class PruneResult:
+    """Result of a pruning operation."""
+
+    operation: Literal["level", "uuid"]
+    removed_count: int
+    removed_node_ids: list[int]
+    remaining_nodes: int
+    remaining_paths: int
+    output_path: str
+
+
+def load_graph_for_pruning(file_path: str) -> Graph:
+    """Load a graph from JSON for pruning operations.
+
+    Args:
+        file_path: Path to the graph JSON file.
+
+    Returns:
+        Loaded Graph instance.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        ValueError: If the file is not a JSON graph file.
+    """
+    path = Path(file_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Graph file not found: {file_path}")
+    if path.suffix != ".json":
+        raise ValueError(
+            f"Expected a JSON graph file, got: {path.suffix}. "
+            "Pruning is only supported for graph format files."
+        )
+    return Graph.load(file_path)
+
+
+def prune_graph_at_level(
+    file_path: str,
+    max_depth: int,
+    output_path: str | None = None,
+) -> PruneResult:
+    """Prune a graph file by removing all nodes below a depth level.
+
+    Args:
+        file_path: Path to the input graph JSON file.
+        max_depth: Maximum depth to keep (0=root only, 1=root+children, etc.).
+        output_path: Output file path. If None, derives from input filename.
+
+    Returns:
+        PruneResult with operation details.
+    """
+    graph = load_graph_for_pruning(file_path)
+    removed_ids = graph.prune_at_level(max_depth)
+
+    final_output = output_path or _derive_output_path(file_path, f"pruned_level{max_depth}")
+    graph.save(final_output)
+
+    return PruneResult(
+        operation="level",
+        removed_count=len(removed_ids),
+        removed_node_ids=removed_ids,
+        remaining_nodes=len(graph.nodes),
+        remaining_paths=len(graph.get_all_paths()),
+        output_path=final_output,
+    )
+
+
+def prune_graph_by_uuid(
+    file_path: str,
+    uuid: str,
+    output_path: str | None = None,
+) -> PruneResult:
+    """Remove a node (by UUID) and its entire subtree from a graph file.
+
+    Args:
+        file_path: Path to the input graph JSON file.
+        uuid: UUID of the node to remove.
+        output_path: Output file path. If None, derives from input filename.
+
+    Returns:
+        PruneResult with operation details.
+
+    Raises:
+        ValueError: If UUID not found or targets the root node.
+    """
+    graph = load_graph_for_pruning(file_path)
+    node = graph.find_node_by_uuid(uuid)
+
+    if node is None:
+        raise ValueError(f"No node found with UUID: {uuid}")
+
+    removed_ids = graph.remove_subtree(node.id)
+
+    final_output = output_path or _derive_output_path(file_path, "pruned")
+    graph.save(final_output)
+
+    return PruneResult(
+        operation="uuid",
+        removed_count=len(removed_ids),
+        removed_node_ids=removed_ids,
+        remaining_nodes=len(graph.nodes),
+        remaining_paths=len(graph.get_all_paths()),
+        output_path=final_output,
+    )
+
+
+def _derive_output_path(input_path: str, suffix: str) -> str:
+    """Derive a non-destructive output path from the input path.
+
+    Example: topic_graph.json -> topic_graph_pruned_level2.json
+    """
+    p = Path(input_path)
+    return str(p.with_stem(f"{p.stem}_{suffix}"))
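The module keeps file-level pruning non-destructive by default, deriving a suffixed output path unless one is supplied. Usage sketch with placeholder paths and UUID:

from deepfabric.graph_pruner import prune_graph_at_level, prune_graph_by_uuid

result = prune_graph_at_level("topic_graph.json", max_depth=2)
print(result.output_path)   # topic_graph_pruned_level2.json unless output_path is given
print(result.removed_count, result.remaining_nodes, result.remaining_paths)

result = prune_graph_by_uuid("topic_graph.json", uuid="0b1c2d3e-0000-0000-0000-000000000000")
print(result.operation)     # "uuid"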
deepfabric/llm/retry_handler.py
CHANGED
@@ -7,15 +7,21 @@ import time

 from collections.abc import Callable, Coroutine
 from functools import wraps
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar

 from .rate_limit_config import BackoffStrategy, RateLimitConfig
 from .rate_limit_detector import RateLimitDetector

+if TYPE_CHECKING:
+    from deepfabric.progress import ProgressReporter
+
 logger = logging.getLogger(__name__)

 T = TypeVar("T")

+# Max chars for error summaries emitted through progress reporter
+_ERROR_SUMMARY_MAX_LENGTH = 200
+

 class RetryHandler:
     """Intelligent retry handler for LLM API calls with provider-aware backoff."""
@@ -30,6 +36,7 @@ class RetryHandler:
         self.config = config
         self.provider = provider
         self.detector = RateLimitDetector()
+        self.progress_reporter: ProgressReporter | None = None

     def should_retry(self, exception: Exception) -> bool:
         """Determine if an exception should trigger a retry.
@@ -126,14 +133,26 @@ class RetryHandler:
         if quota_info.quota_type:
             quota_info_str = f" (quota_type: {quota_info.quota_type})"

-        logger.warning(
-            "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
-            self.provider,
-            tries,
-            wait,
-            quota_info_str,
-            exception,
-        )
+        if self.progress_reporter:
+            error_summary = str(exception)
+            if len(error_summary) > _ERROR_SUMMARY_MAX_LENGTH:
+                error_summary = error_summary[:_ERROR_SUMMARY_MAX_LENGTH] + "..."
+            self.progress_reporter.emit_llm_retry(
+                provider=self.provider,
+                attempt=tries,
+                wait=wait,
+                error_summary=error_summary,
+                quota_type=quota_info_str.strip(" ()") if quota_info_str else "",
+            )
+        else:
+            logger.warning(
+                "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
+                self.provider,
+                tries,
+                wait,
+                quota_info_str,
+                exception,
+            )

     def on_giveup_handler(self, details: dict[str, Any]) -> None:
         """Callback when giving up after max retries.
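When a reporter is attached, retry events flow to observers instead of the module logger. A wiring sketch; the RetryHandler constructor signature beyond config and provider isn't shown in this diff, so the instantiation is illustrative:

handler = RetryHandler(config=rate_limit_config, provider="openai")
handler.progress_reporter = reporter  # any ProgressReporter; leaving it None keeps logger behavior

# On a retriable error the handler now emits roughly:
#   reporter.emit_llm_retry(provider=..., attempt=..., wait=...,
#                           error_summary=<str(exception), capped at 200 chars>,
#                           quota_type=<parsed quota info or "">)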
deepfabric/progress.py
CHANGED
@@ -81,6 +81,25 @@ class StreamObserver(Protocol):
         """
         ...

+    def on_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        metadata: dict[str, Any],
+    ) -> None:
+        """Called when an LLM API call is retried due to rate limiting or transient error.
+
+        Args:
+            provider: LLM provider name (e.g., "gemini", "openai")
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            metadata: Additional context (e.g., quota_type)
+        """
+        ...
+

 class ProgressReporter:
     """Central progress reporter that notifies observers of generation events.
@@ -184,6 +203,29 @@ class ProgressReporter:
             if hasattr(observer, "on_retry"):
                 observer.on_retry(sample_idx, attempt, max_attempts, error_summary, metadata)

+    def emit_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        **metadata,
+    ) -> None:
+        """Emit an LLM retry event to all observers.
+
+        Used to track LLM API rate limits and transient errors.
+
+        Args:
+            provider: LLM provider name
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            **metadata: Additional context as keyword arguments
+        """
+        for observer in self._observers:
+            if hasattr(observer, "on_llm_retry"):
+                observer.on_llm_retry(provider, attempt, wait, error_summary, metadata)
+
     def emit_tool_execution(
         self,
         tool_name: str,
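Because emit_llm_retry dispatches via hasattr, observers written before on_llm_retry existed keep working unchanged. A minimal conforming observer sketch (how observers get registered isn't shown in these hunks, so that step is omitted):

from typing import Any

class RetryLogObserver:
    """Prints LLM retry/backoff events as they arrive."""

    def on_llm_retry(
        self,
        provider: str,
        attempt: int,
        wait: float,
        error_summary: str,
        metadata: dict[str, Any],
    ) -> None:
        quota = metadata.get("quota_type") or "n/a"
        print(f"[{provider}] retry #{attempt}, backing off {wait:.1f}s (quota: {quota}): {error_summary}")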