DeepFabric 4.10.1__py3-none-any.whl → 4.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/graph.py CHANGED
@@ -26,7 +26,7 @@ from .prompts import (
 )
 from .schemas import GraphSubtopics
 from .stream_simulator import simulate_stream
-from .topic_model import TopicModel, TopicPath
+from .topic_model import Topic, TopicModel, TopicPath
 
 if TYPE_CHECKING:  # only for type hints to avoid runtime cycles
     from .progress import ProgressReporter
@@ -615,6 +615,30 @@ class Graph(TopicModel):
 
         visited.remove(node.id)
 
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns deduplicated topics by UUID.
+
+        Iterates through all nodes in the graph and returns unique topics.
+        Each node has a UUID in its metadata, ensuring uniqueness.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+            Each UUID appears exactly once.
+        """
+        seen_uuids: set[str] = set()
+        result: list[Topic] = []
+
+        for node in self.nodes.values():
+            # Skip root node — it holds the generation seed prompt, not a topic
+            if node.id == self.root.id:
+                continue
+            node_uuid = node.metadata.get("uuid")
+            if node_uuid and node_uuid not in seen_uuids:
+                seen_uuids.add(node_uuid)
+                result.append(Topic(uuid=node_uuid, topic=node.topic))
+
+        return result
+
     def _dfs_paths(
         self, node: Node, current_path: list[str], paths: list[list[str]], visited: set[int]
     ) -> None:
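
The new Graph.get_unique_topics() gives generation a deduplicated view: in a graph, several paths can reach the same node, but each node carries a single UUID in its metadata. A minimal sketch of the same dedup logic, using plain dicts in place of graph nodes (the node structure here is an assumption for illustration):

from typing import NamedTuple

class Topic(NamedTuple):
    uuid: str
    topic: str

# Hypothetical node metadata; in the real graph these come from node.metadata["uuid"]
nodes = [
    {"uuid": "a1", "topic": "Symmetric encryption"},
    {"uuid": "a1", "topic": "Symmetric encryption"},  # same node, reached via a second path
    {"uuid": "b2", "topic": "Key exchange"},
]

seen: set[str] = set()
unique: list[Topic] = []
for n in nodes:
    if n["uuid"] and n["uuid"] not in seen:
        seen.add(n["uuid"])
        unique.append(Topic(uuid=n["uuid"], topic=n["topic"]))

assert [t.uuid for t in unique] == ["a1", "b2"]  # each UUID appears exactly once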
@@ -7,15 +7,21 @@ import time
 
 from collections.abc import Callable, Coroutine
 from functools import wraps
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar
 
 from .rate_limit_config import BackoffStrategy, RateLimitConfig
 from .rate_limit_detector import RateLimitDetector
 
+if TYPE_CHECKING:
+    from deepfabric.progress import ProgressReporter
+
 logger = logging.getLogger(__name__)
 
 T = TypeVar("T")
 
+# Max chars for error summaries emitted through progress reporter
+_ERROR_SUMMARY_MAX_LENGTH = 200
+
 
 class RetryHandler:
     """Intelligent retry handler for LLM API calls with provider-aware backoff."""
@@ -30,6 +36,7 @@ class RetryHandler:
         self.config = config
         self.provider = provider
         self.detector = RateLimitDetector()
+        self.progress_reporter: ProgressReporter | None = None
 
     def should_retry(self, exception: Exception) -> bool:
        """Determine if an exception should trigger a retry.
@@ -126,14 +133,26 @@ class RetryHandler:
         if quota_info.quota_type:
             quota_info_str = f" (quota_type: {quota_info.quota_type})"
 
-        logger.warning(
-            "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
-            self.provider,
-            tries,
-            wait,
-            quota_info_str,
-            exception,
-        )
+        if self.progress_reporter:
+            error_summary = str(exception)
+            if len(error_summary) > _ERROR_SUMMARY_MAX_LENGTH:
+                error_summary = error_summary[:_ERROR_SUMMARY_MAX_LENGTH] + "..."
+            self.progress_reporter.emit_llm_retry(
+                provider=self.provider,
+                attempt=tries,
+                wait=wait,
+                error_summary=error_summary,
+                quota_type=quota_info_str.strip(" ()") if quota_info_str else "",
+            )
+        else:
+            logger.warning(
+                "Rate limit/transient error for %s on attempt %d, backing off %.2fs%s: %s",
+                self.provider,
+                tries,
+                wait,
+                quota_info_str,
+                exception,
+            )
 
     def on_giveup_handler(self, details: dict[str, Any]) -> None:
         """Callback when giving up after max retries.
deepfabric/progress.py CHANGED
@@ -81,6 +81,25 @@ class StreamObserver(Protocol):
         """
         ...
 
+    def on_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        metadata: dict[str, Any],
+    ) -> None:
+        """Called when an LLM API call is retried due to rate limiting or transient error.
+
+        Args:
+            provider: LLM provider name (e.g., "gemini", "openai")
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            metadata: Additional context (e.g., quota_type)
+        """
+        ...
+
 
 class ProgressReporter:
     """Central progress reporter that notifies observers of generation events.
@@ -184,6 +203,29 @@ class ProgressReporter:
             if hasattr(observer, "on_retry"):
                 observer.on_retry(sample_idx, attempt, max_attempts, error_summary, metadata)
 
+    def emit_llm_retry(
+        self,
+        provider: str,
+        attempt: int,
+        wait: float,
+        error_summary: str,
+        **metadata,
+    ) -> None:
+        """Emit an LLM retry event to all observers.
+
+        Used to track LLM API rate limits and transient errors.
+
+        Args:
+            provider: LLM provider name
+            attempt: Current attempt number (1-based)
+            wait: Backoff delay in seconds
+            error_summary: Brief description of the error
+            **metadata: Additional context as keyword arguments
+        """
+        for observer in self._observers:
+            if hasattr(observer, "on_llm_retry"):
+                observer.on_llm_retry(provider, attempt, wait, error_summary, metadata)
+
     def emit_tool_execution(
         self,
         tool_name: str,
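
Note the signature asymmetry: emitters pass extra context as keyword arguments, while observers receive it as a single dict. A usage sketch with invented values, reusing the LoggingObserver sketch above:

from deepfabric.progress import ProgressReporter

reporter = ProgressReporter()
reporter.attach(LoggingObserver())

# Keyword args beyond the named parameters are collected into **metadata
# and handed to each observer as one dict.
reporter.emit_llm_retry(
    provider="openai",
    attempt=2,
    wait=1.5,
    error_summary="429 Too Many Requests",
    quota_type="requests",
)
# -> on_llm_retry("openai", 2, 1.5, "429 Too Many Requests", {"quota_type": "requests"})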
@@ -45,6 +45,8 @@ async def _process_graph_events(graph: Graph, debug: bool = False) -> dict | None:
     progress_reporter = ProgressReporter()
     progress_reporter.attach(tui)
     graph.progress_reporter = progress_reporter
+    if hasattr(graph, "llm_client"):
+        graph.llm_client.retry_handler.progress_reporter = progress_reporter
 
     tui_started = False
 
@@ -116,6 +118,8 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
     progress_reporter = ProgressReporter()
     progress_reporter.attach(tui)
     tree.progress_reporter = progress_reporter
+    if hasattr(tree, "llm_client"):
+        tree.llm_client.retry_handler.progress_reporter = progress_reporter
 
     final_event = None
     try:
@@ -129,6 +133,8 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
             tui.add_failure()
             if debug and "error" in event:
                 get_tui().error(f"Debug: Tree generation failure - {event['error']}")
+            else:
+                tui.advance_simple_progress()
         elif event["event"] == "build_complete":
             total_paths = (
                 int(event["total_paths"]) if isinstance(event["total_paths"], str | int) else 0
@@ -233,8 +239,22 @@ def load_or_build_topic_model(
     tui = get_tui()
 
     if topics_load:
-        # Determine mode from config or file extension
-        is_graph = config.topics.mode == "graph" or topics_load.endswith(".json")
+        # Config mode takes precedence; file extension is only used to warn on mismatch
+        is_graph = config.topics.mode == "graph"
+
+        # Warn if file extension doesn't match the configured mode
+        if not is_graph and topics_load.endswith(".json"):
+            tui.warning(
+                f"File '{topics_load}' has .json extension (typically a graph) "
+                f"but mode is '{config.topics.mode}'. "
+                "If this is a graph set mode: graph in config."
+            )
+        elif is_graph and topics_load.endswith(".jsonl"):
+            tui.warning(
+                f"File '{topics_load}' has .jsonl extension (typically a tree) "
+                "but mode is 'graph'. "
+                "If this is a tree set mode: tree in config."
+            )
 
         if is_graph:
             tui.info(f"Reading topic graph from JSON file: {topics_load}")
deepfabric/topic_model.py CHANGED
@@ -9,6 +9,18 @@ class TopicPath(NamedTuple):
     topic_id: str
 
 
+class Topic(NamedTuple):
+    """A unique topic with its UUID and content.
+
+    Used for generation where we iterate over unique topics (by UUID)
+    rather than paths. This deduplicated view is essential for graphs
+    where multiple paths can lead to the same topic node.
+    """
+
+    uuid: str
+    topic: str  # The topic text/content
+
+
 class TopicModel(ABC):
     """Abstract base class for topic models like Tree and Graph."""
 
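
Topic is a plain NamedTuple, so fields are accessible positionally or by name and the value unpacks like any tuple. A quick illustration (the UUID and text are invented):

from deepfabric.topic_model import Topic

t = Topic(uuid="3f2a9c1b0d4e5f67", topic="Key exchange")
assert t.uuid == t[0] and t.topic == t[1]
uuid, text = t  # NamedTuple unpacks like a plain tuple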
@@ -37,6 +49,20 @@ class TopicModel(ABC):
         """
         raise NotImplementedError
 
+    @abstractmethod
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns deduplicated topics by UUID.
+
+        For generation, we iterate over unique topics rather than paths.
+        This is important for graphs where multiple paths can lead to the
+        same topic node - we only want to generate one sample per unique topic.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+            Each UUID appears exactly once.
+        """
+        raise NotImplementedError
+
     def get_path_by_id(self, topic_id: str) -> list[str] | None:
         """Look up a path by its topic_id.
 
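
Declaring get_unique_topics abstract on TopicModel lets generation code stay agnostic about whether it holds a Tree or a Graph. A sketch of the intended call pattern (the generate_all function is made up for illustration):

from deepfabric.topic_model import TopicModel

def generate_all(model: TopicModel) -> None:
    # Works for Tree and Graph alike; one sample per unique topic,
    # even when several graph paths reach the same node.
    for topic in model.get_unique_topics():
        print(f"would generate one sample for {topic.uuid}: {topic.topic}")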
deepfabric/tree.py CHANGED
@@ -1,4 +1,5 @@
 import asyncio
+import hashlib
 import json
 import time
 import warnings
@@ -21,7 +22,7 @@ from .metrics import trace
 from .prompts import TreePromptBuilder
 from .schemas import TopicList
 from .stream_simulator import simulate_stream
-from .topic_model import TopicModel, TopicPath
+from .topic_model import Topic, TopicModel, TopicPath
 
 warnings.filterwarnings("ignore", message=".*Pydantic serializer warnings:.*")
 
@@ -242,24 +243,41 @@ class Tree(TopicModel):
         """Returns all the paths in the topic model."""
         return self.tree_paths
 
+    @staticmethod
+    def _path_to_id(path: list[str]) -> str:
+        """Compute a deterministic topic ID from a tree path."""
+        return hashlib.sha256(json.dumps(path).encode()).hexdigest()[:16]
+
+    def _add_path(self, path: list[str]) -> None:
+        """Add a path to the tree.
+
+        Args:
+            path: The topic path to add.
+        """
+        self.tree_paths.append(path)
+
     def get_all_paths_with_ids(self) -> list[TopicPath]:
         """Returns all paths with their unique identifiers.
 
-        For Tree, we generate stable IDs by hashing the path content.
-        This ensures the same path always gets the same ID across runs.
-
         Returns:
             List of TopicPath namedtuples containing (path, topic_id).
+            The topic_id is computed deterministically from the path content.
         """
-        import hashlib  # noqa: PLC0415
+        return [TopicPath(path=path, topic_id=self._path_to_id(path)) for path in self.tree_paths]
 
-        result: list[TopicPath] = []
-        for path in self.tree_paths:
-            # Generate stable ID from path content
-            path_str = "::".join(path)
-            topic_id = hashlib.sha256(path_str.encode()).hexdigest()[:16]
-            result.append(TopicPath(path=path, topic_id=topic_id))
-        return result
+    def get_unique_topics(self) -> list[Topic]:
+        """Returns all leaf topics with computed IDs.
+
+        For Trees, each path is unique by definition, so this returns
+        all leaf topics with deterministic path-based IDs.
+
+        Returns:
+            List of Topic namedtuples containing (uuid, topic).
+        """
+        return [
+            Topic(uuid=self._path_to_id(path), topic=path[-1] if path else "")
+            for path in self.tree_paths
+        ]
 
     async def get_subtopics(
         self, system_prompt: str, node_path: list[str], num_subtopics: int
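
Worth noting for downstream consumers: the hash input changed from "::".join(path) to json.dumps(path), so the ID for the same path differs between 4.10.1 and 4.11.0 (IDs are computed on the fly and never persisted, per the save docstring below, so only in-flight references are affected). The json.dumps form also removes the ambiguity "::" had when a topic's own text contains "::", which is likely the motivation. The difference is easy to verify:

import hashlib, json

path = ["root", "crypto", "key exchange"]

old_id = hashlib.sha256("::".join(path).encode()).hexdigest()[:16]   # 4.10.1 scheme
new_id = hashlib.sha256(json.dumps(path).encode()).hexdigest()[:16]  # 4.11.0 scheme

assert old_id != new_id  # same path, different ID across versions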
@@ -361,7 +379,7 @@ class Tree(TopicModel):
         yield {"event": "subtree_start", "node_path": node_path, "depth": current_depth}
 
         if current_depth > total_depth:
-            self.tree_paths.append(node_path)
+            self._add_path(node_path)
             yield {"event": "leaf_reached", "path": node_path}
             return
 
@@ -383,7 +401,7 @@ class Tree(TopicModel):
             yield event
 
         if not subtopics:
-            self.tree_paths.append(node_path)
+            self._add_path(node_path)
             yield {"event": "leaf_reached", "path": node_path}
             return
 
@@ -403,7 +421,11 @@ class Tree(TopicModel):
             yield child_event
 
     def save(self, save_path: str) -> None:
-        """Save the topic tree to a file."""
+        """Save the topic tree to a file.
+
+        Format: {"path": [...]}
+        IDs are computed on-the-fly from path content, not persisted.
+        """
         from pathlib import Path  # noqa: PLC0415
 
         Path(save_path).parent.mkdir(parents=True, exist_ok=True)
@@ -446,8 +468,11 @@ class Tree(TopicModel):
     def from_dict_list(self, dict_list: list[dict[str, Any]]) -> None:
         """Construct the topic tree from a list of dictionaries.
 
+        Accepts both the current format (``{"path": [...]}``) and the
+        legacy format that included a ``leaf_uuid`` field (silently ignored).
+
         Args:
-            dict_list (list[dict]): The list of dictionaries representing the topic tree.
+            dict_list: The list of dictionaries representing the topic tree.
         """
         # Clear existing data
         self.tree_paths = []
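
As the updated docstring notes, loading tolerates both on-disk shapes: the current one-object-per-line format and the legacy one that carried a leaf_uuid field, which is dropped on load. An example of the two record shapes (path and UUID values invented):

import json

current = json.loads('{"path": ["root", "crypto", "key exchange"]}')
legacy = json.loads(
    '{"path": ["root", "crypto", "key exchange"], "leaf_uuid": "3f2a9c1b0d4e5f67"}'
)
# Both yield the same path; leaf_uuid is ignored on load.
assert current["path"] == legacy["path"]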