DeepFabric 4.11.0-py3-none-any.whl → 4.12.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/cli.py +541 -6
- deepfabric/config.py +8 -1
- deepfabric/config_manager.py +6 -1
- deepfabric/graph.py +177 -1
- deepfabric/graph_pruner.py +122 -0
- deepfabric/topic_inspector.py +237 -0
- deepfabric/topic_manager.py +32 -0
- deepfabric/tree.py +40 -25
- {deepfabric-4.11.0.dist-info → deepfabric-4.12.0.dist-info}/METADATA +1 -1
- {deepfabric-4.11.0.dist-info → deepfabric-4.12.0.dist-info}/RECORD +13 -11
- {deepfabric-4.11.0.dist-info → deepfabric-4.12.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.11.0.dist-info → deepfabric-4.12.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.11.0.dist-info → deepfabric-4.12.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/graph.py
CHANGED
@@ -70,6 +70,11 @@ class GraphConfig(BaseModel):
         le=20,
         description="Maximum concurrent LLM calls during graph expansion (helps avoid rate limits)",
     )
+    max_tokens: int = Field(
+        default=DEFAULT_MAX_TOKENS,
+        ge=1,
+        description="Maximum tokens for topic generation LLM calls",
+    )
     base_url: str | None = Field(
         default=None,
         description="Base URL for API endpoint (e.g., custom OpenAI-compatible servers)",
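The new max_tokens field caps the size of each topic-generation response and is validated by Pydantic (ge=1, default DEFAULT_MAX_TOKENS). A minimal sketch of setting it, assuming GraphConfig accepts the same topic_prompt and model_name fields that Graph.load() passes as params later in this diff; the values are illustrative placeholders:

from deepfabric.graph import GraphConfig

config = GraphConfig(
    topic_prompt="Machine learning fundamentals",  # placeholder prompt
    model_name="openai/gpt-4o-mini",               # placeholder model
    max_tokens=4096,  # raise this if topic generations come back truncated
)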
@@ -156,6 +161,7 @@ class Graph(TopicModel):
         self.degree = self.config.degree
         self.depth = self.config.depth
         self.max_concurrent = self.config.max_concurrent
+        self.max_tokens = self.config.max_tokens
         self.prompt_style = self.config.prompt_style
 
         # Initialize LLM client
@@ -211,6 +217,139 @@ class Graph(TopicModel):
         if parent_node not in child_node.parents:
             child_node.parents.append(parent_node)
 
+    def find_node_by_uuid(self, uuid: str) -> Node | None:
+        """Find a node by its UUID.
+
+        Args:
+            uuid: The UUID string to search for.
+
+        Returns:
+            The Node if found, None otherwise.
+        """
+        for node in self.nodes.values():
+            if node.metadata.get("uuid") == uuid:
+                return node
+        return None
+
+    def remove_node(self, node_id: int) -> None:
+        """Remove a single node from the graph, cleaning up bidirectional references.
+
+        Does not remove children — use remove_subtree() for cascading removal.
+
+        Args:
+            node_id: The ID of the node to remove.
+
+        Raises:
+            ValueError: If node_id is the root node or does not exist.
+        """
+        if node_id == self.root.id:
+            raise ValueError("Cannot remove the root node")  # noqa: TRY003
+        node = self.nodes.get(node_id)
+        if node is None:
+            raise ValueError(f"Node {node_id} not found in graph")  # noqa: TRY003
+
+        for parent in node.parents:
+            if node in parent.children:
+                parent.children.remove(node)
+
+        for child in node.children:
+            if node in child.parents:
+                child.parents.remove(node)
+
+        del self.nodes[node_id]
+
+    def remove_subtree(self, node_id: int) -> list[int]:
+        """Remove a node and all its descendants from the graph.
+
+        Args:
+            node_id: The ID of the node to remove (along with all descendants).
+
+        Returns:
+            List of removed node IDs.
+
+        Raises:
+            ValueError: If node_id is the root node or does not exist.
+        """
+        if node_id == self.root.id:
+            raise ValueError("Cannot remove the root node")  # noqa: TRY003
+        node = self.nodes.get(node_id)
+        if node is None:
+            raise ValueError(f"Node {node_id} not found in graph")  # noqa: TRY003
+
+        # BFS to collect all descendant node IDs
+        to_remove: list[int] = []
+        queue = [node]
+        visited: set[int] = set()
+        while queue:
+            current = queue.pop(0)
+            if current.id in visited:
+                continue
+            visited.add(current.id)
+            to_remove.append(current.id)
+            for child in current.children:
+                if child.id not in visited:
+                    queue.append(child)
+
+        # Remove in reverse order (leaves first)
+        for nid in reversed(to_remove):
+            self.remove_node(nid)
+
+        return to_remove
+
+    def prune_at_level(self, max_depth: int) -> list[int]:
+        """Remove all nodes below the given depth level.
+
+        Nodes at exactly max_depth become leaf nodes. Root is depth 0.
+
+        Args:
+            max_depth: Maximum depth to keep (inclusive).
+                0 = keep only root, 1 = root and its children, etc.
+
+        Returns:
+            List of removed node IDs.
+
+        Raises:
+            ValueError: If max_depth is negative.
+        """
+        if max_depth < 0:
+            raise ValueError("max_depth must be non-negative")  # noqa: TRY003
+
+        # BFS from root to compute node depths
+        node_depths: dict[int, int] = {}
+        queue: list[tuple[Node, int]] = [(self.root, 0)]
+        visited: set[int] = set()
+        while queue:
+            current, depth = queue.pop(0)
+            if current.id in visited:
+                continue
+            visited.add(current.id)
+            node_depths[current.id] = depth
+            for child in current.children:
+                if child.id not in visited:
+                    queue.append((child, depth + 1))
+
+        to_remove_set = {nid for nid, d in node_depths.items() if d > max_depth}
+
+        # Sever children links from boundary nodes
+        for nid, d in node_depths.items():
+            if d == max_depth:
+                self.nodes[nid].children = [
+                    c for c in self.nodes[nid].children if c.id not in to_remove_set
+                ]
+
+        # Remove deeper nodes
+        for nid in to_remove_set:
+            node = self.nodes[nid]
+            for parent in node.parents:
+                if node in parent.children:
+                    parent.children.remove(node)
+            for child in node.children:
+                if node in child.parents:
+                    child.parents.remove(node)
+            del self.nodes[nid]
+
+        return list(to_remove_set)
+
     def to_pydantic(self) -> GraphModel:
         """Converts the runtime graph to its Pydantic model representation."""
         return GraphModel(
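Together these four methods give Graph an in-memory editing API. A hedged usage sketch; the file name and UUID are placeholders, and Graph.load() is the LLM-free loader added later in this diff:

from deepfabric.graph import Graph

graph = Graph.load("topic_graph.json")

# Cascading removal of one subtree, addressed by node UUID (placeholder value)
node = graph.find_node_by_uuid("placeholder-uuid")
if node is not None:
    removed_ids = graph.remove_subtree(node.id)
    print(f"removed {len(removed_ids)} nodes")

# Cap the graph at depth 2; nodes at exactly depth 2 become leaves
removed_ids = graph.prune_at_level(2)
graph.save("topic_graph.json")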
@@ -237,6 +376,13 @@ class Graph(TopicModel):
         with open(save_path, "w") as f:
             f.write(self.to_json())
 
+        # Save failed generations if any
+        if self.failed_generations:
+            failed_path = save_path.replace(".json", "_failed.jsonl")
+            with open(failed_path, "w") as f:
+                for failed in self.failed_generations:
+                    f.write(json.dumps({"failed_generation": failed}) + "\n")
+
     @classmethod
     def from_json(cls, json_path: str, params: dict) -> "Graph":
         """Load a topic graph from a JSON file."""
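save() now also writes a JSONL sidecar whenever failed_generations is non-empty, one JSON object per line. A small sketch of consuming that sidecar; the file name follows the replace(".json", "_failed.jsonl") convention above:

import json

# topic_graph.json produces topic_graph_failed.jsonl
with open("topic_graph_failed.jsonl") as f:
    failures = [json.loads(line)["failed_generation"] for line in f]
print(f"{len(failures)} failed generations recorded")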
@@ -268,6 +414,36 @@ class Graph(TopicModel):
         graph._next_node_id = max(graph.nodes.keys()) + 1
         return graph
 
+    @classmethod
+    def load(cls, json_path: str) -> "Graph":
+        """Load a graph from JSON without initializing LLM client.
+
+        Intended for inspection and manipulation operations that don't
+        require LLM generation capabilities. Restores provider, model,
+        and temperature from the file metadata so saves preserve them.
+        """
+        params = {
+            "topic_prompt": "loaded",
+            "model_name": "placeholder/model",
+            "degree": 3,
+            "depth": 2,
+            "temperature": 0.7,
+        }
+        graph = cls.from_json(json_path, params)
+
+        # Restore original metadata so save() preserves provenance
+        with open(json_path) as f:
+            raw = json.load(f)
+        file_meta = raw.get("metadata") or {}
+        if file_meta.get("provider"):
+            graph.provider = file_meta["provider"]
+        if file_meta.get("model"):
+            graph.model_name = file_meta["model"]
+        if file_meta.get("temperature") is not None:
+            graph.temperature = file_meta["temperature"]
+
+        return graph
+
     def visualize(self, save_path: str) -> None:
         """Visualize the graph and save it to a file."""
         try:
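Because load() builds the graph with placeholder params and then copies provider, model, and temperature back from the file's metadata block, a load-edit-save cycle keeps provenance intact. A sketch, assuming the file was written by a real generation run and therefore carries that metadata:

from deepfabric.graph import Graph

graph = Graph.load("topic_graph.json")
print(graph.model_name)  # the original model, not "placeholder/model"
graph.save("topic_graph.json")  # provenance metadata survives the round trip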
@@ -454,7 +630,7 @@ class Graph(TopicModel):
             prompt=prompt,
             schema=GraphSubtopics,
             max_retries=1,  # Don't retry inside - we handle it here
-            max_tokens=
+            max_tokens=self.max_tokens,
             temperature=self.temperature,
         )
 
deepfabric/graph_pruner.py
ADDED
@@ -0,0 +1,122 @@
+"""Graph pruning operations for deepfabric CLI."""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Literal
+
+from .graph import Graph
+
+
+@dataclass
+class PruneResult:
+    """Result of a pruning operation."""
+
+    operation: Literal["level", "uuid"]
+    removed_count: int
+    removed_node_ids: list[int]
+    remaining_nodes: int
+    remaining_paths: int
+    output_path: str
+
+
+def load_graph_for_pruning(file_path: str) -> Graph:
+    """Load a graph from JSON for pruning operations.
+
+    Args:
+        file_path: Path to the graph JSON file.
+
+    Returns:
+        Loaded Graph instance.
+
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        ValueError: If the file is not a JSON graph file.
+    """
+    path = Path(file_path)
+    if not path.exists():
+        raise FileNotFoundError(f"Graph file not found: {file_path}")
+    if path.suffix != ".json":
+        raise ValueError(
+            f"Expected a JSON graph file, got: {path.suffix}. "
+            "Pruning is only supported for graph format files."
+        )
+    return Graph.load(file_path)
+
+
+def prune_graph_at_level(
+    file_path: str,
+    max_depth: int,
+    output_path: str | None = None,
+) -> PruneResult:
+    """Prune a graph file by removing all nodes below a depth level.
+
+    Args:
+        file_path: Path to the input graph JSON file.
+        max_depth: Maximum depth to keep (0=root only, 1=root+children, etc.).
+        output_path: Output file path. If None, derives from input filename.
+
+    Returns:
+        PruneResult with operation details.
+    """
+    graph = load_graph_for_pruning(file_path)
+    removed_ids = graph.prune_at_level(max_depth)
+
+    final_output = output_path or _derive_output_path(file_path, f"pruned_level{max_depth}")
+    graph.save(final_output)
+
+    return PruneResult(
+        operation="level",
+        removed_count=len(removed_ids),
+        removed_node_ids=removed_ids,
+        remaining_nodes=len(graph.nodes),
+        remaining_paths=len(graph.get_all_paths()),
+        output_path=final_output,
+    )
+
+
+def prune_graph_by_uuid(
+    file_path: str,
+    uuid: str,
+    output_path: str | None = None,
+) -> PruneResult:
+    """Remove a node (by UUID) and its entire subtree from a graph file.
+
+    Args:
+        file_path: Path to the input graph JSON file.
+        uuid: UUID of the node to remove.
+        output_path: Output file path. If None, derives from input filename.
+
+    Returns:
+        PruneResult with operation details.
+
+    Raises:
+        ValueError: If UUID not found or targets the root node.
+    """
+    graph = load_graph_for_pruning(file_path)
+    node = graph.find_node_by_uuid(uuid)
+
+    if node is None:
+        raise ValueError(f"No node found with UUID: {uuid}")
+
+    removed_ids = graph.remove_subtree(node.id)
+
+    final_output = output_path or _derive_output_path(file_path, "pruned")
+    graph.save(final_output)
+
+    return PruneResult(
+        operation="uuid",
+        removed_count=len(removed_ids),
+        removed_node_ids=removed_ids,
+        remaining_nodes=len(graph.nodes),
+        remaining_paths=len(graph.get_all_paths()),
+        output_path=final_output,
+    )
+
+
+def _derive_output_path(input_path: str, suffix: str) -> str:
+    """Derive a non-destructive output path from the input path.
+
+    Example: topic_graph.json -> topic_graph_pruned_level2.json
+    """
+    p = Path(input_path)
+    return str(p.with_stem(f"{p.stem}_{suffix}"))
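The module is a thin file-level wrapper over the Graph methods above, always writing to a derived output file unless output_path is given. A usage sketch; the paths and UUID are placeholders:

from deepfabric.graph_pruner import prune_graph_at_level, prune_graph_by_uuid

# Keep root plus two levels; writes topic_graph_pruned_level2.json by default
result = prune_graph_at_level("topic_graph.json", max_depth=2)
print(result.removed_count, result.remaining_nodes, result.output_path)

# Remove one subtree by node UUID, to an explicit output file
result = prune_graph_by_uuid("topic_graph.json", uuid="placeholder-uuid", output_path="trimmed.json")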
deepfabric/topic_inspector.py
ADDED
@@ -0,0 +1,237 @@
+"""Topic file inspection utilities for deepfabric CLI."""
+
+import hashlib
+import json
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Literal
+
+from .graph import Graph
+from .utils import read_topic_tree_from_jsonl
+
+
+@dataclass
+class TopicInspectionResult:
+    """Result of inspecting a topic file."""
+
+    format: Literal["tree", "graph"]
+    total_paths: int
+    max_depth: int
+    paths_at_level: list[list[str]] | None
+    expanded_paths: list[list[str]] | None  # Paths from level onwards (with --expand)
+    all_paths: list[list[str]] | None
+    metadata: dict[str, Any]
+    source_file: str
+    # Maps path tuple to UUID/topic_id (for --uuid flag)
+    path_to_uuid: dict[tuple[str, ...], str] = field(default_factory=dict)
+    # Maps topic name to UUID (for graph format, all nodes)
+    topic_to_uuid: dict[str, str] = field(default_factory=dict)
+
+
+def detect_format(file_path: str) -> Literal["tree", "graph"]:
+    """Auto-detect topic file format based on content.
+
+    Args:
+        file_path: Path to the topic file
+
+    Returns:
+        "tree" for JSONL format, "graph" for JSON format
+
+    Raises:
+        ValueError: If format cannot be detected
+        FileNotFoundError: If file doesn't exist
+    """
+    path = Path(file_path)
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    with open(file_path, encoding="utf-8") as f:
+        content = f.read().strip()
+    if not content:
+        raise ValueError("Empty file")
+
+    # Try to parse as a complete JSON object (Graph format)
+    try:
+        data = json.loads(content)
+        if isinstance(data, dict) and "nodes" in data and "root_id" in data:
+            return "graph"
+    except json.JSONDecodeError:
+        pass
+
+    # Try to parse first line as JSONL (Tree format)
+    first_line = content.split("\n")[0].strip()
+    try:
+        first_obj = json.loads(first_line)
+        if isinstance(first_obj, dict) and "path" in first_obj:
+            return "tree"
+    except json.JSONDecodeError:
+        pass
+
+    raise ValueError(f"Unable to detect format for: {file_path}")
+
+
+def _load_tree_paths(file_path: str) -> tuple[list[list[str]], dict[tuple[str, ...], str]]:
+    """Load tree paths directly from JSONL without initializing LLM.
+
+    Args:
+        file_path: Path to the JSONL file
+
+    Returns:
+        Tuple of (paths, path_to_uuid mapping)
+    """
+    dict_list = read_topic_tree_from_jsonl(file_path)
+    paths = []
+    path_to_uuid: dict[tuple[str, ...], str] = {}
+
+    for d in dict_list:
+        if "path" not in d:
+            continue
+        path = d["path"]
+        paths.append(path)
+        # Generate hash-based ID from path (same as tree.py)
+        path_str = " > ".join(path)
+        topic_id = hashlib.sha256(path_str.encode()).hexdigest()[:16]
+        path_to_uuid[tuple(path)] = topic_id
+
+    return paths, path_to_uuid
+
+
+def _load_graph_data(
+    file_path: str,
+) -> tuple[list[list[str]], dict[str, Any], dict[tuple[str, ...], str], dict[str, str]]:
+    """Load graph data and extract paths and metadata.
+
+    Args:
+        file_path: Path to the JSON file
+
+    Returns:
+        Tuple of (paths, metadata, path_to_uuid mapping, topic_to_uuid mapping)
+    """
+    graph = Graph.load(file_path)
+
+    # Get paths with UUIDs (for leaf nodes)
+    paths_with_ids = graph.get_all_paths_with_ids()
+    all_paths = [tp.path for tp in paths_with_ids]
+    path_to_uuid: dict[tuple[str, ...], str] = {
+        tuple(tp.path): tp.topic_id for tp in paths_with_ids
+    }
+
+    # Build topic name to UUID mapping for ALL nodes (not just leaves)
+    topic_to_uuid: dict[str, str] = {}
+    for node in graph.nodes.values():
+        node_uuid = node.metadata.get("uuid", "")
+        if node_uuid:
+            topic_to_uuid[node.topic] = node_uuid
+
+    metadata: dict[str, Any] = {
+        "total_nodes": len(graph.nodes),
+        "has_cycles": graph.has_cycle(),
+        "root_topic": graph.root.topic if graph.root else None,
+    }
+
+    # Read graph-level metadata directly from the JSON file
+    # since Graph.from_json doesn't restore provider/model
+    with open(file_path, encoding="utf-8") as f:
+        raw_data = json.load(f)
+
+    if "metadata" in raw_data and raw_data["metadata"]:
+        file_metadata = raw_data["metadata"]
+        if file_metadata.get("created_at"):
+            metadata["created_at"] = file_metadata["created_at"]
+        if file_metadata.get("provider"):
+            metadata["provider"] = file_metadata["provider"]
+        if file_metadata.get("model"):
+            metadata["model"] = file_metadata["model"]
+
+    return all_paths, metadata, path_to_uuid, topic_to_uuid
+
+
+def inspect_topic_file(
+    file_path: str,
+    level: int | None = None,
+    expand_depth: int | None = None,
+    show_all: bool = False,
+) -> TopicInspectionResult:
+    """Inspect a topic file and return structured results.
+
+    Args:
+        file_path: Path to the topic file
+        level: Specific level to show (0=root), or None
+        expand_depth: Number of sublevels to show (-1 for all), or None for no expansion
+        show_all: Whether to include all paths in result
+
+    Returns:
+        TopicInspectionResult with inspection data
+    """
+    format_type = detect_format(file_path)
+
+    # Load paths and metadata based on format
+    topic_to_uuid: dict[str, str] = {}
+    if format_type == "graph":
+        all_paths, metadata, path_to_uuid, topic_to_uuid = _load_graph_data(file_path)
+    else:
+        all_paths, path_to_uuid = _load_tree_paths(file_path)
+        # Extract root topic from paths
+        metadata = {}
+        if all_paths:
+            metadata["root_topic"] = all_paths[0][0]
+
+    max_depth = max(len(p) for p in all_paths) if all_paths else 0
+
+    # Get unique topics at specific level if requested
+    # Level 0 = root, Level 1 = children of root, etc.
+    paths_at_level = None
+    expanded_paths = None
+
+    if level is not None:
+        # Extract unique topic names at the given depth position
+        seen_topics: set[str] = set()
+        unique_topics: list[str] = []
+        for path in all_paths:
+            if len(path) > level:
+                topic_at_level = path[level]
+                if topic_at_level not in seen_topics:
+                    seen_topics.add(topic_at_level)
+                    unique_topics.append(topic_at_level)
+                    # If this topic is a leaf (path ends at level+1), map single-topic to UUID
+                    if len(path) == level + 1:
+                        original_uuid = path_to_uuid.get(tuple(path), "")
+                        if original_uuid:
+                            path_to_uuid[(topic_at_level,)] = original_uuid
+        # Store as single-element paths for consistency
+        paths_at_level = [[t] for t in unique_topics]
+
+        # If expand_depth is set, get paths from level onwards
+        if expand_depth is not None:
+            seen_paths: set[tuple[str, ...]] = set()
+            expanded_paths = []
+            for path in all_paths:
+                if len(path) > level:
+                    original_uuid = path_to_uuid.get(tuple(path), "")
+                    # Trim path to start from the specified level
+                    trimmed_path = path[level:]
+                    # Limit depth if expand_depth is not -1
+                    if expand_depth != -1 and len(trimmed_path) > expand_depth + 1:
+                        trimmed_path = trimmed_path[: expand_depth + 1]
+                    # Deduplicate paths (after trimming, many may be identical)
+                    path_key = tuple(trimmed_path)
+                    if path_key not in seen_paths:
+                        seen_paths.add(path_key)
+                        expanded_paths.append(trimmed_path)
+                        # Map trimmed path to original UUID (for --uuid display)
+                        if original_uuid and path_key not in path_to_uuid:
+                            path_to_uuid[path_key] = original_uuid
+
+    return TopicInspectionResult(
+        format=format_type,
+        total_paths=len(all_paths),
+        max_depth=max_depth,
+        paths_at_level=paths_at_level,
+        expanded_paths=expanded_paths,
+        all_paths=all_paths if show_all else None,
+        metadata=metadata,
+        source_file=file_path,
+        path_to_uuid=path_to_uuid,
+        topic_to_uuid=topic_to_uuid,
+    )
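inspect_topic_file() drives both formats through a single entry point, auto-detecting tree (.jsonl) versus graph (.json) files. A usage sketch; the file name is a placeholder:

from deepfabric.topic_inspector import inspect_topic_file

result = inspect_topic_file("topic_graph.json", level=1, expand_depth=-1)
print(result.format, result.total_paths, result.max_depth)
for path in result.paths_at_level or []:
    print(" > ".join(path))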
deepfabric/topic_manager.py
CHANGED
@@ -90,6 +90,19 @@ async def _process_graph_events(graph: Graph, debug: bool = False) -> dict | None:
             tui.finish_building(failed_generations)
             final_event = event
 
+    if failed_generations > 0 and hasattr(graph, "failed_generations"):
+        truncated = sum(
+            1
+            for f in graph.failed_generations
+            if "EOF while parsing" in f.get("last_error", "")
+        )
+        if truncated:
+            get_tui().warning(
+                f"Hint: {truncated} of {failed_generations} failures appear to be "
+                f"truncated responses. Consider increasing max_tokens "
+                f"(currently {graph.max_tokens})."
+            )
+
     if debug and failed_generations > 0 and hasattr(graph, "failed_generations"):
         get_tui().error("\nDebug: Graph generation failures:")
         for idx, failure in enumerate(graph.failed_generations, 1):
@@ -147,6 +160,19 @@ async def _process_tree_events(tree: Tree, debug: bool = False) -> dict | None:
             tui.finish_building(total_paths, failed_generations)
             final_event = event
 
+    if failed_generations > 0 and hasattr(tree, "failed_generations"):
+        truncated = sum(
+            1
+            for f in tree.failed_generations
+            if "EOF while parsing" in f.get("error", "")
+        )
+        if truncated:
+            get_tui().warning(
+                f"Hint: {truncated} of {failed_generations} failures appear to be "
+                f"truncated responses. Consider increasing max_tokens "
+                f"(currently {tree.max_tokens})."
+            )
+
     if debug and failed_generations > 0 and hasattr(tree, "failed_generations"):
         get_tui().error("\nDebug: Tree generation failures:")
         for idx, failure in enumerate(tree.failed_generations, 1):
@@ -313,6 +339,9 @@ def save_topic_model(
     try:
         tree_save_path = topics_save_as or config.topics.save_as or "topic_tree.jsonl"
         topic_model.save(tree_save_path)
+        if getattr(topic_model, "failed_generations", None):
+            failed_path = tree_save_path.replace(".jsonl", "_failed.jsonl")
+            tui.warning(f"Failed generations saved to: {failed_path}")
         tui.success(f"Topic tree saved to {tree_save_path}")
         tui.info(f"Total paths: {len(topic_model.tree_paths)}")
     except Exception as e:
@@ -322,6 +351,9 @@ def save_topic_model(
     try:
         graph_save_path = topics_save_as or config.topics.save_as or "topic_graph.json"
        topic_model.save(graph_save_path)
+        if getattr(topic_model, "failed_generations", None):
+            failed_path = graph_save_path.replace(".json", "_failed.jsonl")
+            tui.warning(f"Failed generations saved to: {failed_path}")
         tui.success(f"Topic graph saved to {graph_save_path}")
     except Exception as e:
         raise ConfigurationError(f"Error saving topic graph: {str(e)}") from e