DeepFabric 4.10.1__py3-none-any.whl → 4.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/cli.py +624 -33
- deepfabric/cloud_upload.py +1 -1
- deepfabric/config.py +14 -5
- deepfabric/config_manager.py +6 -1
- deepfabric/constants.py +1 -1
- deepfabric/dataset_manager.py +264 -62
- deepfabric/generator.py +687 -82
- deepfabric/graph.py +202 -2
- deepfabric/graph_pruner.py +122 -0
- deepfabric/llm/retry_handler.py +28 -9
- deepfabric/progress.py +42 -0
- deepfabric/topic_inspector.py +237 -0
- deepfabric/topic_manager.py +54 -2
- deepfabric/topic_model.py +26 -0
- deepfabric/tree.py +81 -41
- deepfabric/tui.py +448 -349
- deepfabric/utils.py +4 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/METADATA +3 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/RECORD +22 -20
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/licenses/LICENSE +1 -1
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.10.1.dist-info → deepfabric-4.12.0.dist-info}/entry_points.txt +0 -0
deepfabric/cli.py
CHANGED
|
@@ -2,11 +2,19 @@ import contextlib
|
|
|
2
2
|
import json
|
|
3
3
|
import math
|
|
4
4
|
import os
|
|
5
|
+
import platform
|
|
6
|
+
import select
|
|
5
7
|
import signal
|
|
6
8
|
import sys
|
|
7
9
|
|
|
8
10
|
from pathlib import Path
|
|
9
|
-
from typing import Literal, NoReturn, cast
|
|
11
|
+
from typing import TYPE_CHECKING, Literal, NoReturn, cast
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from rich.tree import Tree as RichTree
|
|
15
|
+
|
|
16
|
+
from .topic_inspector import TopicInspectionResult
|
|
17
|
+
from .tui import DeepFabricTUI
|
|
10
18
|
|
|
11
19
|
import click
|
|
12
20
|
import yaml
|
|
@@ -137,7 +145,7 @@ class GenerateOptions(BaseModel):
|
|
|
137
145
|
batch_size: int | None = None
|
|
138
146
|
base_url: str | None = None
|
|
139
147
|
include_system_message: bool | None = None
|
|
140
|
-
mode: Literal["tree", "graph"] = Field(default=
|
|
148
|
+
mode: Literal["tree", "graph"] | None = Field(default=None)
|
|
141
149
|
debug: bool = False
|
|
142
150
|
topic_only: bool = False
|
|
143
151
|
tui: Literal["rich", "simple"] = Field(default="rich")
|
|
@@ -330,7 +338,7 @@ def _load_and_prepare_generation_context(
|
|
|
330
338
|
# Skip path validation for topic-only mode since we're not generating dataset samples
|
|
331
339
|
if not skip_path_validation:
|
|
332
340
|
validate_path_requirements(
|
|
333
|
-
mode=
|
|
341
|
+
mode=config.topics.mode,
|
|
334
342
|
depth=final_depth,
|
|
335
343
|
degree=final_degree,
|
|
336
344
|
num_samples=final_num_samples,
|
|
@@ -339,7 +347,7 @@ def _load_and_prepare_generation_context(
|
|
|
339
347
|
)
|
|
340
348
|
|
|
341
349
|
show_validation_success(
|
|
342
|
-
mode=
|
|
350
|
+
mode=config.topics.mode,
|
|
343
351
|
depth=final_depth,
|
|
344
352
|
degree=final_degree,
|
|
345
353
|
num_samples=final_num_samples,
|
|
@@ -419,6 +427,35 @@ def _trigger_cloud_upload(
|
|
|
419
427
|
)
|
|
420
428
|
|
|
421
429
|
|
|
430
|
+
def _prompt_with_timeout(
|
|
431
|
+
choices: list[str],
|
|
432
|
+
default: str,
|
|
433
|
+
timeout: int = 20,
|
|
434
|
+
) -> str:
|
|
435
|
+
"""Prompt for a choice with a visible countdown, auto-selecting default on timeout."""
|
|
436
|
+
if platform.system() == "Windows":
|
|
437
|
+
return click.prompt(
|
|
438
|
+
f" Choose [{'/'.join(choices)}]",
|
|
439
|
+
type=click.Choice(choices),
|
|
440
|
+
default=default,
|
|
441
|
+
)
|
|
442
|
+
valid = set(choices)
|
|
443
|
+
for remaining in range(timeout, 0, -1):
|
|
444
|
+
sys.stdout.write(f"\r Choose [{'/'.join(choices)}] (auto-{default} in {remaining:2d}s): ")
|
|
445
|
+
sys.stdout.flush()
|
|
446
|
+
ready, _, _ = select.select([sys.stdin], [], [], 1.0)
|
|
447
|
+
if ready:
|
|
448
|
+
line = sys.stdin.readline().strip()
|
|
449
|
+
sys.stdout.write("\n")
|
|
450
|
+
sys.stdout.flush()
|
|
451
|
+
if line in valid:
|
|
452
|
+
return line
|
|
453
|
+
return default
|
|
454
|
+
sys.stdout.write("\n")
|
|
455
|
+
sys.stdout.flush()
|
|
456
|
+
return default
|
|
457
|
+
|
|
458
|
+
|
|
422
459
|
def _run_generation(
|
|
423
460
|
*,
|
|
424
461
|
preparation: GenerationPreparation,
|
|
@@ -470,14 +507,10 @@ def _run_generation(
|
|
|
470
507
|
tui.console.print(" [cyan]3)[/cyan] Abort")
|
|
471
508
|
tui.console.print()
|
|
472
509
|
|
|
473
|
-
choice =
|
|
474
|
-
"Choose an option",
|
|
475
|
-
type=click.Choice(["1", "2", "3"]),
|
|
476
|
-
default="1",
|
|
477
|
-
)
|
|
510
|
+
choice = _prompt_with_timeout(["1", "2", "3"], default="1", timeout=20)
|
|
478
511
|
|
|
479
512
|
if choice == "1":
|
|
480
|
-
# User wants to resume
|
|
513
|
+
# User wants to resume (or auto-selected after timeout)
|
|
481
514
|
options.resume = True
|
|
482
515
|
elif choice == "2":
|
|
483
516
|
# Clear and start fresh
|
|
@@ -493,7 +526,7 @@ def _run_generation(
|
|
|
493
526
|
if engine.load_checkpoint(retry_failed=options.retry_failed):
|
|
494
527
|
samples_done = engine._flushed_samples_count
|
|
495
528
|
failures_done = engine._flushed_failures_count
|
|
496
|
-
ids_processed = len(engine.
|
|
529
|
+
ids_processed = len(engine._completed)
|
|
497
530
|
retry_msg = " (retrying failed samples)" if options.retry_failed else ""
|
|
498
531
|
|
|
499
532
|
# Update TUI status panel with checkpoint progress
|
|
@@ -503,17 +536,18 @@ def _run_generation(
|
|
|
503
536
|
if failures_done > 0:
|
|
504
537
|
tui.info(
|
|
505
538
|
f"Resuming from checkpoint: {samples_done} samples, "
|
|
506
|
-
f"{failures_done} failed, {ids_processed}
|
|
539
|
+
f"{failures_done} failed, {ids_processed} UUIDs processed{retry_msg}"
|
|
507
540
|
)
|
|
508
541
|
else:
|
|
509
542
|
tui.info(
|
|
510
543
|
f"Resuming from checkpoint: {samples_done} samples, "
|
|
511
|
-
f"{ids_processed}
|
|
544
|
+
f"{ids_processed} UUIDs processed{retry_msg}"
|
|
512
545
|
)
|
|
513
546
|
else:
|
|
514
547
|
tui.info("No checkpoint found, starting fresh generation")
|
|
515
548
|
|
|
516
|
-
# Set up graceful Ctrl+C handling
|
|
549
|
+
# Set up graceful Ctrl+C handling
|
|
550
|
+
has_checkpoint = generation_params.get("checkpoint_interval") is not None
|
|
517
551
|
interrupt_count = 0
|
|
518
552
|
|
|
519
553
|
def handle_sigint(_signum, _frame):
|
|
@@ -522,7 +556,12 @@ def _run_generation(
|
|
|
522
556
|
|
|
523
557
|
if interrupt_count == 1:
|
|
524
558
|
engine.stop_requested = True
|
|
525
|
-
|
|
559
|
+
if has_checkpoint:
|
|
560
|
+
tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
|
|
561
|
+
else:
|
|
562
|
+
tui.warning(
|
|
563
|
+
"Stopping... partial results will be saved. (Ctrl+C again to force quit)"
|
|
564
|
+
)
|
|
526
565
|
dataset_tui = get_dataset_tui()
|
|
527
566
|
dataset_tui.log_event("⚠ Graceful stop requested")
|
|
528
567
|
dataset_tui.status_stop_requested()
|
|
@@ -547,12 +586,22 @@ def _run_generation(
|
|
|
547
586
|
finally:
|
|
548
587
|
signal.signal(signal.SIGINT, original_handler)
|
|
549
588
|
|
|
550
|
-
|
|
589
|
+
output_config = preparation.config.get_output_config()
|
|
590
|
+
output_save_path = options.output_save_as or output_config["save_as"]
|
|
591
|
+
|
|
592
|
+
# If gracefully stopped, handle based on checkpoint availability
|
|
551
593
|
if engine.stop_requested:
|
|
594
|
+
if has_checkpoint:
|
|
595
|
+
# Checkpoint on disk — user can resume later
|
|
596
|
+
return
|
|
597
|
+
# No checkpoint — save whatever was generated so far
|
|
598
|
+
if dataset and len(dataset) > 0:
|
|
599
|
+
tui.info(f"Saving {len(dataset)} samples generated before stop")
|
|
600
|
+
save_dataset(dataset, output_save_path, preparation.config, engine=engine)
|
|
601
|
+
else:
|
|
602
|
+
tui.warning("No samples were generated before stop")
|
|
552
603
|
return
|
|
553
604
|
|
|
554
|
-
output_config = preparation.config.get_output_config()
|
|
555
|
-
output_save_path = options.output_save_as or output_config["save_as"]
|
|
556
605
|
save_dataset(dataset, output_save_path, preparation.config, engine=engine)
|
|
557
606
|
|
|
558
607
|
# Clean up checkpoint files after successful completion
|
|
@@ -612,8 +661,8 @@ def _run_generation(
|
|
|
612
661
|
@click.option(
|
|
613
662
|
"--mode",
|
|
614
663
|
type=click.Choice(["tree", "graph"]),
|
|
615
|
-
default=
|
|
616
|
-
help="Topic generation mode (default:
|
|
664
|
+
default=None,
|
|
665
|
+
help="Topic generation mode (default: graph)",
|
|
617
666
|
)
|
|
618
667
|
@click.option(
|
|
619
668
|
"--debug",
|
|
@@ -651,8 +700,7 @@ def _run_generation(
|
|
|
651
700
|
"--cloud-upload",
|
|
652
701
|
type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
|
|
653
702
|
default=None,
|
|
654
|
-
help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
|
|
655
|
-
"Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
|
|
703
|
+
help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. ",
|
|
656
704
|
)
|
|
657
705
|
@click.option(
|
|
658
706
|
"--checkpoint-interval",
|
|
@@ -783,7 +831,9 @@ def generate( # noqa: PLR0913
|
|
|
783
831
|
|
|
784
832
|
# Compute checkpoint directory once for consistent use throughout generation
|
|
785
833
|
# Use config file for hash, fallback to output path for config-less runs
|
|
786
|
-
path_source =
|
|
834
|
+
path_source = (
|
|
835
|
+
options.config_file or options.output_save_as or preparation.config.output.save_as
|
|
836
|
+
)
|
|
787
837
|
checkpoint_dir = options.checkpoint_path or get_checkpoint_dir(path_source)
|
|
788
838
|
|
|
789
839
|
# Auto-infer topics-load when resuming from checkpoint
|
|
@@ -1295,23 +1345,34 @@ def validate(config_file: str, check_api: bool) -> None: # noqa: PLR0912
|
|
|
1295
1345
|
f"estimated_paths={estimated_paths} ({degree}^{depth})"
|
|
1296
1346
|
)
|
|
1297
1347
|
|
|
1298
|
-
# Output summary with
|
|
1348
|
+
# Output summary with cycle-based generation info
|
|
1299
1349
|
num_samples = config.output.num_samples
|
|
1300
1350
|
batch_size = config.output.batch_size
|
|
1301
|
-
# Calculate num_steps - handle 'auto' and percentage strings
|
|
1302
|
-
if isinstance(num_samples, int):
|
|
1303
|
-
num_steps = math.ceil(num_samples / batch_size)
|
|
1304
|
-
output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}, num_steps={num_steps}"
|
|
1305
|
-
else:
|
|
1306
|
-
# For 'auto' or percentage, we can't compute steps without topic count
|
|
1307
|
-
output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}"
|
|
1308
1351
|
|
|
1309
|
-
#
|
|
1352
|
+
# Show output configuration
|
|
1353
|
+
output_info = f"Output: num_samples={num_samples}, concurrency={batch_size}"
|
|
1310
1354
|
if config.output.checkpoint:
|
|
1311
1355
|
checkpoint = config.output.checkpoint
|
|
1312
1356
|
output_info += f", checkpoint_interval={checkpoint.interval}"
|
|
1313
1357
|
tui.info(output_info)
|
|
1314
1358
|
|
|
1359
|
+
# Calculate and display cycle-based generation info
|
|
1360
|
+
if isinstance(num_samples, int):
|
|
1361
|
+
cycles_needed = math.ceil(num_samples / estimated_paths)
|
|
1362
|
+
final_cycle_size = num_samples - (cycles_needed - 1) * estimated_paths
|
|
1363
|
+
is_partial = final_cycle_size < estimated_paths
|
|
1364
|
+
|
|
1365
|
+
tui.info(
|
|
1366
|
+
f" → Cycles needed: {cycles_needed} "
|
|
1367
|
+
f"({num_samples} samples ÷ {estimated_paths} unique topics)"
|
|
1368
|
+
)
|
|
1369
|
+
if is_partial:
|
|
1370
|
+
tui.info(f" → Final cycle: {final_cycle_size} topics (partial)")
|
|
1371
|
+
elif num_samples == "auto":
|
|
1372
|
+
tui.info(f" → Will generate 1 sample per unique topic ({estimated_paths} samples)")
|
|
1373
|
+
else:
|
|
1374
|
+
tui.info(" → Samples calculated at runtime based on topic count")
|
|
1375
|
+
|
|
1315
1376
|
if config.huggingface:
|
|
1316
1377
|
hf_config = config.get_huggingface_config()
|
|
1317
1378
|
tui.info(f"Hugging Face: repo={hf_config.get('repository', 'not set')}")
|
|
@@ -1893,7 +1954,8 @@ def checkpoint_status(config_file: str) -> None:
|
|
|
1893
1954
|
# Check if checkpoint exists
|
|
1894
1955
|
if not metadata_path.exists():
|
|
1895
1956
|
tui.info(f"No checkpoint found at: {metadata_path}")
|
|
1896
|
-
tui.
|
|
1957
|
+
tui.console.print()
|
|
1958
|
+
tui.info("To enable checkpointing, run:")
|
|
1897
1959
|
tui.info(f" deepfabric generate {config_file} --checkpoint-interval 10")
|
|
1898
1960
|
return
|
|
1899
1961
|
|
|
@@ -1994,5 +2056,534 @@ def checkpoint_status(config_file: str) -> None:
|
|
|
1994
2056
|
)
|
|
1995
2057
|
|
|
1996
2058
|
|
|
2059
|
+
# Topic inspection command group
|
|
2060
|
+
@click.group()
def topic() -> None:
    """Topic management commands."""
    # Intentionally empty: this function only anchors the `topic` command
    # group; its subcommands attach themselves via @topic.command(...).
|
|
2064
|
+
|
|
2065
|
+
|
|
2066
|
+
@topic.command("inspect")
@click.argument("file", type=click.Path(exists=True))
@click.option(
    "--level",
    "-l",
    type=int,
    default=None,
    help="Show topics at a specific depth level (0=root, 1=first children, etc.)",
)
@click.option(
    "--expand",
    "-e",
    type=int,
    default=None,
    is_flag=False,
    flag_value=-1,  # -1 means expand all levels
    help="Show subtopics in tree format. Use alone for all levels, or specify depth (e.g., --expand 2)",
)
@click.option(
    "--all",
    "-a",
    "show_all",
    is_flag=True,
    help="Show the entire tree structure with indentation",
)
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["tree", "table", "json"]),
    default="tree",
    help="Output format (default: tree)",
)
@click.option(
    "--uuid",
    "-u",
    "show_uuid",
    is_flag=True,
    help="Show UUID/topic_id for each leaf node",
)
def topic_inspect(
    file: str,
    level: int | None,
    expand: int | None,
    show_all: bool,
    output_format: str,
    show_uuid: bool,
) -> None:
    """Inspect a topic tree or graph file.

    Displays metadata and structure of topic files generated by DeepFabric.
    Supports both Tree (JSONL) and Graph (JSON) formats with auto-detection.

    Examples:

    \b
    # Show file metadata and summary
    deepfabric topic inspect topic_tree.jsonl

    \b
    # Show topics at depth level 2 (just topic names)
    deepfabric topic inspect topic_tree.jsonl --level 2

    \b
    # Show level 2 topics and all subtopics (tree format)
    deepfabric topic inspect topic_tree.jsonl --level 2 --expand

    \b
    # Show level 2 topics and 1 sublevel only (tree format)
    deepfabric topic inspect topic_tree.jsonl --level 2 --expand 1

    \b
    # Show entire tree with indentation
    deepfabric topic inspect topic_graph.json --all

    \b
    # Output as JSON for scripting
    deepfabric topic inspect topic_tree.jsonl --format json

    \b
    # Show UUIDs for each leaf node
    deepfabric topic inspect topic_tree.jsonl --all --uuid
    """
    from .topic_inspector import inspect_topic_file  # noqa: PLC0415

    tui = get_tui()

    try:
        result = inspect_topic_file(file, level=level, expand_depth=expand, show_all=show_all)

        # Everything except "json" is rendered with rich (tree or table).
        if output_format != "json":
            _display_inspection_result(
                tui, result, output_format, level, expand, show_all, show_uuid
            )
            return

        # JSON output: always-present summary fields first ...
        payload = {
            "format": result.format,
            "source_file": result.source_file,
            "total_paths": result.total_paths,
            "max_depth": result.max_depth,
            "metadata": result.metadata,
        }
        # ... then each optional path listing, only when the inspector filled it in.
        for field_name in ("paths_at_level", "expanded_paths", "all_paths"):
            field_value = getattr(result, field_name)
            if field_value is not None:
                payload[field_name] = field_value
        tui.console.print_json(json.dumps(payload))

    except FileNotFoundError as e:
        tui.error(str(e))
        sys.exit(1)
    except ValueError as e:
        tui.error(f"Invalid file: {e}")
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report any other failure and exit non-zero.
        tui.error(f"Error inspecting file: {e}")
        sys.exit(1)
|
|
2187
|
+
|
|
2188
|
+
|
|
2189
|
+
def _display_inspection_result(
    tui: "DeepFabricTUI",
    result: "TopicInspectionResult",
    output_format: str,
    level: int | None,
    expand: int | None,
    show_all: bool,
    show_uuid: bool = False,
) -> None:
    """Render an inspection result on the TUI console.

    Prints a header and a "Statistics" panel, then, depending on the
    arguments: a flat list of topics at ``level`` (no ``expand``), the
    expanded subtree below ``level`` (with ``expand``), and/or the full
    structure (``show_all``). Path listings are drawn as a table when
    ``output_format`` is ``"table"`` and as a tree otherwise. UUIDs are
    shown only when ``show_uuid`` is set.
    """
    from rich.panel import Panel  # noqa: PLC0415
    from rich.table import Table  # noqa: PLC0415

    console = tui.console
    metadata = result.metadata

    def render_paths(paths: list[list[str]]) -> None:
        # Shared by the "expanded" and "all" sections below.
        if output_format == "table":
            _display_paths_as_table(tui, paths)
            return
        _display_paths_as_tree(
            tui,
            paths,
            result.path_to_uuid if show_uuid else None,
            result.topic_to_uuid if show_uuid else None,
        )

    # --- Header -----------------------------------------------------------
    console.print()
    console.print("[bold cyan]Topic Inspector[/bold cyan]")
    console.print(f"[dim]{result.source_file}[/dim]")
    console.print()

    # --- Statistics panel -------------------------------------------------
    stats = Table(show_header=False, box=None, padding=(0, 1))
    stats.add_column(style="cyan", no_wrap=True)
    stats.add_column(style="white")

    stats.add_row("Format:", "Graph (JSON)" if result.format == "graph" else "Tree (JSONL)")
    stats.add_row("Total Paths:", str(result.total_paths))
    stats.add_row("Max Depth:", str(result.max_depth))

    root_topic = metadata.get("root_topic")
    if root_topic:
        # Keep the panel narrow: long root prompts are cut to 60 characters.
        if len(root_topic) > 60:  # noqa: PLR2004
            root_topic = root_topic[:57] + "..."
        stats.add_row("Root Topic:", root_topic)

    total_nodes = metadata.get("total_nodes")
    if total_nodes:
        stats.add_row("Total Nodes:", str(total_nodes))

    cycles_flag = metadata.get("has_cycles")
    if cycles_flag is not None:
        stats.add_row("Has Cycles:", "Yes" if cycles_flag else "No")

    # Optional metadata rows, added only when the value is present and truthy.
    for row_label, meta_key in (
        ("Provider:", "provider"),
        ("Model:", "model"),
        ("Created:", "created_at"),
    ):
        meta_value = metadata.get(meta_key)
        if meta_value:
            stats.add_row(row_label, meta_value)

    console.print(Panel(stats, title="Statistics", border_style="dim"))

    # --- Topics at one level (no --expand): flat bullet list ---------------
    if level is not None and expand is None and result.paths_at_level is not None:
        console.print()
        console.print(f"[cyan bold]Topics at Level {level}:[/cyan bold]")

        if not result.paths_at_level:
            console.print(f" [dim]No topics at level {level}[/dim]")

        for topic_path in result.paths_at_level:
            topic_name = topic_path[0] if topic_path else ""

            node_uuid = ""
            if show_uuid:
                # Prefer the per-topic mapping, then fall back to the
                # per-path mapping keyed by the whole path tuple.
                if result.topic_to_uuid:
                    node_uuid = result.topic_to_uuid.get(topic_name, "")
                if not node_uuid and result.path_to_uuid:
                    node_uuid = result.path_to_uuid.get(tuple(topic_path), "")

            if node_uuid:
                console.print(
                    f" • {topic_name} [dim](UUID: {node_uuid})[/dim]", highlight=False
                )
            else:
                console.print(f" • {topic_name}")

    # --- Subtree below a level (with --expand) -----------------------------
    if level is not None and expand is not None and result.expanded_paths is not None:
        console.print()
        depth_info = "all sublevels" if expand == -1 else f"{expand} sublevel(s)"
        console.print(f"[cyan bold]Subtree from Level {level} ({depth_info}):[/cyan bold]")

        if result.expanded_paths:
            render_paths(result.expanded_paths)
        else:
            console.print(f" [dim]No topics at or below level {level}[/dim]")

    # --- Whole structure (--all) -------------------------------------------
    if show_all and result.all_paths:
        console.print()
        console.print("[cyan bold]Full Tree Structure:[/cyan bold]")
        render_paths(result.all_paths)
|
|
2303
|
+
|
|
2304
|
+
|
|
2305
|
+
def _display_paths_as_table(tui: "DeepFabricTUI", paths: list[list[str]]) -> None:
    """Print ``paths`` as a three-column table (#, Path, Depth).

    At most the first 100 paths are listed; a trailing summary row reports
    how many were left out. Each path is joined with ``" > "`` and cut to
    80 characters (77 plus an ellipsis) when longer.
    """
    from rich.table import Table  # noqa: PLC0415

    row_limit = 100
    width_limit = 80

    table = Table(show_header=True, header_style="bold cyan")
    for heading, column_style in (("#", "dim"), ("Path", "white"), ("Depth", "green")):
        table.add_column(heading, style=column_style)

    for row_number, path in enumerate(paths[:row_limit], start=1):
        joined = " > ".join(path)
        if len(joined) > width_limit:
            joined = joined[: width_limit - 3] + "..."
        table.add_row(str(row_number), joined, str(len(path)))

    omitted = len(paths) - row_limit
    if omitted > 0:
        table.add_row("...", f"[dim]{omitted} more paths[/dim]", "")

    tui.console.print(table)
|
|
2324
|
+
|
|
2325
|
+
|
|
2326
|
+
def _display_paths_as_tree(
    tui: "DeepFabricTUI",
    paths: list[list[str]],
    path_to_uuid: dict[tuple[str, ...], str] | None = None,
    topic_to_uuid: dict[str, str] | None = None,
) -> None:
    """Print ``paths`` as one indented rich tree per distinct root topic.

    Paths are grouped by their first element; each group becomes its own
    tree, and only the first 20 roots are drawn (a summary line reports the
    rest). Empty paths are ignored.

    Args:
        tui: TUI wrapper whose ``console`` receives the output.
        paths: Topic paths, each a list of topic names from root to node.
        path_to_uuid: Optional mapping from a full path tuple to a UUID,
            forwarded to the child renderer.
        topic_to_uuid: Optional mapping from topic name to UUID; when given,
            a root's UUID is appended to its label.
    """
    from rich.tree import Tree as RichTree  # noqa: PLC0415

    # Group paths by root topic, preserving first-seen order.
    root_groups: dict[str, list[list[str]]] = {}
    for path in paths:
        if path:
            root_groups.setdefault(path[0], []).append(path)

    if not root_groups:
        return

    # The root label always comes from the grouping key rather than from
    # paths[0][0]: the first path may be empty even when later ones are not,
    # which previously raised IndexError in the single-root case.
    max_roots = 20
    for root_topic, root_paths in list(root_groups.items())[:max_roots]:
        root_label = f"[bold]{root_topic}[/bold]"
        if topic_to_uuid and root_topic in topic_to_uuid:
            root_label += f" [dim](UUID: {topic_to_uuid[root_topic]})[/dim]"
        tree = RichTree(root_label)
        _add_children_to_tree(
            tree, root_paths, 1, path_to_uuid=path_to_uuid, topic_to_uuid=topic_to_uuid
        )
        tui.console.print(tree)

    if len(root_groups) > max_roots:
        tui.console.print(f"[dim]... and {len(root_groups) - max_roots} more topics[/dim]")
|
|
2373
|
+
|
|
2374
|
+
|
|
2375
|
+
def _add_children_to_tree(
|
|
2376
|
+
parent: "RichTree",
|
|
2377
|
+
paths: list[list[str]],
|
|
2378
|
+
depth: int,
|
|
2379
|
+
max_depth: int = 5,
|
|
2380
|
+
path_to_uuid: dict[tuple[str, ...], str] | None = None,
|
|
2381
|
+
topic_to_uuid: dict[str, str] | None = None,
|
|
2382
|
+
) -> None:
|
|
2383
|
+
"""Recursively add children to a rich tree (limited depth for display)."""
|
|
2384
|
+
if depth > max_depth:
|
|
2385
|
+
remaining = len([p for p in paths if len(p) > depth])
|
|
2386
|
+
if remaining > 0:
|
|
2387
|
+
parent.add(f"[dim]... {remaining} more levels[/dim]")
|
|
2388
|
+
return
|
|
2389
|
+
|
|
2390
|
+
# Group paths by their element at current depth
|
|
2391
|
+
children: dict[str, list[list[str]]] = {}
|
|
2392
|
+
for path in paths:
|
|
2393
|
+
if len(path) > depth:
|
|
2394
|
+
child_topic = path[depth]
|
|
2395
|
+
if child_topic not in children:
|
|
2396
|
+
children[child_topic] = []
|
|
2397
|
+
children[child_topic].append(path)
|
|
2398
|
+
|
|
2399
|
+
# Add children to tree
|
|
2400
|
+
for child_topic, child_paths in list(children.items())[:20]:
|
|
2401
|
+
# Check for UUID: first try topic_to_uuid (graph nodes), then path_to_uuid (leaves)
|
|
2402
|
+
uuid = ""
|
|
2403
|
+
if topic_to_uuid and child_topic in topic_to_uuid:
|
|
2404
|
+
uuid = topic_to_uuid[child_topic]
|
|
2405
|
+
elif path_to_uuid:
|
|
2406
|
+
# Check if this child is a leaf (path ends at depth + 1)
|
|
2407
|
+
is_leaf = any(len(p) == depth + 1 for p in child_paths)
|
|
2408
|
+
if is_leaf:
|
|
2409
|
+
leaf_path = next((p for p in child_paths if len(p) == depth + 1), None)
|
|
2410
|
+
if leaf_path:
|
|
2411
|
+
uuid = path_to_uuid.get(tuple(leaf_path), "")
|
|
2412
|
+
|
|
2413
|
+
if uuid:
|
|
2414
|
+
child_node = parent.add(f"{child_topic} [dim](UUID: {uuid})[/dim]")
|
|
2415
|
+
else:
|
|
2416
|
+
child_node = parent.add(child_topic)
|
|
2417
|
+
_add_children_to_tree(
|
|
2418
|
+
child_node, child_paths, depth + 1, max_depth, path_to_uuid, topic_to_uuid
|
|
2419
|
+
)
|
|
2420
|
+
|
|
2421
|
+
if len(children) > 20: # noqa: PLR2004
|
|
2422
|
+
parent.add(f"[dim]... and {len(children) - 20} more siblings[/dim]")
|
|
2423
|
+
|
|
2424
|
+
|
|
2425
|
+
@topic.command("prune")
@click.argument("file", type=click.Path(exists=True))
@click.option(
    "--level",
    "-l",
    type=int,
    default=None,
    help="Prune all nodes below this depth level (0=root, 1=children, etc.)",
)
@click.option(
    "--uuid",
    "-u",
    type=str,
    default=None,
    help="Remove the node with this UUID and its entire subtree",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    default=None,
    help="Output file path (default: auto-generated from input filename)",
)
@click.option(
    "--force",
    "-f",
    is_flag=True,
    help="Overwrite the input file instead of creating a new one",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be removed without making changes",
)
def topic_prune(
    file: str,
    level: int | None,
    uuid: str | None,
    output: str | None,
    force: bool,
    dry_run: bool,
) -> None:
    """Prune a topic graph by removing nodes.

    Supports two modes:

    \b
    # Remove all nodes below depth level 2
    deepfabric topic prune topic_graph.json --level 2

    \b
    # Remove a specific node and its subtree by UUID
    deepfabric topic prune topic_graph.json --uuid abc-123-def

    \b
    # Preview what would be removed (no file written)
    deepfabric topic prune topic_graph.json --level 1 --dry-run

    \b
    # Overwrite the original file
    deepfabric topic prune topic_graph.json --uuid abc-123 --force
    """
    from .graph_pruner import (  # noqa: PLC0415
        load_graph_for_pruning,
        prune_graph_at_level,
        prune_graph_by_uuid,
    )

    tui = get_tui()

    # Validate: exactly one mode must be specified
    if level is None and uuid is None:
        tui.error("Specify either --level or --uuid")
        sys.exit(1)
    if level is not None and uuid is not None:
        tui.error("Cannot use --level and --uuid together")
        sys.exit(1)

    try:
        if dry_run:
            # Preview only: load the graph and count, never write anything.
            graph = load_graph_for_pruning(file)
            total_nodes = len(graph.nodes)

            tui.console.print()
            tui.console.print("[bold]DRY RUN[/bold] — no changes will be made")
            tui.console.print()

            if level is not None:
                # Every node whose BFS depth from the root exceeds `level`
                # would be removed.
                node_depths = _bfs_node_depths(graph.root)
                remove_count = sum(1 for d in node_depths.values() if d > level)
                tui.console.print(f" Graph: {total_nodes} unique nodes")
                tui.console.print(f" Would remove: {remove_count} nodes below level {level}")
                tui.console.print(f" Would keep: {total_nodes - remove_count} nodes")
            else:
                target = graph.find_node_by_uuid(uuid)
                if target is None:
                    tui.error(f"No node found with UUID: {uuid}")
                    sys.exit(1)

                # The target plus everything reachable from it.
                subtree_count = len(_bfs_node_depths(target))

                tui.console.print(f" Graph: {total_nodes} unique nodes")
                tui.console.print(
                    f" Target: {target.topic}",
                    highlight=False,
                )
                tui.console.print(f" Would remove: {subtree_count} nodes (including subtree)")
                tui.console.print(f" Would keep: {total_nodes - subtree_count} nodes")
            return

        # --force writes back over the input; otherwise use --output, which
        # may be None and is then left for the pruner to resolve.
        output_path = file if force else output

        if level is not None:
            result = prune_graph_at_level(file, level, output_path)
        else:
            result = prune_graph_by_uuid(file, uuid, output_path)

        tui.console.print()
        tui.success("Graph pruned successfully")
        tui.console.print(f" Removed: {result.removed_count} nodes")
        tui.console.print(
            f" Remaining: {result.remaining_nodes} nodes, {result.remaining_paths} paths"
        )
        tui.console.print(f" Saved to: {result.output_path}")

    except FileNotFoundError as e:
        tui.error(str(e))
        sys.exit(1)
    except ValueError as e:
        tui.error(str(e))
        sys.exit(1)


def _bfs_node_depths(start) -> dict[int, int]:
    """Map the id of every node reachable from ``start`` to its BFS depth.

    ``start`` itself has depth 0. Nodes are expected to expose ``id`` and
    ``children``. A node reachable by several routes keeps the depth of the
    first (shallowest) discovery, so shared children and cycles are visited
    once. Uses a deque so dequeuing is O(1), unlike ``list.pop(0)``.
    """
    from collections import deque  # noqa: PLC0415

    depths = {start.id: 0}
    pending = deque([start])
    while pending:
        node = pending.popleft()
        child_depth = depths[node.id] + 1
        for child in node.children:
            if child.id not in depths:
                depths[child.id] = child_depth
                pending.append(child)
    return depths
|
|
2582
|
+
|
|
2583
|
+
|
|
2584
|
+
# Register the topic command group
|
|
2585
|
+
cli.add_command(topic)
|
|
2586
|
+
|
|
2587
|
+
|
|
1997
2588
|
if __name__ == "__main__":
|
|
1998
2589
|
cli()
|