DeepFabric 4.10.1__py3-none-any.whl → 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/cli.py CHANGED
@@ -2,11 +2,19 @@ import contextlib
2
2
  import json
3
3
  import math
4
4
  import os
5
+ import platform
6
+ import select
5
7
  import signal
6
8
  import sys
7
9
 
8
10
  from pathlib import Path
9
- from typing import Literal, NoReturn, cast
11
+ from typing import TYPE_CHECKING, Literal, NoReturn, cast
12
+
13
+ if TYPE_CHECKING:
14
+ from rich.tree import Tree as RichTree
15
+
16
+ from .topic_inspector import TopicInspectionResult
17
+ from .tui import DeepFabricTUI
10
18
 
11
19
  import click
12
20
  import yaml
@@ -137,7 +145,7 @@ class GenerateOptions(BaseModel):
137
145
  batch_size: int | None = None
138
146
  base_url: str | None = None
139
147
  include_system_message: bool | None = None
140
- mode: Literal["tree", "graph"] = Field(default="tree")
148
+ mode: Literal["tree", "graph"] | None = Field(default=None)
141
149
  debug: bool = False
142
150
  topic_only: bool = False
143
151
  tui: Literal["rich", "simple"] = Field(default="rich")
@@ -330,7 +338,7 @@ def _load_and_prepare_generation_context(
330
338
  # Skip path validation for topic-only mode since we're not generating dataset samples
331
339
  if not skip_path_validation:
332
340
  validate_path_requirements(
333
- mode=options.mode,
341
+ mode=config.topics.mode,
334
342
  depth=final_depth,
335
343
  degree=final_degree,
336
344
  num_samples=final_num_samples,
@@ -339,7 +347,7 @@ def _load_and_prepare_generation_context(
339
347
  )
340
348
 
341
349
  show_validation_success(
342
- mode=options.mode,
350
+ mode=config.topics.mode,
343
351
  depth=final_depth,
344
352
  degree=final_degree,
345
353
  num_samples=final_num_samples,
@@ -419,6 +427,35 @@ def _trigger_cloud_upload(
419
427
  )
420
428
 
421
429
 
430
+ def _prompt_with_timeout(
431
+ choices: list[str],
432
+ default: str,
433
+ timeout: int = 20,
434
+ ) -> str:
435
+ """Prompt for a choice with a visible countdown, auto-selecting default on timeout."""
436
+ if platform.system() == "Windows":
437
+ return click.prompt(
438
+ f" Choose [{'/'.join(choices)}]",
439
+ type=click.Choice(choices),
440
+ default=default,
441
+ )
442
+ valid = set(choices)
443
+ for remaining in range(timeout, 0, -1):
444
+ sys.stdout.write(f"\r Choose [{'/'.join(choices)}] (auto-{default} in {remaining:2d}s): ")
445
+ sys.stdout.flush()
446
+ ready, _, _ = select.select([sys.stdin], [], [], 1.0)
447
+ if ready:
448
+ line = sys.stdin.readline().strip()
449
+ sys.stdout.write("\n")
450
+ sys.stdout.flush()
451
+ if line in valid:
452
+ return line
453
+ return default
454
+ sys.stdout.write("\n")
455
+ sys.stdout.flush()
456
+ return default
457
+
458
+
422
459
  def _run_generation(
423
460
  *,
424
461
  preparation: GenerationPreparation,
@@ -470,14 +507,10 @@ def _run_generation(
470
507
  tui.console.print(" [cyan]3)[/cyan] Abort")
471
508
  tui.console.print()
472
509
 
473
- choice = click.prompt(
474
- "Choose an option",
475
- type=click.Choice(["1", "2", "3"]),
476
- default="1",
477
- )
510
+ choice = _prompt_with_timeout(["1", "2", "3"], default="1", timeout=20)
478
511
 
479
512
  if choice == "1":
480
- # User wants to resume
513
+ # User wants to resume (or auto-selected after timeout)
481
514
  options.resume = True
482
515
  elif choice == "2":
483
516
  # Clear and start fresh
@@ -493,7 +526,7 @@ def _run_generation(
493
526
  if engine.load_checkpoint(retry_failed=options.retry_failed):
494
527
  samples_done = engine._flushed_samples_count
495
528
  failures_done = engine._flushed_failures_count
496
- ids_processed = len(engine._processed_ids)
529
+ ids_processed = len(engine._completed)
497
530
  retry_msg = " (retrying failed samples)" if options.retry_failed else ""
498
531
 
499
532
  # Update TUI status panel with checkpoint progress
@@ -503,17 +536,18 @@ def _run_generation(
503
536
  if failures_done > 0:
504
537
  tui.info(
505
538
  f"Resuming from checkpoint: {samples_done} samples, "
506
- f"{failures_done} failed, {ids_processed} IDs processed{retry_msg}"
539
+ f"{failures_done} failed, {ids_processed} UUIDs processed{retry_msg}"
507
540
  )
508
541
  else:
509
542
  tui.info(
510
543
  f"Resuming from checkpoint: {samples_done} samples, "
511
- f"{ids_processed} IDs processed{retry_msg}"
544
+ f"{ids_processed} UUIDs processed{retry_msg}"
512
545
  )
513
546
  else:
514
547
  tui.info("No checkpoint found, starting fresh generation")
515
548
 
516
- # Set up graceful Ctrl+C handling for checkpoint-based stop
549
+ # Set up graceful Ctrl+C handling
550
+ has_checkpoint = generation_params.get("checkpoint_interval") is not None
517
551
  interrupt_count = 0
518
552
 
519
553
  def handle_sigint(_signum, _frame):
@@ -522,7 +556,12 @@ def _run_generation(
522
556
 
523
557
  if interrupt_count == 1:
524
558
  engine.stop_requested = True
525
- tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
559
+ if has_checkpoint:
560
+ tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
561
+ else:
562
+ tui.warning(
563
+ "Stopping... partial results will be saved. (Ctrl+C again to force quit)"
564
+ )
526
565
  dataset_tui = get_dataset_tui()
527
566
  dataset_tui.log_event("⚠ Graceful stop requested")
528
567
  dataset_tui.status_stop_requested()
@@ -547,12 +586,22 @@ def _run_generation(
547
586
  finally:
548
587
  signal.signal(signal.SIGINT, original_handler)
549
588
 
550
- # If gracefully stopped, don't save partial dataset or clean up checkpoints
589
+ output_config = preparation.config.get_output_config()
590
+ output_save_path = options.output_save_as or output_config["save_as"]
591
+
592
+ # If gracefully stopped, handle based on checkpoint availability
551
593
  if engine.stop_requested:
594
+ if has_checkpoint:
595
+ # Checkpoint on disk — user can resume later
596
+ return
597
+ # No checkpoint — save whatever was generated so far
598
+ if dataset and len(dataset) > 0:
599
+ tui.info(f"Saving {len(dataset)} samples generated before stop")
600
+ save_dataset(dataset, output_save_path, preparation.config, engine=engine)
601
+ else:
602
+ tui.warning("No samples were generated before stop")
552
603
  return
553
604
 
554
- output_config = preparation.config.get_output_config()
555
- output_save_path = options.output_save_as or output_config["save_as"]
556
605
  save_dataset(dataset, output_save_path, preparation.config, engine=engine)
557
606
 
558
607
  # Clean up checkpoint files after successful completion
@@ -612,8 +661,8 @@ def _run_generation(
612
661
  @click.option(
613
662
  "--mode",
614
663
  type=click.Choice(["tree", "graph"]),
615
- default="tree",
616
- help="Topic generation mode (default: tree)",
664
+ default=None,
665
+ help="Topic generation mode (default: graph)",
617
666
  )
618
667
  @click.option(
619
668
  "--debug",
@@ -651,8 +700,7 @@ def _run_generation(
651
700
  "--cloud-upload",
652
701
  type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
653
702
  default=None,
654
- help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
655
- "Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
703
+ help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. ",
656
704
  )
657
705
  @click.option(
658
706
  "--checkpoint-interval",
@@ -783,7 +831,9 @@ def generate( # noqa: PLR0913
783
831
 
784
832
  # Compute checkpoint directory once for consistent use throughout generation
785
833
  # Use config file for hash, fallback to output path for config-less runs
786
- path_source = options.config_file or options.output_save_as or preparation.config.output.save_as
834
+ path_source = (
835
+ options.config_file or options.output_save_as or preparation.config.output.save_as
836
+ )
787
837
  checkpoint_dir = options.checkpoint_path or get_checkpoint_dir(path_source)
788
838
 
789
839
  # Auto-infer topics-load when resuming from checkpoint
@@ -1295,23 +1345,34 @@ def validate(config_file: str, check_api: bool) -> None: # noqa: PLR0912
1295
1345
  f"estimated_paths={estimated_paths} ({degree}^{depth})"
1296
1346
  )
1297
1347
 
1298
- # Output summary with step size and checkpoint info
1348
+ # Output summary with cycle-based generation info
1299
1349
  num_samples = config.output.num_samples
1300
1350
  batch_size = config.output.batch_size
1301
- # Calculate num_steps - handle 'auto' and percentage strings
1302
- if isinstance(num_samples, int):
1303
- num_steps = math.ceil(num_samples / batch_size)
1304
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}, num_steps={num_steps}"
1305
- else:
1306
- # For 'auto' or percentage, we can't compute steps without topic count
1307
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}"
1308
1351
 
1309
- # Add checkpoint info if enabled
1352
+ # Show output configuration
1353
+ output_info = f"Output: num_samples={num_samples}, concurrency={batch_size}"
1310
1354
  if config.output.checkpoint:
1311
1355
  checkpoint = config.output.checkpoint
1312
1356
  output_info += f", checkpoint_interval={checkpoint.interval}"
1313
1357
  tui.info(output_info)
1314
1358
 
1359
+ # Calculate and display cycle-based generation info
1360
+ if isinstance(num_samples, int):
1361
+ cycles_needed = math.ceil(num_samples / estimated_paths)
1362
+ final_cycle_size = num_samples - (cycles_needed - 1) * estimated_paths
1363
+ is_partial = final_cycle_size < estimated_paths
1364
+
1365
+ tui.info(
1366
+ f" → Cycles needed: {cycles_needed} "
1367
+ f"({num_samples} samples ÷ {estimated_paths} unique topics)"
1368
+ )
1369
+ if is_partial:
1370
+ tui.info(f" → Final cycle: {final_cycle_size} topics (partial)")
1371
+ elif num_samples == "auto":
1372
+ tui.info(f" → Will generate 1 sample per unique topic ({estimated_paths} samples)")
1373
+ else:
1374
+ tui.info(" → Samples calculated at runtime based on topic count")
1375
+
1315
1376
  if config.huggingface:
1316
1377
  hf_config = config.get_huggingface_config()
1317
1378
  tui.info(f"Hugging Face: repo={hf_config.get('repository', 'not set')}")
@@ -1893,7 +1954,8 @@ def checkpoint_status(config_file: str) -> None:
1893
1954
  # Check if checkpoint exists
1894
1955
  if not metadata_path.exists():
1895
1956
  tui.info(f"No checkpoint found at: {metadata_path}")
1896
- tui.info("\nTo enable checkpointing, run:")
1957
+ tui.console.print()
1958
+ tui.info("To enable checkpointing, run:")
1897
1959
  tui.info(f" deepfabric generate {config_file} --checkpoint-interval 10")
1898
1960
  return
1899
1961
 
@@ -1994,5 +2056,534 @@ def checkpoint_status(config_file: str) -> None:
1994
2056
  )
1995
2057
 
1996
2058
 
2059
+ # Topic inspection command group
2060
@click.group()
def topic() -> None:
    """Topic management commands."""
    # Pure container group: subcommands attach themselves via @topic.command(...).
2064
+
2065
+
2066
@topic.command("inspect")
@click.argument("file", type=click.Path(exists=True))
@click.option(
    "--level",
    "-l",
    type=int,
    default=None,
    help="Show topics at a specific depth level (0=root, 1=first children, etc.)",
)
@click.option(
    "--expand",
    "-e",
    type=int,
    default=None,
    is_flag=False,
    flag_value=-1,  # bare --expand (no number) is passed through as -1: expand every level
    help="Show subtopics in tree format. Use alone for all levels, or specify depth (e.g., --expand 2)",
)
@click.option(
    "--all",
    "-a",
    "show_all",
    is_flag=True,
    help="Show the entire tree structure with indentation",
)
@click.option(
    "--format",
    "-f",
    "output_format",
    type=click.Choice(["tree", "table", "json"]),
    default="tree",
    help="Output format (default: tree)",
)
@click.option(
    "--uuid",
    "-u",
    "show_uuid",
    is_flag=True,
    help="Show UUID/topic_id for each leaf node",
)
def topic_inspect(
    file: str,
    level: int | None,
    expand: int | None,
    show_all: bool,
    output_format: str,
    show_uuid: bool,
) -> None:
    """Inspect a topic tree or graph file.

    Displays metadata and structure of topic files generated by DeepFabric.
    Supports both Tree (JSONL) and Graph (JSON) formats with auto-detection.

    Examples:

    \b
    # Show file metadata and summary
    deepfabric topic inspect topic_tree.jsonl

    \b
    # Show topics at depth level 2 (just topic names)
    deepfabric topic inspect topic_tree.jsonl --level 2

    \b
    # Show level 2 topics and all subtopics (tree format)
    deepfabric topic inspect topic_tree.jsonl --level 2 --expand

    \b
    # Show level 2 topics and 1 sublevel only (tree format)
    deepfabric topic inspect topic_tree.jsonl --level 2 --expand 1

    \b
    # Show entire tree with indentation
    deepfabric topic inspect topic_graph.json --all

    \b
    # Output as JSON for scripting
    deepfabric topic inspect topic_tree.jsonl --format json

    \b
    # Show UUIDs for each leaf node
    deepfabric topic inspect topic_tree.jsonl --all --uuid
    """
    from .topic_inspector import inspect_topic_file  # noqa: PLC0415

    tui = get_tui()

    try:
        result = inspect_topic_file(file, level=level, expand_depth=expand, show_all=show_all)

        # Anything other than JSON is rendered with rich (tree or table layout).
        if output_format != "json":
            _display_inspection_result(
                tui, result, output_format, level, expand, show_all, show_uuid
            )
            return

        # JSON output: fixed summary fields first, then whichever optional
        # path listings the inspection actually populated.
        payload = {
            "format": result.format,
            "source_file": result.source_file,
            "total_paths": result.total_paths,
            "max_depth": result.max_depth,
            "metadata": result.metadata,
        }
        for optional_field in ("paths_at_level", "expanded_paths", "all_paths"):
            listing = getattr(result, optional_field)
            if listing is not None:
                payload[optional_field] = listing
        tui.console.print_json(json.dumps(payload))

    except FileNotFoundError as exc:
        tui.error(str(exc))
        sys.exit(1)
    except ValueError as exc:
        tui.error(f"Invalid file: {exc}")
        sys.exit(1)
    except Exception as exc:
        # CLI boundary: report any other failure as a message and exit non-zero.
        tui.error(f"Error inspecting file: {exc}")
        sys.exit(1)
2187
+
2188
+
2189
def _display_inspection_result(
    tui: "DeepFabricTUI",
    result: "TopicInspectionResult",
    output_format: str,
    level: int | None,
    expand: int | None,
    show_all: bool,
    show_uuid: bool = False,
) -> None:
    """Render an inspection result to the TUI console with rich formatting.

    Prints a header, a statistics panel, and then up to three optional
    sections depending on the arguments: the topics at ``level`` (when no
    ``expand`` was given), the expanded subtree below ``level`` (when
    ``expand`` was given), and the full structure (when ``show_all`` is set).
    Path sections are drawn as a table when ``output_format`` is ``"table"``
    and as a tree otherwise. UUIDs are shown only when ``show_uuid`` is true.
    """
    from rich.panel import Panel  # noqa: PLC0415
    from rich.table import Table  # noqa: PLC0415

    console = tui.console
    meta = result.metadata

    def render_paths(paths: list[list[str]]) -> None:
        # Shared by the "expanded" and "all" sections below.
        if output_format == "table":
            _display_paths_as_table(tui, paths)
            return
        _display_paths_as_tree(
            tui,
            paths,
            result.path_to_uuid if show_uuid else None,
            result.topic_to_uuid if show_uuid else None,
        )

    # --- Header -----------------------------------------------------------
    console.print()
    console.print("[bold cyan]Topic Inspector[/bold cyan]")
    console.print(f"[dim]{result.source_file}[/dim]")
    console.print()

    # --- Statistics panel -------------------------------------------------
    stats = Table(show_header=False, box=None, padding=(0, 1))
    stats.add_column(style="cyan", no_wrap=True)
    stats.add_column(style="white")

    stats.add_row("Format:", "Graph (JSON)" if result.format == "graph" else "Tree (JSONL)")
    stats.add_row("Total Paths:", str(result.total_paths))
    stats.add_row("Max Depth:", str(result.max_depth))

    root_topic = meta.get("root_topic")
    if root_topic:
        # Keep the panel narrow: long root prompts are cut to 57 chars plus an ellipsis.
        shown_root = root_topic if len(root_topic) <= 60 else root_topic[:57] + "..."  # noqa: PLR2004
        stats.add_row("Root Topic:", shown_root)

    node_total = meta.get("total_nodes")
    if node_total:
        stats.add_row("Total Nodes:", str(node_total))

    cycles_flag = meta.get("has_cycles")
    if cycles_flag is not None:
        stats.add_row("Has Cycles:", "Yes" if cycles_flag else "No")

    for meta_key, row_label in (
        ("provider", "Provider:"),
        ("model", "Model:"),
        ("created_at", "Created:"),
    ):
        meta_value = meta.get(meta_key)
        if meta_value:
            stats.add_row(row_label, meta_value)

    console.print(Panel(stats, title="Statistics", border_style="dim"))

    level_given = level is not None

    # --- Topics at one level (no --expand): flat bullet list --------------
    if level_given and expand is None and result.paths_at_level is not None:
        console.print()
        console.print(f"[cyan bold]Topics at Level {level}:[/cyan bold]")

        if not result.paths_at_level:
            console.print(f" [dim]No topics at level {level}[/dim]")
        for topic_path in result.paths_at_level:
            topic_name = topic_path[0] if topic_path else ""
            found_uuid = ""
            if show_uuid:
                # Graph files map topic names to node UUIDs; tree files only
                # map complete leaf paths, so that map is the fallback.
                if result.topic_to_uuid:
                    found_uuid = result.topic_to_uuid.get(topic_name, "")
                if not found_uuid and result.path_to_uuid:
                    found_uuid = result.path_to_uuid.get(tuple(topic_path), "")
            if found_uuid:
                console.print(
                    f" • {topic_name} [dim](UUID: {found_uuid})[/dim]", highlight=False
                )
            else:
                console.print(f" • {topic_name}")

    # --- Subtree below a level (--expand) ---------------------------------
    if level_given and expand is not None and result.expanded_paths is not None:
        console.print()
        if expand == -1:
            depth_info = "all sublevels"
        else:
            depth_info = f"{expand} sublevel(s)"
        console.print(f"[cyan bold]Subtree from Level {level} ({depth_info}):[/cyan bold]")

        if result.expanded_paths:
            render_paths(result.expanded_paths)
        else:
            console.print(f" [dim]No topics at or below level {level}[/dim]")

    # --- Entire structure (--all) -----------------------------------------
    if show_all and result.all_paths:
        console.print()
        console.print("[cyan bold]Full Tree Structure:[/cyan bold]")
        render_paths(result.all_paths)
2303
+
2304
+
2305
def _display_paths_as_table(tui: "DeepFabricTUI", paths: list[list[str]]) -> None:
    """Print topic paths as a numbered rich table.

    Each row shows a 1-based index, the path joined with ``" > "`` (cut to
    77 characters plus ``...`` when longer than 80), and the path length.
    Only the first 100 paths are listed; a final summary row reports how many
    were left out.
    """
    from rich.table import Table  # noqa: PLC0415

    table = Table(show_header=True, header_style="bold cyan")
    for heading, column_style in (("#", "dim"), ("Path", "white"), ("Depth", "green")):
        table.add_column(heading, style=column_style)

    for row_number, path in enumerate(paths[:100], start=1):
        joined = " > ".join(path)
        cell = joined if len(joined) <= 80 else joined[:77] + "..."  # noqa: PLR2004
        table.add_row(str(row_number), cell, str(len(path)))

    omitted = len(paths) - 100
    if omitted > 0:
        table.add_row("...", f"[dim]{omitted} more paths[/dim]", "")

    tui.console.print(table)
2324
+
2325
+
2326
def _display_paths_as_tree(
    tui: "DeepFabricTUI",
    paths: list[list[str]],
    path_to_uuid: dict[tuple[str, ...], str] | None = None,
    topic_to_uuid: dict[str, str] | None = None,
) -> None:
    """Display paths in an indented tree format.

    Paths are grouped by their first element and one rich tree is printed per
    distinct root, in first-seen order. At most 20 roots are drawn; a trailing
    line reports how many were omitted. Empty paths are ignored.

    Args:
        tui: TUI wrapper whose ``console`` receives the output.
        paths: Topic paths, each a list of topic names from root to node.
        path_to_uuid: Optional map from a full path tuple to a UUID, forwarded
            to ``_add_children_to_tree`` for labelling.
        topic_to_uuid: Optional map from topic name to UUID; when a root topic
            is present in it, the UUID is appended to the root label.
    """
    from rich.tree import Tree as RichTree  # noqa: PLC0415

    # Group non-empty paths by root topic (dicts keep first-seen order).
    root_groups: dict[str, list[list[str]]] = {}
    for path in paths:
        if path:
            root_groups.setdefault(path[0], []).append(path)

    if not root_groups:
        return

    # The root label always comes from the grouped, non-empty paths. Indexing
    # paths[0][0] directly would raise IndexError when the first path is empty
    # but later ones are not.
    max_roots = 20
    for root_topic, root_paths in list(root_groups.items())[:max_roots]:
        root_label = f"[bold]{root_topic}[/bold]"
        if topic_to_uuid and root_topic in topic_to_uuid:
            root_label += f" [dim](UUID: {topic_to_uuid[root_topic]})[/dim]"
        tree = RichTree(root_label)
        _add_children_to_tree(
            tree, root_paths, 1, path_to_uuid=path_to_uuid, topic_to_uuid=topic_to_uuid
        )
        tui.console.print(tree)

    if len(root_groups) > max_roots:
        tui.console.print(f"[dim]... and {len(root_groups) - max_roots} more topics[/dim]")
2373
+
2374
+
2375
+ def _add_children_to_tree(
2376
+ parent: "RichTree",
2377
+ paths: list[list[str]],
2378
+ depth: int,
2379
+ max_depth: int = 5,
2380
+ path_to_uuid: dict[tuple[str, ...], str] | None = None,
2381
+ topic_to_uuid: dict[str, str] | None = None,
2382
+ ) -> None:
2383
+ """Recursively add children to a rich tree (limited depth for display)."""
2384
+ if depth > max_depth:
2385
+ remaining = len([p for p in paths if len(p) > depth])
2386
+ if remaining > 0:
2387
+ parent.add(f"[dim]... {remaining} more levels[/dim]")
2388
+ return
2389
+
2390
+ # Group paths by their element at current depth
2391
+ children: dict[str, list[list[str]]] = {}
2392
+ for path in paths:
2393
+ if len(path) > depth:
2394
+ child_topic = path[depth]
2395
+ if child_topic not in children:
2396
+ children[child_topic] = []
2397
+ children[child_topic].append(path)
2398
+
2399
+ # Add children to tree
2400
+ for child_topic, child_paths in list(children.items())[:20]:
2401
+ # Check for UUID: first try topic_to_uuid (graph nodes), then path_to_uuid (leaves)
2402
+ uuid = ""
2403
+ if topic_to_uuid and child_topic in topic_to_uuid:
2404
+ uuid = topic_to_uuid[child_topic]
2405
+ elif path_to_uuid:
2406
+ # Check if this child is a leaf (path ends at depth + 1)
2407
+ is_leaf = any(len(p) == depth + 1 for p in child_paths)
2408
+ if is_leaf:
2409
+ leaf_path = next((p for p in child_paths if len(p) == depth + 1), None)
2410
+ if leaf_path:
2411
+ uuid = path_to_uuid.get(tuple(leaf_path), "")
2412
+
2413
+ if uuid:
2414
+ child_node = parent.add(f"{child_topic} [dim](UUID: {uuid})[/dim]")
2415
+ else:
2416
+ child_node = parent.add(child_topic)
2417
+ _add_children_to_tree(
2418
+ child_node, child_paths, depth + 1, max_depth, path_to_uuid, topic_to_uuid
2419
+ )
2420
+
2421
+ if len(children) > 20: # noqa: PLR2004
2422
+ parent.add(f"[dim]... and {len(children) - 20} more siblings[/dim]")
2423
+
2424
+
2425
def _bfs_node_depths(start) -> dict[int, int]:
    """Return ``{node.id: depth}`` for every node reachable from ``start``.

    Walks ``children`` breadth-first, with ``start`` at depth 0. Nodes are
    de-duplicated by ``id``, so a node reachable along several routes is
    recorded once, at the depth at which it is first dequeued.
    """
    from collections import deque  # noqa: PLC0415

    depths: dict[int, int] = {}
    # deque gives O(1) pops from the left; list.pop(0) is O(n) per pop.
    pending = deque([(start, 0)])
    while pending:
        node, depth = pending.popleft()
        if node.id in depths:
            continue
        depths[node.id] = depth
        for child in node.children:
            if child.id not in depths:
                pending.append((child, depth + 1))
    return depths


@topic.command("prune")
@click.argument("file", type=click.Path(exists=True))
@click.option(
    "--level",
    "-l",
    type=int,
    default=None,
    help="Prune all nodes below this depth level (0=root, 1=children, etc.)",
)
@click.option(
    "--uuid",
    "-u",
    type=str,
    default=None,
    help="Remove the node with this UUID and its entire subtree",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    default=None,
    help="Output file path (default: auto-generated from input filename)",
)
@click.option(
    "--force",
    "-f",
    is_flag=True,
    help="Overwrite the input file instead of creating a new one",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be removed without making changes",
)
def topic_prune(
    file: str,
    level: int | None,
    uuid: str | None,
    output: str | None,
    force: bool,
    dry_run: bool,
) -> None:
    """Prune a topic graph by removing nodes.

    Supports two modes:

    \b
    # Remove all nodes below depth level 2
    deepfabric topic prune topic_graph.json --level 2

    \b
    # Remove a specific node and its subtree by UUID
    deepfabric topic prune topic_graph.json --uuid abc-123-def

    \b
    # Preview what would be removed (no file written)
    deepfabric topic prune topic_graph.json --level 1 --dry-run

    \b
    # Overwrite the original file
    deepfabric topic prune topic_graph.json --uuid abc-123 --force
    """
    from .graph_pruner import (  # noqa: PLC0415
        load_graph_for_pruning,
        prune_graph_at_level,
        prune_graph_by_uuid,
    )

    tui = get_tui()

    # Validate: exactly one of --level / --uuid must be specified.
    if level is None and uuid is None:
        tui.error("Specify either --level or --uuid")
        sys.exit(1)
    if level is not None and uuid is not None:
        tui.error("Cannot use --level and --uuid together")
        sys.exit(1)

    try:
        if dry_run:
            # Dry run: load the graph and report counts only; nothing is written.
            graph = load_graph_for_pruning(file)
            total_nodes = len(graph.nodes)

            tui.console.print()
            tui.console.print("[bold]DRY RUN[/bold] — no changes will be made")
            tui.console.print()

            if level is not None:
                # Count every node whose BFS depth from the root is below the cut.
                node_depths = _bfs_node_depths(graph.root)
                removed = sum(1 for node_depth in node_depths.values() if node_depth > level)
                tui.console.print(f" Graph: {total_nodes} unique nodes")
                tui.console.print(f" Would remove: {removed} nodes below level {level}")
                tui.console.print(f" Would keep: {total_nodes - removed} nodes")
            else:
                target = graph.find_node_by_uuid(uuid)
                if target is None:
                    tui.error(f"No node found with UUID: {uuid}")
                    sys.exit(1)

                # The subtree is the target plus everything reachable from it.
                subtree_count = len(_bfs_node_depths(target))

                tui.console.print(f" Graph: {total_nodes} unique nodes")
                tui.console.print(
                    f" Target: {target.topic}",
                    highlight=False,
                )
                tui.console.print(f" Would remove: {subtree_count} nodes (including subtree)")
                tui.console.print(f" Would keep: {total_nodes - subtree_count} nodes")
            return

        # --force writes back to the input file; otherwise use --output
        # (None is passed through to the pruner unchanged).
        output_path = file if force else output

        if level is not None:
            result = prune_graph_at_level(file, level, output_path)
        else:
            result = prune_graph_by_uuid(file, uuid, output_path)

        tui.console.print()
        tui.success("Graph pruned successfully")
        tui.console.print(f" Removed: {result.removed_count} nodes")
        tui.console.print(
            f" Remaining: {result.remaining_nodes} nodes, {result.remaining_paths} paths"
        )
        tui.console.print(f" Saved to: {result.output_path}")

    except FileNotFoundError as e:
        tui.error(str(e))
        sys.exit(1)
    except ValueError as e:
        tui.error(str(e))
        sys.exit(1)
2582
+
2583
+
2584
+ # Register the topic command group
2585
+ cli.add_command(topic)
2586
+
2587
+
1997
2588
  if __name__ == "__main__":
1998
2589
  cli()