DeepFabric 4.10.0__py3-none-any.whl → 4.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported public registry. The information contained in this diff is provided for informational purposes only and reflects the changes between the two package versions as they appear in that registry.
deepfabric/cli.py CHANGED
@@ -2,6 +2,8 @@ import contextlib
2
2
  import json
3
3
  import math
4
4
  import os
5
+ import platform
6
+ import select
5
7
  import signal
6
8
  import sys
7
9
 
@@ -419,6 +421,35 @@ def _trigger_cloud_upload(
419
421
  )
420
422
 
421
423
 
424
+ def _prompt_with_timeout(
425
+ choices: list[str],
426
+ default: str,
427
+ timeout: int = 20,
428
+ ) -> str:
429
+ """Prompt for a choice with a visible countdown, auto-selecting default on timeout."""
430
+ if platform.system() == "Windows":
431
+ return click.prompt(
432
+ f" Choose [{'/'.join(choices)}]",
433
+ type=click.Choice(choices),
434
+ default=default,
435
+ )
436
+ valid = set(choices)
437
+ for remaining in range(timeout, 0, -1):
438
+ sys.stdout.write(f"\r Choose [{'/'.join(choices)}] (auto-{default} in {remaining:2d}s): ")
439
+ sys.stdout.flush()
440
+ ready, _, _ = select.select([sys.stdin], [], [], 1.0)
441
+ if ready:
442
+ line = sys.stdin.readline().strip()
443
+ sys.stdout.write("\n")
444
+ sys.stdout.flush()
445
+ if line in valid:
446
+ return line
447
+ return default
448
+ sys.stdout.write("\n")
449
+ sys.stdout.flush()
450
+ return default
451
+
452
+
422
453
  def _run_generation(
423
454
  *,
424
455
  preparation: GenerationPreparation,
@@ -470,14 +501,10 @@ def _run_generation(
470
501
  tui.console.print(" [cyan]3)[/cyan] Abort")
471
502
  tui.console.print()
472
503
 
473
- choice = click.prompt(
474
- "Choose an option",
475
- type=click.Choice(["1", "2", "3"]),
476
- default="1",
477
- )
504
+ choice = _prompt_with_timeout(["1", "2", "3"], default="1", timeout=20)
478
505
 
479
506
  if choice == "1":
480
- # User wants to resume
507
+ # User wants to resume (or auto-selected after timeout)
481
508
  options.resume = True
482
509
  elif choice == "2":
483
510
  # Clear and start fresh
@@ -493,7 +520,7 @@ def _run_generation(
493
520
  if engine.load_checkpoint(retry_failed=options.retry_failed):
494
521
  samples_done = engine._flushed_samples_count
495
522
  failures_done = engine._flushed_failures_count
496
- ids_processed = len(engine._processed_ids)
523
+ ids_processed = len(engine._completed)
497
524
  retry_msg = " (retrying failed samples)" if options.retry_failed else ""
498
525
 
499
526
  # Update TUI status panel with checkpoint progress
@@ -503,17 +530,18 @@ def _run_generation(
503
530
  if failures_done > 0:
504
531
  tui.info(
505
532
  f"Resuming from checkpoint: {samples_done} samples, "
506
- f"{failures_done} failed, {ids_processed} IDs processed{retry_msg}"
533
+ f"{failures_done} failed, {ids_processed} UUIDs processed{retry_msg}"
507
534
  )
508
535
  else:
509
536
  tui.info(
510
537
  f"Resuming from checkpoint: {samples_done} samples, "
511
- f"{ids_processed} IDs processed{retry_msg}"
538
+ f"{ids_processed} UUIDs processed{retry_msg}"
512
539
  )
513
540
  else:
514
541
  tui.info("No checkpoint found, starting fresh generation")
515
542
 
516
- # Set up graceful Ctrl+C handling for checkpoint-based stop
543
+ # Set up graceful Ctrl+C handling
544
+ has_checkpoint = generation_params.get("checkpoint_interval") is not None
517
545
  interrupt_count = 0
518
546
 
519
547
  def handle_sigint(_signum, _frame):
@@ -522,7 +550,12 @@ def _run_generation(
522
550
 
523
551
  if interrupt_count == 1:
524
552
  engine.stop_requested = True
525
- tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
553
+ if has_checkpoint:
554
+ tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
555
+ else:
556
+ tui.warning(
557
+ "Stopping... partial results will be saved. (Ctrl+C again to force quit)"
558
+ )
526
559
  dataset_tui = get_dataset_tui()
527
560
  dataset_tui.log_event("⚠ Graceful stop requested")
528
561
  dataset_tui.status_stop_requested()
@@ -547,12 +580,22 @@ def _run_generation(
547
580
  finally:
548
581
  signal.signal(signal.SIGINT, original_handler)
549
582
 
550
- # If gracefully stopped, don't save partial dataset or clean up checkpoints
583
+ output_config = preparation.config.get_output_config()
584
+ output_save_path = options.output_save_as or output_config["save_as"]
585
+
586
+ # If gracefully stopped, handle based on checkpoint availability
551
587
  if engine.stop_requested:
588
+ if has_checkpoint:
589
+ # Checkpoint on disk — user can resume later
590
+ return
591
+ # No checkpoint — save whatever was generated so far
592
+ if dataset and len(dataset) > 0:
593
+ tui.info(f"Saving {len(dataset)} samples generated before stop")
594
+ save_dataset(dataset, output_save_path, preparation.config, engine=engine)
595
+ else:
596
+ tui.warning("No samples were generated before stop")
552
597
  return
553
598
 
554
- output_config = preparation.config.get_output_config()
555
- output_save_path = options.output_save_as or output_config["save_as"]
556
599
  save_dataset(dataset, output_save_path, preparation.config, engine=engine)
557
600
 
558
601
  # Clean up checkpoint files after successful completion
@@ -651,8 +694,7 @@ def _run_generation(
651
694
  "--cloud-upload",
652
695
  type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
653
696
  default=None,
654
- help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
655
- "Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
697
+ help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. ",
656
698
  )
657
699
  @click.option(
658
700
  "--checkpoint-interval",
@@ -783,7 +825,9 @@ def generate( # noqa: PLR0913
783
825
 
784
826
  # Compute checkpoint directory once for consistent use throughout generation
785
827
  # Use config file for hash, fallback to output path for config-less runs
786
- path_source = options.config_file or options.output_save_as or preparation.config.output.save_as
828
+ path_source = (
829
+ options.config_file or options.output_save_as or preparation.config.output.save_as
830
+ )
787
831
  checkpoint_dir = options.checkpoint_path or get_checkpoint_dir(path_source)
788
832
 
789
833
  # Auto-infer topics-load when resuming from checkpoint
@@ -1295,23 +1339,34 @@ def validate(config_file: str, check_api: bool) -> None: # noqa: PLR0912
1295
1339
  f"estimated_paths={estimated_paths} ({degree}^{depth})"
1296
1340
  )
1297
1341
 
1298
- # Output summary with step size and checkpoint info
1342
+ # Output summary with cycle-based generation info
1299
1343
  num_samples = config.output.num_samples
1300
1344
  batch_size = config.output.batch_size
1301
- # Calculate num_steps - handle 'auto' and percentage strings
1302
- if isinstance(num_samples, int):
1303
- num_steps = math.ceil(num_samples / batch_size)
1304
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}, num_steps={num_steps}"
1305
- else:
1306
- # For 'auto' or percentage, we can't compute steps without topic count
1307
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}"
1308
1345
 
1309
- # Add checkpoint info if enabled
1346
+ # Show output configuration
1347
+ output_info = f"Output: num_samples={num_samples}, concurrency={batch_size}"
1310
1348
  if config.output.checkpoint:
1311
1349
  checkpoint = config.output.checkpoint
1312
1350
  output_info += f", checkpoint_interval={checkpoint.interval}"
1313
1351
  tui.info(output_info)
1314
1352
 
1353
+ # Calculate and display cycle-based generation info
1354
+ if isinstance(num_samples, int):
1355
+ cycles_needed = math.ceil(num_samples / estimated_paths)
1356
+ final_cycle_size = num_samples - (cycles_needed - 1) * estimated_paths
1357
+ is_partial = final_cycle_size < estimated_paths
1358
+
1359
+ tui.info(
1360
+ f" → Cycles needed: {cycles_needed} "
1361
+ f"({num_samples} samples ÷ {estimated_paths} unique topics)"
1362
+ )
1363
+ if is_partial:
1364
+ tui.info(f" → Final cycle: {final_cycle_size} topics (partial)")
1365
+ elif num_samples == "auto":
1366
+ tui.info(f" → Will generate 1 sample per unique topic ({estimated_paths} samples)")
1367
+ else:
1368
+ tui.info(" → Samples calculated at runtime based on topic count")
1369
+
1315
1370
  if config.huggingface:
1316
1371
  hf_config = config.get_huggingface_config()
1317
1372
  tui.info(f"Hugging Face: repo={hf_config.get('repository', 'not set')}")
@@ -1893,7 +1948,8 @@ def checkpoint_status(config_file: str) -> None:
1893
1948
  # Check if checkpoint exists
1894
1949
  if not metadata_path.exists():
1895
1950
  tui.info(f"No checkpoint found at: {metadata_path}")
1896
- tui.info("\nTo enable checkpointing, run:")
1951
+ tui.console.print()
1952
+ tui.info("To enable checkpointing, run:")
1897
1953
  tui.info(f" deepfabric generate {config_file} --checkpoint-interval 10")
1898
1954
  return
1899
1955
 
@@ -778,7 +778,7 @@ def handle_cloud_upload( # noqa: PLR0911
778
778
 
779
779
  # Build prompt based on what's available
780
780
  if has_dataset and has_graph:
781
- prompt_text = " Upload to DeepFabric Cloud?"
781
+ prompt_text = " Upload graph and dataset to DeepFabric Cloud?"
782
782
  hint = "[dim](Y=both, n=skip, c=choose)[/dim]"
783
783
  elif has_dataset:
784
784
  prompt_text = " Upload dataset to DeepFabric Cloud?"
deepfabric/config.py CHANGED
@@ -628,11 +628,13 @@ See documentation for full examples.
628
628
  "output_save_as": self.output.save_as,
629
629
  # Checkpoint config (nested inside output)
630
630
  # Note: checkpoint_path can be None, meaning "auto-resolve" at runtime
631
- "checkpoint_interval": self.output.checkpoint.interval if self.output.checkpoint else None,
631
+ "checkpoint_interval": self.output.checkpoint.interval
632
+ if self.output.checkpoint
633
+ else None,
632
634
  "checkpoint_path": self.output.checkpoint.path if self.output.checkpoint else None,
633
- "checkpoint_retry_failed": (
634
- self.output.checkpoint.retry_failed if self.output.checkpoint else False
635
- ),
635
+ "checkpoint_retry_failed": self.output.checkpoint.retry_failed
636
+ if self.output.checkpoint
637
+ else False,
636
638
  }
637
639
 
638
640
  # Tool config
deepfabric/constants.py CHANGED
@@ -93,7 +93,7 @@ FAILED_TREE_SUFFIX = "_failed.jsonl"
93
93
  CHECKPOINT_METADATA_SUFFIX = ".checkpoint.json"
94
94
  CHECKPOINT_SAMPLES_SUFFIX = ".checkpoint.jsonl"
95
95
  CHECKPOINT_FAILURES_SUFFIX = ".checkpoint.failures.jsonl"
96
- CHECKPOINT_VERSION = 3 # Increment when checkpoint format changes
96
+ CHECKPOINT_VERSION = 4 # v4: (uuid, cycle) tuple tracking for cycle-based generation
97
97
 
98
98
  # Stream simulation defaults
99
99
  STREAM_SIM_CHUNK_SIZE = 8 # characters per chunk