DeepFabric 4.10.0__py3-none-any.whl → 4.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported public registry. The information contained in this diff is provided for informational purposes only and reflects the changes between the two package versions as they appear in that registry.
deepfabric/cli.py CHANGED
@@ -2,6 +2,8 @@ import contextlib
2
2
  import json
3
3
  import math
4
4
  import os
5
+ import platform
6
+ import select
5
7
  import signal
6
8
  import sys
7
9
 
@@ -419,6 +421,35 @@ def _trigger_cloud_upload(
419
421
  )
420
422
 
421
423
 
424
+ def _prompt_with_timeout(
425
+ choices: list[str],
426
+ default: str,
427
+ timeout: int = 20,
428
+ ) -> str:
429
+ """Prompt for a choice with a visible countdown, auto-selecting default on timeout."""
430
+ if platform.system() == "Windows":
431
+ return click.prompt(
432
+ f" Choose [{'/'.join(choices)}]",
433
+ type=click.Choice(choices),
434
+ default=default,
435
+ )
436
+ valid = set(choices)
437
+ for remaining in range(timeout, 0, -1):
438
+ sys.stdout.write(f"\r Choose [{'/'.join(choices)}] (auto-{default} in {remaining:2d}s): ")
439
+ sys.stdout.flush()
440
+ ready, _, _ = select.select([sys.stdin], [], [], 1.0)
441
+ if ready:
442
+ line = sys.stdin.readline().strip()
443
+ sys.stdout.write("\n")
444
+ sys.stdout.flush()
445
+ if line in valid:
446
+ return line
447
+ return default
448
+ sys.stdout.write("\n")
449
+ sys.stdout.flush()
450
+ return default
451
+
452
+
422
453
  def _run_generation(
423
454
  *,
424
455
  preparation: GenerationPreparation,
@@ -470,14 +501,10 @@ def _run_generation(
470
501
  tui.console.print(" [cyan]3)[/cyan] Abort")
471
502
  tui.console.print()
472
503
 
473
- choice = click.prompt(
474
- "Choose an option",
475
- type=click.Choice(["1", "2", "3"]),
476
- default="1",
477
- )
504
+ choice = _prompt_with_timeout(["1", "2", "3"], default="1", timeout=20)
478
505
 
479
506
  if choice == "1":
480
- # User wants to resume
507
+ # User wants to resume (or auto-selected after timeout)
481
508
  options.resume = True
482
509
  elif choice == "2":
483
510
  # Clear and start fresh
@@ -493,7 +520,7 @@ def _run_generation(
493
520
  if engine.load_checkpoint(retry_failed=options.retry_failed):
494
521
  samples_done = engine._flushed_samples_count
495
522
  failures_done = engine._flushed_failures_count
496
- ids_processed = len(engine._processed_ids)
523
+ ids_processed = len(engine._completed)
497
524
  retry_msg = " (retrying failed samples)" if options.retry_failed else ""
498
525
 
499
526
  # Update TUI status panel with checkpoint progress
@@ -503,17 +530,18 @@ def _run_generation(
503
530
  if failures_done > 0:
504
531
  tui.info(
505
532
  f"Resuming from checkpoint: {samples_done} samples, "
506
- f"{failures_done} failed, {ids_processed} IDs processed{retry_msg}"
533
+ f"{failures_done} failed, {ids_processed} UUIDs processed{retry_msg}"
507
534
  )
508
535
  else:
509
536
  tui.info(
510
537
  f"Resuming from checkpoint: {samples_done} samples, "
511
- f"{ids_processed} IDs processed{retry_msg}"
538
+ f"{ids_processed} UUIDs processed{retry_msg}"
512
539
  )
513
540
  else:
514
541
  tui.info("No checkpoint found, starting fresh generation")
515
542
 
516
- # Set up graceful Ctrl+C handling for checkpoint-based stop
543
+ # Set up graceful Ctrl+C handling
544
+ has_checkpoint = generation_params.get("checkpoint_interval") is not None
517
545
  interrupt_count = 0
518
546
 
519
547
  def handle_sigint(_signum, _frame):
@@ -522,7 +550,12 @@ def _run_generation(
522
550
 
523
551
  if interrupt_count == 1:
524
552
  engine.stop_requested = True
525
- tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
553
+ if has_checkpoint:
554
+ tui.warning("Stopping after current checkpoint... (Ctrl+C again to force quit)")
555
+ else:
556
+ tui.warning(
557
+ "Stopping... partial results will be saved. (Ctrl+C again to force quit)"
558
+ )
526
559
  dataset_tui = get_dataset_tui()
527
560
  dataset_tui.log_event("⚠ Graceful stop requested")
528
561
  dataset_tui.status_stop_requested()
@@ -547,12 +580,22 @@ def _run_generation(
547
580
  finally:
548
581
  signal.signal(signal.SIGINT, original_handler)
549
582
 
550
- # If gracefully stopped, don't save partial dataset or clean up checkpoints
583
+ output_config = preparation.config.get_output_config()
584
+ output_save_path = options.output_save_as or output_config["save_as"]
585
+
586
+ # If gracefully stopped, handle based on checkpoint availability
551
587
  if engine.stop_requested:
588
+ if has_checkpoint:
589
+ # Checkpoint on disk — user can resume later
590
+ return
591
+ # No checkpoint — save whatever was generated so far
592
+ if dataset and len(dataset) > 0:
593
+ tui.info(f"Saving {len(dataset)} samples generated before stop")
594
+ save_dataset(dataset, output_save_path, preparation.config, engine=engine)
595
+ else:
596
+ tui.warning("No samples were generated before stop")
552
597
  return
553
598
 
554
- output_config = preparation.config.get_output_config()
555
- output_save_path = options.output_save_as or output_config["save_as"]
556
599
  save_dataset(dataset, output_save_path, preparation.config, engine=engine)
557
600
 
558
601
  # Clean up checkpoint files after successful completion
@@ -651,8 +694,7 @@ def _run_generation(
651
694
  "--cloud-upload",
652
695
  type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
653
696
  default=None,
654
- help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
655
- "Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
697
+ help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. ",
656
698
  )
657
699
  @click.option(
658
700
  "--checkpoint-interval",
@@ -783,7 +825,9 @@ def generate( # noqa: PLR0913
783
825
 
784
826
  # Compute checkpoint directory once for consistent use throughout generation
785
827
  # Use config file for hash, fallback to output path for config-less runs
786
- path_source = options.config_file or options.output_save_as or preparation.config.output.save_as
828
+ path_source = (
829
+ options.config_file or options.output_save_as or preparation.config.output.save_as
830
+ )
787
831
  checkpoint_dir = options.checkpoint_path or get_checkpoint_dir(path_source)
788
832
 
789
833
  # Auto-infer topics-load when resuming from checkpoint
@@ -1295,23 +1339,34 @@ def validate(config_file: str, check_api: bool) -> None: # noqa: PLR0912
1295
1339
  f"estimated_paths={estimated_paths} ({degree}^{depth})"
1296
1340
  )
1297
1341
 
1298
- # Output summary with step size and checkpoint info
1342
+ # Output summary with cycle-based generation info
1299
1343
  num_samples = config.output.num_samples
1300
1344
  batch_size = config.output.batch_size
1301
- # Calculate num_steps - handle 'auto' and percentage strings
1302
- if isinstance(num_samples, int):
1303
- num_steps = math.ceil(num_samples / batch_size)
1304
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}, num_steps={num_steps}"
1305
- else:
1306
- # For 'auto' or percentage, we can't compute steps without topic count
1307
- output_info = f"Output: num_samples={num_samples}, batch_size={batch_size}"
1308
1345
 
1309
- # Add checkpoint info if enabled
1346
+ # Show output configuration
1347
+ output_info = f"Output: num_samples={num_samples}, concurrency={batch_size}"
1310
1348
  if config.output.checkpoint:
1311
1349
  checkpoint = config.output.checkpoint
1312
1350
  output_info += f", checkpoint_interval={checkpoint.interval}"
1313
1351
  tui.info(output_info)
1314
1352
 
1353
+ # Calculate and display cycle-based generation info
1354
+ if isinstance(num_samples, int):
1355
+ cycles_needed = math.ceil(num_samples / estimated_paths)
1356
+ final_cycle_size = num_samples - (cycles_needed - 1) * estimated_paths
1357
+ is_partial = final_cycle_size < estimated_paths
1358
+
1359
+ tui.info(
1360
+ f" → Cycles needed: {cycles_needed} "
1361
+ f"({num_samples} samples ÷ {estimated_paths} unique topics)"
1362
+ )
1363
+ if is_partial:
1364
+ tui.info(f" → Final cycle: {final_cycle_size} topics (partial)")
1365
+ elif num_samples == "auto":
1366
+ tui.info(f" → Will generate 1 sample per unique topic ({estimated_paths} samples)")
1367
+ else:
1368
+ tui.info(" → Samples calculated at runtime based on topic count")
1369
+
1315
1370
  if config.huggingface:
1316
1371
  hf_config = config.get_huggingface_config()
1317
1372
  tui.info(f"Hugging Face: repo={hf_config.get('repository', 'not set')}")
@@ -1893,7 +1948,8 @@ def checkpoint_status(config_file: str) -> None:
1893
1948
  # Check if checkpoint exists
1894
1949
  if not metadata_path.exists():
1895
1950
  tui.info(f"No checkpoint found at: {metadata_path}")
1896
- tui.info("\nTo enable checkpointing, run:")
1951
+ tui.console.print()
1952
+ tui.info("To enable checkpointing, run:")
1897
1953
  tui.info(f" deepfabric generate {config_file} --checkpoint-interval 10")
1898
1954
  return
1899
1955
 
@@ -778,7 +778,7 @@ def handle_cloud_upload( # noqa: PLR0911
778
778
 
779
779
  # Build prompt based on what's available
780
780
  if has_dataset and has_graph:
781
- prompt_text = " Upload to DeepFabric Cloud?"
781
+ prompt_text = " Upload graph and dataset to DeepFabric Cloud?"
782
782
  hint = "[dim](Y=both, n=skip, c=choose)[/dim]"
783
783
  elif has_dataset:
784
784
  prompt_text = " Upload dataset to DeepFabric Cloud?"
deepfabric/config.py CHANGED
@@ -628,11 +628,13 @@ See documentation for full examples.
628
628
  "output_save_as": self.output.save_as,
629
629
  # Checkpoint config (nested inside output)
630
630
  # Note: checkpoint_path can be None, meaning "auto-resolve" at runtime
631
- "checkpoint_interval": self.output.checkpoint.interval if self.output.checkpoint else None,
631
+ "checkpoint_interval": self.output.checkpoint.interval
632
+ if self.output.checkpoint
633
+ else None,
632
634
  "checkpoint_path": self.output.checkpoint.path if self.output.checkpoint else None,
633
- "checkpoint_retry_failed": (
634
- self.output.checkpoint.retry_failed if self.output.checkpoint else False
635
- ),
635
+ "checkpoint_retry_failed": self.output.checkpoint.retry_failed
636
+ if self.output.checkpoint
637
+ else False,
636
638
  }
637
639
 
638
640
  # Tool config
deepfabric/constants.py CHANGED
@@ -93,7 +93,7 @@ FAILED_TREE_SUFFIX = "_failed.jsonl"
93
93
  CHECKPOINT_METADATA_SUFFIX = ".checkpoint.json"
94
94
  CHECKPOINT_SAMPLES_SUFFIX = ".checkpoint.jsonl"
95
95
  CHECKPOINT_FAILURES_SUFFIX = ".checkpoint.failures.jsonl"
96
- CHECKPOINT_VERSION = 3 # Increment when checkpoint format changes
96
+ CHECKPOINT_VERSION = 4 # v4: (uuid, cycle) tuple tracking for cycle-based generation
97
97
 
98
98
  # Stream simulation defaults
99
99
  STREAM_SIM_CHUNK_SIZE = 8 # characters per chunk