DeepFabric 4.5.1__py3-none-any.whl → 4.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/__init__.py CHANGED
@@ -11,6 +11,7 @@ from .auth import (
11
11
  )
12
12
  from .cli import cli
13
13
  from .config import DeepFabricConfig
14
+ from .dataset import Dataset, DatasetDict
14
15
  from .exceptions import (
15
16
  APIError,
16
17
  ConfigurationError,
@@ -19,6 +20,7 @@ from .exceptions import (
19
20
  DeepFabricError,
20
21
  HubUploadError,
21
22
  JSONParsingError,
23
+ LoaderError,
22
24
  ModelError,
23
25
  RetryExhaustedError,
24
26
  TreeError,
@@ -27,6 +29,7 @@ from .exceptions import (
27
29
  from .generator import DataSetGenerator, DataSetGeneratorConfig
28
30
  from .graph import Graph, GraphConfig
29
31
  from .hf_hub import HFUploader
32
+ from .loader import load_dataset
30
33
  from .training import DeepFabricCallback, MetricsSender
31
34
  from .tree import Tree, TreeConfig
32
35
 
@@ -42,6 +45,10 @@ __all__ = [
42
45
  "DeepFabricConfig",
43
46
  "HFUploader",
44
47
  "cli",
48
+ # Dataset loading
49
+ "load_dataset",
50
+ "Dataset",
51
+ "DatasetDict",
45
52
  # Training metrics logging
46
53
  "DeepFabricCallback",
47
54
  "MetricsSender",
@@ -67,4 +74,5 @@ __all__ = [
67
74
  "JSONParsingError",
68
75
  "APIError",
69
76
  "RetryExhaustedError",
77
+ "LoaderError",
70
78
  ]
deepfabric/auth.py CHANGED
@@ -10,8 +10,11 @@ import click
10
10
  import httpx
11
11
 
12
12
  from .tui import get_tui
13
+ from .utils import get_bool_env
14
+
15
+ DEFAULT_API_URL = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.cloud")
16
+ DEFAULT_FRONTEND_URL = os.getenv("DEEPFABRIC_FRONTEND_URL", "https://deepfabric.cloud")
13
17
 
14
- DEFAULT_API_URL = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.dev")
15
18
  CONFIG_DIR = Path.home() / ".deepfabric"
16
19
  CONFIG_FILE = CONFIG_DIR / "config.json"
17
20
 
@@ -57,13 +60,17 @@ def clear_tokens() -> None:
57
60
 
58
61
 
59
62
  def is_authenticated() -> bool:
60
- """Check if user is authenticated."""
61
- config = get_config()
62
- return bool(config.get("access_token") or config.get("api_key"))
63
+ """Check if user is authenticated via API key or OAuth token."""
64
+ return bool(get_auth_token())
63
65
 
64
66
 
65
67
  def get_auth_token() -> str | None:
66
- """Get authentication token (API key or access token)."""
68
+ """Get authentication token (API key from env, or stored API key/access token)."""
69
+ # Check environment variable first
70
+ env_api_key = os.getenv("DEEPFABRIC_API_KEY")
71
+ if env_api_key:
72
+ return env_api_key
73
+ # Then check stored config
67
74
  config = get_config()
68
75
  return config.get("api_key") or config.get("access_token")
69
76
 
@@ -75,6 +82,9 @@ def prompt_cloud_signup(api_url: str = DEFAULT_API_URL) -> bool:
75
82
  Returns:
76
83
  True if user successfully authenticated, False otherwise
77
84
  """
85
+ if not get_bool_env("EXPERIMENTAL_DF"):
86
+ return False
87
+
78
88
  tui = get_tui()
79
89
 
80
90
  tui.console.print("")
@@ -112,7 +122,7 @@ def prompt_cloud_signup(api_url: str = DEFAULT_API_URL) -> bool:
112
122
 
113
123
  if auth_choice == "register":
114
124
  tui.info("Opening DeepFabric Cloud registration page...")
115
- register_url = api_url.replace("/api", "").rstrip("/") + "/signup"
125
+ register_url = DEFAULT_FRONTEND_URL.rstrip("/") + "/signup"
116
126
  with contextlib.suppress(Exception):
117
127
  webbrowser.open(register_url)
118
128
  tui.info("After registering, come back here to log in.")
deepfabric/builders.py CHANGED
@@ -114,7 +114,7 @@ def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
114
114
  raise ValueError(msg)
115
115
 
116
116
  # Non-agent conversations use single-shot generation
117
- if config.conversation_type in ("basic", "chain_of_thought"):
117
+ if config.conversation_type in ("basic", "cot"):
118
118
  return SINGLE_SHOT_BUILDER
119
119
 
120
120
  msg = f"Cannot determine builder type for conversation_type={config.conversation_type}"
@@ -222,7 +222,7 @@ class SingleShotBuilder(ConversationBuilder):
222
222
  prompt_parts.append(f"\nAdditional Instructions: {self.config.instructions}")
223
223
 
224
224
  # Add reasoning-specific guidance based on style
225
- if self.config.conversation_type == "chain_of_thought":
225
+ if self.config.conversation_type == "cot":
226
226
  if self.config.reasoning_style == "freetext":
227
227
  prompt_parts.append(
228
228
  "\nREASONING FORMAT: Generate natural, conversational reasoning content (string format). "
@@ -756,7 +756,7 @@ Remember: You have access to the tools listed above and have used them in this c
756
756
 
757
757
  # Build metadata
758
758
  metadata = {
759
- "conversation_type": "chain_of_thought",
759
+ "conversation_type": "cot",
760
760
  "react_steps": len(steps),
761
761
  }
762
762
 
@@ -1290,7 +1290,7 @@ Is the user's original task/goal from the scenario fully completed?
1290
1290
 
1291
1291
  # Build metadata
1292
1292
  metadata = {
1293
- "conversation_type": "chain_of_thought" if reasoning_trace else "basic",
1293
+ "conversation_type": "cot" if reasoning_trace else "basic",
1294
1294
  "topic": topic_prompt if topic_prompt else "general",
1295
1295
  }
1296
1296
 
deepfabric/cli.py CHANGED
@@ -23,6 +23,7 @@ from .topic_manager import load_or_build_topic_model, save_topic_model
23
23
  from .topic_model import TopicModel
24
24
  from .tui import configure_tui, get_tui
25
25
  from .update_checker import check_for_updates
26
+ from .utils import get_bool_env
26
27
  from .validation import show_validation_success, validate_path_requirements
27
28
 
28
29
  OverrideValue = str | int | float | bool | None
@@ -98,7 +99,7 @@ class GenerateOptions(BaseModel):
98
99
  tui: Literal["rich", "simple"] = Field(default="rich")
99
100
 
100
101
  # Modular conversation configuration
101
- conversation_type: Literal["basic", "chain_of_thought"] | None = None
102
+ conversation_type: Literal["basic", "cot"] | None = None
102
103
  reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None
103
104
  agent_mode: Literal["single_turn", "multi_turn"] | None = None
104
105
 
@@ -107,6 +108,9 @@ class GenerateOptions(BaseModel):
107
108
  max_turns: int | None = None
108
109
  min_tool_calls: int | None = None
109
110
 
111
+ # Cloud upload (experimental)
112
+ cloud_upload: Literal["all", "dataset", "graph", "none"] | None = None
113
+
110
114
  @model_validator(mode="after")
111
115
  def validate_mode_constraints(self) -> "GenerateOptions":
112
116
  if self.topic_only and self.topics_load:
@@ -333,6 +337,36 @@ def _initialize_topic_model(
333
337
  return topic_model
334
338
 
335
339
 
340
+ def _trigger_cloud_upload(
341
+ *,
342
+ preparation: GenerationPreparation,
343
+ options: GenerateOptions,
344
+ dataset_path: str | None = None,
345
+ ) -> None:
346
+ """Trigger cloud upload if EXPERIMENTAL_DF is enabled and mode is 'graph'.
347
+
348
+ Args:
349
+ preparation: Generation preparation context
350
+ options: CLI options including cloud_upload flag
351
+ dataset_path: Path to dataset file (None for topic-only mode)
352
+ """
353
+ # Cloud upload only supports graph mode, not tree mode
354
+ # Use config.topics.mode since options.mode may have CLI default value
355
+ actual_mode = preparation.config.topics.mode
356
+ if not (get_bool_env("EXPERIMENTAL_DF") and actual_mode == "graph"):
357
+ return
358
+
359
+ from .cloud_upload import handle_cloud_upload # noqa: PLC0415
360
+
361
+ graph_path = options.topics_save_as or preparation.config.topics.save_as or "topic_graph.json"
362
+
363
+ handle_cloud_upload(
364
+ dataset_path=dataset_path,
365
+ graph_path=graph_path,
366
+ cloud_upload_flag=options.cloud_upload,
367
+ )
368
+
369
+
336
370
  def _run_generation(
337
371
  *,
338
372
  preparation: GenerationPreparation,
@@ -366,6 +400,13 @@ def _run_generation(
366
400
  {"samples": len(dataset)},
367
401
  )
368
402
 
403
+ # Cloud upload (experimental feature)
404
+ _trigger_cloud_upload(
405
+ preparation=preparation,
406
+ options=options,
407
+ dataset_path=output_save_path,
408
+ )
409
+
369
410
 
370
411
  @cli.command()
371
412
  @click.argument("config_file", type=click.Path(exists=True), required=False)
@@ -421,13 +462,13 @@ def _run_generation(
421
462
  )
422
463
  @click.option(
423
464
  "--conversation-type",
424
- type=click.Choice(["basic", "chain_of_thought"]),
425
- help="Base conversation type: basic (simple chat), chain_of_thought (with reasoning)",
465
+ type=click.Choice(["basic", "cot"]),
466
+ help="Base conversation type: basic (simple chat), cot (with reasoning)",
426
467
  )
427
468
  @click.option(
428
469
  "--reasoning-style",
429
470
  type=click.Choice(["freetext", "agent"]),
430
- help="Reasoning style for chain_of_thought: freetext (natural language) or agent (structured for tool-calling)",
471
+ help="Reasoning style for cot: freetext (natural language) or agent (structured for tool-calling)",
431
472
  )
432
473
  @click.option(
433
474
  "--agent-mode",
@@ -449,6 +490,13 @@ def _run_generation(
449
490
  type=int,
450
491
  help="Minimum tool calls before allowing conversation conclusion",
451
492
  )
493
+ @click.option(
494
+ "--cloud-upload",
495
+ type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
496
+ default=None,
497
+ help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
498
+ "Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
499
+ )
452
500
  def generate( # noqa: PLR0913
453
501
  config_file: str | None,
454
502
  output_system_prompt: str | None = None,
@@ -470,12 +518,13 @@ def generate( # noqa: PLR0913
470
518
  mode: Literal["tree", "graph"] = "tree",
471
519
  debug: bool = False,
472
520
  topic_only: bool = False,
473
- conversation_type: Literal["basic", "chain_of_thought"] | None = None,
521
+ conversation_type: Literal["basic", "cot"] | None = None,
474
522
  reasoning_style: Literal["freetext", "agent"] | None = None,
475
523
  agent_mode: Literal["single_turn", "multi_turn"] | None = None,
476
524
  min_turns: int | None = None,
477
525
  max_turns: int | None = None,
478
526
  min_tool_calls: int | None = None,
527
+ cloud_upload: Literal["all", "dataset", "graph", "none"] | None = None,
479
528
  tui: Literal["rich", "simple"] = "rich",
480
529
  ) -> None:
481
530
  """Generate training data from a YAML configuration file or CLI parameters."""
@@ -518,6 +567,7 @@ def generate( # noqa: PLR0913
518
567
  min_turns=min_turns,
519
568
  max_turns=max_turns,
520
569
  min_tool_calls=min_tool_calls,
570
+ cloud_upload=cloud_upload,
521
571
  tui=tui,
522
572
  )
523
573
  except PydanticValidationError as error:
@@ -541,6 +591,12 @@ def generate( # noqa: PLR0913
541
591
  )
542
592
 
543
593
  if topic_only:
594
+ # Cloud upload for topic-only mode (graph only, no dataset)
595
+ _trigger_cloud_upload(
596
+ preparation=preparation,
597
+ options=options,
598
+ dataset_path=None,
599
+ )
544
600
  return
545
601
 
546
602
  _run_generation(
@@ -689,6 +745,224 @@ def upload_kaggle(
689
745
  sys.exit(1)
690
746
 
691
747
 
748
+ # DeepFabric Cloud upload command group
749
+ @click.group()
750
+ def upload() -> None:
751
+ """Upload datasets and graphs to DeepFabric Cloud."""
752
+ pass
753
+
754
+
755
+ def _upload_to_cloud(
756
+ file: str,
757
+ resource_type: Literal["dataset", "graph"],
758
+ handle: str | None,
759
+ name: str | None,
760
+ description: str | None,
761
+ tags: list[str] | None,
762
+ config_file: str | None,
763
+ ) -> None:
764
+ """Shared helper for uploading datasets and graphs to DeepFabric Cloud.
765
+
766
+ Args:
767
+ file: Path to the file to upload
768
+ resource_type: Either "dataset" or "graph"
769
+ handle: Resource handle (e.g., username/resource-name)
770
+ name: Display name for the resource
771
+ description: Description for the resource
772
+ tags: Tags for the resource (only used for datasets)
773
+ config_file: Path to config file with upload settings
774
+ """
775
+ # Lazy imports to avoid slow startup
776
+ import httpx # noqa: PLC0415
777
+
778
+ from .auth import DEFAULT_API_URL # noqa: PLC0415
779
+ from .cloud_upload import ( # noqa: PLC0415
780
+ _get_user_friendly_error,
781
+ build_urls,
782
+ derive_frontend_url,
783
+ derive_name_and_slug,
784
+ ensure_authenticated,
785
+ get_current_user,
786
+ upload_dataset,
787
+ upload_topic_graph,
788
+ )
789
+
790
+ tui = get_tui()
791
+ config_key = resource_type # "dataset" or "graph"
792
+ url_resource_type = "datasets" if resource_type == "dataset" else "graphs"
793
+
794
+ # Load handle from config if not provided via CLI
795
+ final_handle = handle
796
+ final_description = description or ""
797
+ final_tags = list(tags) if tags else []
798
+
799
+ if config_file:
800
+ config = DeepFabricConfig.from_yaml(config_file)
801
+ cloud_config = config.get_deepfabric_cloud_config()
802
+ if not final_handle:
803
+ final_handle = cloud_config.get(config_key)
804
+ if not description and cloud_config.get("description"):
805
+ final_description = cloud_config.get("description", "")
806
+ if resource_type == "dataset" and not tags and cloud_config.get("tags"):
807
+ final_tags = cloud_config.get("tags", [])
808
+
809
+ # Ensure authenticated
810
+ if not ensure_authenticated(DEFAULT_API_URL, headless=False):
811
+ tui.error("Authentication required. Run 'deepfabric auth login' first.")
812
+ sys.exit(1)
813
+
814
+ # Derive name and slug from filename if not provided
815
+ default_name, default_slug = derive_name_and_slug(file)
816
+ final_name = name or default_name
817
+
818
+ # Use slug from handle if provided, otherwise use derived slug
819
+ if final_handle and "/" in final_handle:
820
+ final_slug = final_handle.split("/")[-1]
821
+ else:
822
+ final_slug = final_handle or default_slug
823
+
824
+ tui.info(f"Uploading {resource_type} '{final_name}'...")
825
+
826
+ try:
827
+ # Call the appropriate upload function
828
+ if resource_type == "dataset":
829
+ result = upload_dataset(
830
+ dataset_path=file,
831
+ name=final_name,
832
+ slug=final_slug,
833
+ description=final_description,
834
+ tags=final_tags,
835
+ api_url=DEFAULT_API_URL,
836
+ )
837
+ resource_id = result.get("dataset_id") or result.get("id")
838
+ else:
839
+ result = upload_topic_graph(
840
+ graph_path=file,
841
+ name=final_name,
842
+ description=final_description,
843
+ slug=final_slug,
844
+ api_url=DEFAULT_API_URL,
845
+ )
846
+ resource_id = result.get("id")
847
+
848
+ # Display success message
849
+ tui.success(f"{resource_type.capitalize()} '{final_name}' uploaded successfully!")
850
+
851
+ # Display URL if available
852
+ if resource_id:
853
+ user_info = get_current_user(DEFAULT_API_URL)
854
+ username = user_info.get("username") if user_info else None
855
+ frontend_url = derive_frontend_url(DEFAULT_API_URL)
856
+ public_url, internal_url = build_urls(
857
+ url_resource_type, resource_id, final_slug, username, frontend_url
858
+ )
859
+ tui.info(f"View at: {public_url or internal_url}")
860
+
861
+ except httpx.HTTPStatusError as e:
862
+ error_msg = _get_user_friendly_error(e)
863
+ if "already exists" in error_msg.lower():
864
+ tui.error(
865
+ f"A {resource_type} with slug '{final_slug}' already exists. "
866
+ "Use a different --handle value."
867
+ )
868
+ else:
869
+ tui.error(f"Error uploading {resource_type}: {error_msg}")
870
+ sys.exit(1)
871
+ except Exception as e:
872
+ tui.error(f"Error uploading {resource_type}: {str(e)}")
873
+ sys.exit(1)
874
+
875
+
876
+ @upload.command("dataset")
877
+ @click.argument("file", type=click.Path(exists=True))
878
+ @click.option("--handle", help="Dataset handle (e.g., username/dataset-name)")
879
+ @click.option("--name", help="Display name for the dataset")
880
+ @click.option("--description", help="Description for the dataset")
881
+ @click.option(
882
+ "--tags", multiple=True, help="Tags for the dataset (can be specified multiple times)"
883
+ )
884
+ @click.option(
885
+ "--config",
886
+ "config_file",
887
+ type=click.Path(exists=True),
888
+ help="Config file with upload settings",
889
+ )
890
+ def upload_dataset_cmd(
891
+ file: str,
892
+ handle: str | None,
893
+ name: str | None,
894
+ description: str | None,
895
+ tags: tuple[str, ...],
896
+ config_file: str | None,
897
+ ) -> None:
898
+ """Upload a dataset to DeepFabric Cloud.
899
+
900
+ FILE is the path to the JSONL dataset file.
901
+
902
+ Examples:
903
+
904
+ deepfabric upload dataset my-dataset.jsonl --handle myuser/my-dataset
905
+
906
+ deepfabric upload dataset output.jsonl --config config.yaml
907
+ """
908
+ trace(
909
+ "cli_upload_dataset",
910
+ {"has_config": config_file is not None, "has_handle": handle is not None},
911
+ )
912
+ _upload_to_cloud(
913
+ file=file,
914
+ resource_type="dataset",
915
+ handle=handle,
916
+ name=name,
917
+ description=description,
918
+ tags=list(tags) if tags else None,
919
+ config_file=config_file,
920
+ )
921
+
922
+
923
+ @upload.command("graph")
924
+ @click.argument("file", type=click.Path(exists=True))
925
+ @click.option("--handle", help="Graph handle (e.g., username/graph-name)")
926
+ @click.option("--name", help="Display name for the graph")
927
+ @click.option("--description", help="Description for the graph")
928
+ @click.option(
929
+ "--config",
930
+ "config_file",
931
+ type=click.Path(exists=True),
932
+ help="Config file with upload settings",
933
+ )
934
+ def upload_graph_cmd(
935
+ file: str,
936
+ handle: str | None,
937
+ name: str | None,
938
+ description: str | None,
939
+ config_file: str | None,
940
+ ) -> None:
941
+ """Upload a topic graph to DeepFabric Cloud.
942
+
943
+ FILE is the path to the JSON graph file.
944
+
945
+ Examples:
946
+
947
+ deepfabric upload graph topic_graph.json --handle myuser/my-graph
948
+
949
+ deepfabric upload graph graph.json --config config.yaml
950
+ """
951
+ trace(
952
+ "cli_upload_graph",
953
+ {"has_config": config_file is not None, "has_handle": handle is not None},
954
+ )
955
+ _upload_to_cloud(
956
+ file=file,
957
+ resource_type="graph",
958
+ handle=handle,
959
+ name=name,
960
+ description=description,
961
+ tags=None,
962
+ config_file=config_file,
963
+ )
964
+
965
+
692
966
  @cli.command()
693
967
  @click.argument("graph_file", type=click.Path(exists=True))
694
968
  @click.option(
@@ -846,8 +1120,10 @@ def info() -> None:
846
1120
  ("generate", "Generate training data from configuration"),
847
1121
  ("validate", "Validate a configuration file"),
848
1122
  ("visualize", "Create SVG visualization of a topic graph"),
849
- ("upload", "Upload dataset to Hugging Face Hub"),
1123
+ ("upload-hf", "Upload dataset to Hugging Face Hub"),
850
1124
  ("upload-kaggle", "Upload dataset to Kaggle"),
1125
+ ("evaluate", "Evaluate a fine-tuned model on tool-calling tasks"),
1126
+ ("import-tools", "Import tool definitions from external sources"),
851
1127
  ("info", "Show this information"),
852
1128
  ]
853
1129
  for cmd, desc in commands:
@@ -863,7 +1139,7 @@ def info() -> None:
863
1139
  tui.console.print(f" [yellow]{var}[/yellow] - {desc}")
864
1140
 
865
1141
  tui.console.print(
866
- "\nFor more information, visit: [link]https://github.com/RedDotRocket/deepfabric[/link]"
1142
+ "\nFor more information, visit: [link]https://github.com/always-further/deepfabric[/link]"
867
1143
  )
868
1144
 
869
1145
  except Exception as e:
@@ -979,7 +1255,7 @@ def evaluate(
979
1255
 
980
1256
  # Create inference configuration
981
1257
  inference_config = InferenceConfig(
982
- model_path=model_path,
1258
+ model=model_path,
983
1259
  adapter_path=adapter_path,
984
1260
  backend=cast(Literal["transformers", "ollama"], backend),
985
1261
  temperature=temperature,
@@ -1094,8 +1370,11 @@ def evaluate(
1094
1370
  handle_error(click.get_current_context(), e)
1095
1371
 
1096
1372
 
1097
- # Register the auth command group
1098
- cli.add_command(auth_group)
1373
+ # Register the auth and upload command groups
1374
+ # EXPERIMENTAL: Only enable cloud features if explicitly opted in
1375
+ if get_bool_env("EXPERIMENTAL_DF"):
1376
+ cli.add_command(auth_group)
1377
+ cli.add_command(upload)
1099
1378
 
1100
1379
 
1101
1380
  @cli.command("import-tools")