DeepFabric 4.4.1__py3-none-any.whl → 4.6.0__py3-none-any.whl
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in their respective public registries.
- deepfabric/__init__.py +8 -0
- deepfabric/auth.py +8 -2
- deepfabric/builders.py +2 -2
- deepfabric/builders_agent.py +18 -6
- deepfabric/cli.py +292 -13
- deepfabric/cloud_upload.py +884 -0
- deepfabric/config.py +47 -20
- deepfabric/config_manager.py +2 -2
- deepfabric/dataset.py +302 -0
- deepfabric/evaluation/backends/__init__.py +2 -0
- deepfabric/evaluation/backends/llm_eval_backend.py +527 -0
- deepfabric/evaluation/backends/ollama_backend.py +3 -3
- deepfabric/evaluation/backends/tool_call_parsers.py +7 -7
- deepfabric/evaluation/backends/transformers_backend.py +73 -16
- deepfabric/evaluation/evaluator.py +41 -7
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +13 -8
- deepfabric/evaluation/inference.py +77 -5
- deepfabric/evaluation/metrics.py +4 -0
- deepfabric/evaluation/parser.py +8 -8
- deepfabric/evaluation/reporters/cloud_reporter.py +19 -6
- deepfabric/exceptions.py +14 -0
- deepfabric/generator.py +8 -4
- deepfabric/graph.py +38 -0
- deepfabric/hf_hub.py +1 -1
- deepfabric/loader.py +554 -0
- deepfabric/schemas.py +7 -7
- deepfabric/topic_manager.py +4 -0
- deepfabric/training/__init__.py +24 -5
- deepfabric/training/callback.py +43 -1
- deepfabric/training/dataset_utils.py +223 -0
- deepfabric/training/metrics_sender.py +50 -16
- deepfabric/tui.py +9 -1
- deepfabric/utils.py +14 -0
- deepfabric/validation.py +1 -1
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/METADATA +84 -177
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/RECORD +39 -34
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.4.1.dist-info → deepfabric-4.6.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/__init__.py
CHANGED
@@ -11,6 +11,7 @@ from .auth import (
 )
 from .cli import cli
 from .config import DeepFabricConfig
+from .dataset import Dataset, DatasetDict
 from .exceptions import (
     APIError,
     ConfigurationError,
@@ -19,6 +20,7 @@ from .exceptions import (
     DeepFabricError,
     HubUploadError,
     JSONParsingError,
+    LoaderError,
     ModelError,
     RetryExhaustedError,
     TreeError,
@@ -27,6 +29,7 @@ from .exceptions import (
 from .generator import DataSetGenerator, DataSetGeneratorConfig
 from .graph import Graph, GraphConfig
 from .hf_hub import HFUploader
+from .loader import load_dataset
 from .training import DeepFabricCallback, MetricsSender
 from .tree import Tree, TreeConfig

@@ -42,6 +45,10 @@ __all__ = [
     "DeepFabricConfig",
     "HFUploader",
     "cli",
+    # Dataset loading
+    "load_dataset",
+    "Dataset",
+    "DatasetDict",
     # Training metrics logging
     "DeepFabricCallback",
     "MetricsSender",
@@ -67,4 +74,5 @@ __all__ = [
     "JSONParsingError",
     "APIError",
     "RetryExhaustedError",
+    "LoaderError",
 ]

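The new exports give the package a datasets-style loading surface backed by the new loader.py and dataset.py modules. A minimal usage sketch follows; only the imported names and the LoaderError exception are confirmed by this diff, while the load_dataset argument shape and the container behaviour are assumptions:

    # Hypothetical usage of the 4.6.0 exports; the path argument and the
    # Dataset/DatasetDict behaviour are assumptions, only the names come from this diff.
    from deepfabric import Dataset, DatasetDict, LoaderError, load_dataset

    try:
        data = load_dataset("output.jsonl")  # argument shape is an assumption
    except (LoaderError, OSError) as exc:
        raise SystemExit(f"Could not load dataset: {exc}")

    if isinstance(data, DatasetDict):
        print("Loaded a split dictionary")
    elif isinstance(data, Dataset):
        print("Loaded a single dataset")
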
deepfabric/auth.py
CHANGED
@@ -10,8 +10,11 @@ import click
 import httpx

 from .tui import get_tui
+from .utils import get_bool_env
+
+DEFAULT_API_URL = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.cloud")
+DEFAULT_FRONTEND_URL = os.getenv("DEEPFABRIC_FRONTEND_URL", "https://deepfabric.cloud")

-DEFAULT_API_URL = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.dev")
 CONFIG_DIR = Path.home() / ".deepfabric"
 CONFIG_FILE = CONFIG_DIR / "config.json"

@@ -75,6 +78,9 @@ def prompt_cloud_signup(api_url: str = DEFAULT_API_URL) -> bool:
     Returns:
         True if user successfully authenticated, False otherwise
     """
+    if not get_bool_env("EXPERIMENTAL_DF"):
+        return False
+
     tui = get_tui()

     tui.console.print("")
@@ -112,7 +118,7 @@ def prompt_cloud_signup(api_url: str = DEFAULT_API_URL) -> bool:

     if auth_choice == "register":
         tui.info("Opening DeepFabric Cloud registration page...")
-        register_url =
+        register_url = DEFAULT_FRONTEND_URL.rstrip("/") + "/signup"
         with contextlib.suppress(Exception):
             webbrowser.open(register_url)
         tui.info("After registering, come back here to log in.")

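The signup prompt and the cloud CLI surface are now gated on an EXPERIMENTAL_DF environment variable read through get_bool_env from deepfabric/utils.py (added in this release). The helper's body is not shown in this diff; the sketch below only illustrates the kind of boolean-environment check the call sites imply, and the accepted truthy spellings are an assumption:

    import os

    def get_bool_env(name: str, default: bool = False) -> bool:
        """Sketch of a boolean env reader; the real implementation lives in deepfabric/utils.py."""
        value = os.getenv(name)
        if value is None:
            return default
        return value.strip().lower() in {"1", "true", "yes", "on"}  # accepted values are assumed

    # Mirrors the new guard at the top of prompt_cloud_signup():
    if not get_bool_env("EXPERIMENTAL_DF"):
        print("Cloud signup prompt stays disabled")
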
deepfabric/builders.py
CHANGED
@@ -114,7 +114,7 @@ def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
         raise ValueError(msg)

     # Non-agent conversations use single-shot generation
-    if config.conversation_type in ("basic", "
+    if config.conversation_type in ("basic", "cot"):
         return SINGLE_SHOT_BUILDER

     msg = f"Cannot determine builder type for conversation_type={config.conversation_type}"
@@ -222,7 +222,7 @@ class SingleShotBuilder(ConversationBuilder):
             prompt_parts.append(f"\nAdditional Instructions: {self.config.instructions}")

         # Add reasoning-specific guidance based on style
-        if self.config.conversation_type == "
+        if self.config.conversation_type == "cot":
             if self.config.reasoning_style == "freetext":
                 prompt_parts.append(
                     "\nREASONING FORMAT: Generate natural, conversational reasoning content (string format). "

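Both hunks swap the old reasoning literal (truncated in this view) for "cot", so any config or --conversation-type value that used the old literal must now pass "cot". The dispatch rule is small enough to restate as a self-contained sketch; the config dataclass and builder constant below are simplified stand-ins, not DeepFabric's real types:

    from dataclasses import dataclass

    SINGLE_SHOT_BUILDER = "single_shot"  # stand-in for the module-level constant

    @dataclass
    class FakeConfig:  # stand-in for DataSetGeneratorConfig
        conversation_type: str

    def determine_builder_type(config: FakeConfig) -> str:
        # Non-agent conversations use single-shot generation, now keyed on "cot".
        if config.conversation_type in ("basic", "cot"):
            return SINGLE_SHOT_BUILDER
        raise ValueError(
            f"Cannot determine builder type for conversation_type={config.conversation_type}"
        )

    print(determine_builder_type(FakeConfig(conversation_type="cot")))  # -> single_shot
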
deepfabric/builders_agent.py
CHANGED
@@ -756,15 +756,21 @@ Remember: You have access to the tools listed above and have used them in this c

         # Build metadata
         metadata = {
-            "conversation_type": "
+            "conversation_type": "cot",
             "react_steps": len(steps),
         }

         # Insert system message if configured
         self._insert_system_message_if_configured(messages)

-        # Convert tools to OpenAI format
-
+        # Convert tools to OpenAI format, filtering based on inclusion strategy
+        if self.config.tool_inclusion_strategy == "used_only" and tool_results:
+            used_names = {te.function_name for te in tool_results}
+            tools_openai = [
+                tool.to_openai() for tool in self.tool_registry.tools if tool.name in used_names
+            ]
+        else:
+            tools_openai = [tool.to_openai() for tool in self.tool_registry.tools]

         return Conversation(
             messages=messages,
@@ -1284,15 +1290,21 @@ Is the user's original task/goal from the scenario fully completed?

         # Build metadata
         metadata = {
-            "conversation_type": "
+            "conversation_type": "cot" if reasoning_trace else "basic",
             "topic": topic_prompt if topic_prompt else "general",
         }

         # Insert system message if configured
         self._insert_system_message_if_configured(messages)

-        # Convert tools to OpenAI format
-
+        # Convert tools to OpenAI format, filtering based on inclusion strategy
+        if self.config.tool_inclusion_strategy == "used_only" and all_executions:
+            used_names = {te.function_name for te in all_executions}
+            tools_openai = [
+                tool.to_openai() for tool in self.tool_registry.tools if tool.name in used_names
+            ]
+        else:
+            tools_openai = [tool.to_openai() for tool in self.tool_registry.tools]

         return Conversation(
             messages=messages,

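Both agent builders now honour a tool_inclusion_strategy setting: with "used_only", the emitted conversation only advertises tools that were actually invoked. The filtering step is easy to isolate; the Tool and ToolExecution classes below are simplified stand-ins for the registry types, not DeepFabric's real ones:

    from dataclasses import dataclass

    @dataclass
    class Tool:  # stand-in for the registry's tool type
        name: str

        def to_openai(self) -> dict:
            return {"type": "function", "function": {"name": self.name}}

    @dataclass
    class ToolExecution:  # stand-in for a recorded tool call
        function_name: str

    def select_tools(tools: list[Tool], calls: list[ToolExecution], strategy: str) -> list[dict]:
        """Mirror of the inclusion logic added above, lifted out of the builder class."""
        if strategy == "used_only" and calls:
            used_names = {te.function_name for te in calls}
            return [tool.to_openai() for tool in tools if tool.name in used_names]
        return [tool.to_openai() for tool in tools]

    tools = [Tool("get_weather"), Tool("get_news")]
    calls = [ToolExecution("get_weather")]
    print(select_tools(tools, calls, "used_only"))  # only get_weather is included
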
deepfabric/cli.py
CHANGED
@@ -23,6 +23,7 @@ from .topic_manager import load_or_build_topic_model, save_topic_model
 from .topic_model import TopicModel
 from .tui import configure_tui, get_tui
 from .update_checker import check_for_updates
+from .utils import get_bool_env
 from .validation import show_validation_success, validate_path_requirements

 OverrideValue = str | int | float | bool | None
@@ -98,7 +99,7 @@ class GenerateOptions(BaseModel):
     tui: Literal["rich", "simple"] = Field(default="rich")

     # Modular conversation configuration
-    conversation_type: Literal["basic", "
+    conversation_type: Literal["basic", "cot"] | None = None
     reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None
     agent_mode: Literal["single_turn", "multi_turn"] | None = None

@@ -107,6 +108,9 @@ class GenerateOptions(BaseModel):
     max_turns: int | None = None
     min_tool_calls: int | None = None

+    # Cloud upload (experimental)
+    cloud_upload: Literal["all", "dataset", "graph", "none"] | None = None
+
     @model_validator(mode="after")
     def validate_mode_constraints(self) -> "GenerateOptions":
         if self.topic_only and self.topics_load:
@@ -333,6 +337,36 @@ def _initialize_topic_model(
     return topic_model


+def _trigger_cloud_upload(
+    *,
+    preparation: GenerationPreparation,
+    options: GenerateOptions,
+    dataset_path: str | None = None,
+) -> None:
+    """Trigger cloud upload if EXPERIMENTAL_DF is enabled and mode is 'graph'.
+
+    Args:
+        preparation: Generation preparation context
+        options: CLI options including cloud_upload flag
+        dataset_path: Path to dataset file (None for topic-only mode)
+    """
+    # Cloud upload only supports graph mode, not tree mode
+    # Use config.topics.mode since options.mode may have CLI default value
+    actual_mode = preparation.config.topics.mode
+    if not (get_bool_env("EXPERIMENTAL_DF") and actual_mode == "graph"):
+        return
+
+    from .cloud_upload import handle_cloud_upload  # noqa: PLC0415
+
+    graph_path = options.topics_save_as or preparation.config.topics.save_as or "topic_graph.json"
+
+    handle_cloud_upload(
+        dataset_path=dataset_path,
+        graph_path=graph_path,
+        cloud_upload_flag=options.cloud_upload,
+    )
+
+
 def _run_generation(
     *,
     preparation: GenerationPreparation,
@@ -366,6 +400,13 @@ def _run_generation(
         {"samples": len(dataset)},
     )

+    # Cloud upload (experimental feature)
+    _trigger_cloud_upload(
+        preparation=preparation,
+        options=options,
+        dataset_path=output_save_path,
+    )
+

 @cli.command()
 @click.argument("config_file", type=click.Path(exists=True), required=False)
@@ -421,13 +462,13 @@ def _run_generation(
 )
 @click.option(
     "--conversation-type",
-    type=click.Choice(["basic", "
-    help="Base conversation type: basic (simple chat),
+    type=click.Choice(["basic", "cot"]),
+    help="Base conversation type: basic (simple chat), cot (with reasoning)",
 )
 @click.option(
     "--reasoning-style",
     type=click.Choice(["freetext", "agent"]),
-    help="Reasoning style for
+    help="Reasoning style for cot: freetext (natural language) or agent (structured for tool-calling)",
 )
 @click.option(
     "--agent-mode",
@@ -449,6 +490,13 @@ def _run_generation(
     type=int,
     help="Minimum tool calls before allowing conversation conclusion",
 )
+@click.option(
+    "--cloud-upload",
+    type=click.Choice(["all", "dataset", "graph", "none"], case_sensitive=False),
+    default=None,
+    help="Upload to DeepFabric Cloud (experimental): all, dataset, graph, or none. "
+    "Enables headless mode for CI. Requires DEEPFABRIC_API_KEY or prior auth.",
+)
 def generate(  # noqa: PLR0913
     config_file: str | None,
     output_system_prompt: str | None = None,
@@ -470,12 +518,13 @@ def generate(  # noqa: PLR0913
     mode: Literal["tree", "graph"] = "tree",
     debug: bool = False,
     topic_only: bool = False,
-    conversation_type: Literal["basic", "
+    conversation_type: Literal["basic", "cot"] | None = None,
     reasoning_style: Literal["freetext", "agent"] | None = None,
     agent_mode: Literal["single_turn", "multi_turn"] | None = None,
     min_turns: int | None = None,
     max_turns: int | None = None,
     min_tool_calls: int | None = None,
+    cloud_upload: Literal["all", "dataset", "graph", "none"] | None = None,
     tui: Literal["rich", "simple"] = "rich",
 ) -> None:
     """Generate training data from a YAML configuration file or CLI parameters."""
@@ -518,6 +567,7 @@ def generate(  # noqa: PLR0913
             min_turns=min_turns,
             max_turns=max_turns,
             min_tool_calls=min_tool_calls,
+            cloud_upload=cloud_upload,
             tui=tui,
         )
     except PydanticValidationError as error:
@@ -541,6 +591,12 @@ def generate(  # noqa: PLR0913
         )

         if topic_only:
+            # Cloud upload for topic-only mode (graph only, no dataset)
+            _trigger_cloud_upload(
+                preparation=preparation,
+                options=options,
+                dataset_path=None,
+            )
             return

         _run_generation(
@@ -557,7 +613,7 @@ def generate(  # noqa: PLR0913
         sys.exit(1)


-@cli.command()
+@cli.command("upload-hf")
 @click.argument("dataset_file", type=click.Path(exists=True))
 @click.option(
     "--repo",
@@ -573,14 +629,14 @@ def generate(  # noqa: PLR0913
     multiple=True,
     help="Tags for the dataset (can be specified multiple times)",
 )
-def
+def upload_hf(
     dataset_file: str,
     repo: str,
     token: str | None = None,
     tags: list[str] | None = None,
 ) -> None:
     """Upload a dataset to Hugging Face Hub."""
-    trace("
+    trace("cli_upload_hf", {"has_tags": len(tags) > 0 if tags else False})

     try:
         # Get token from CLI arg or env var
@@ -689,6 +745,224 @@ def upload_kaggle(
         sys.exit(1)


+# DeepFabric Cloud upload command group
+@click.group()
+def upload() -> None:
+    """Upload datasets and graphs to DeepFabric Cloud."""
+    pass
+
+
+def _upload_to_cloud(
+    file: str,
+    resource_type: Literal["dataset", "graph"],
+    handle: str | None,
+    name: str | None,
+    description: str | None,
+    tags: list[str] | None,
+    config_file: str | None,
+) -> None:
+    """Shared helper for uploading datasets and graphs to DeepFabric Cloud.
+
+    Args:
+        file: Path to the file to upload
+        resource_type: Either "dataset" or "graph"
+        handle: Resource handle (e.g., username/resource-name)
+        name: Display name for the resource
+        description: Description for the resource
+        tags: Tags for the resource (only used for datasets)
+        config_file: Path to config file with upload settings
+    """
+    # Lazy imports to avoid slow startup
+    import httpx  # noqa: PLC0415
+
+    from .auth import DEFAULT_API_URL  # noqa: PLC0415
+    from .cloud_upload import (  # noqa: PLC0415
+        _get_user_friendly_error,
+        build_urls,
+        derive_frontend_url,
+        derive_name_and_slug,
+        ensure_authenticated,
+        get_current_user,
+        upload_dataset,
+        upload_topic_graph,
+    )
+
+    tui = get_tui()
+    config_key = resource_type  # "dataset" or "graph"
+    url_resource_type = "datasets" if resource_type == "dataset" else "graphs"
+
+    # Load handle from config if not provided via CLI
+    final_handle = handle
+    final_description = description or ""
+    final_tags = list(tags) if tags else []
+
+    if config_file:
+        config = DeepFabricConfig.from_yaml(config_file)
+        cloud_config = config.get_deepfabric_cloud_config()
+        if not final_handle:
+            final_handle = cloud_config.get(config_key)
+        if not description and cloud_config.get("description"):
+            final_description = cloud_config.get("description", "")
+        if resource_type == "dataset" and not tags and cloud_config.get("tags"):
+            final_tags = cloud_config.get("tags", [])
+
+    # Ensure authenticated
+    if not ensure_authenticated(DEFAULT_API_URL, headless=False):
+        tui.error("Authentication required. Run 'deepfabric auth login' first.")
+        sys.exit(1)
+
+    # Derive name and slug from filename if not provided
+    default_name, default_slug = derive_name_and_slug(file)
+    final_name = name or default_name
+
+    # Use slug from handle if provided, otherwise use derived slug
+    if final_handle and "/" in final_handle:
+        final_slug = final_handle.split("/")[-1]
+    else:
+        final_slug = final_handle or default_slug
+
+    tui.info(f"Uploading {resource_type} '{final_name}'...")
+
+    try:
+        # Call the appropriate upload function
+        if resource_type == "dataset":
+            result = upload_dataset(
+                dataset_path=file,
+                name=final_name,
+                slug=final_slug,
+                description=final_description,
+                tags=final_tags,
+                api_url=DEFAULT_API_URL,
+            )
+            resource_id = result.get("dataset_id") or result.get("id")
+        else:
+            result = upload_topic_graph(
+                graph_path=file,
+                name=final_name,
+                description=final_description,
+                slug=final_slug,
+                api_url=DEFAULT_API_URL,
+            )
+            resource_id = result.get("id")
+
+        # Display success message
+        tui.success(f"{resource_type.capitalize()} '{final_name}' uploaded successfully!")
+
+        # Display URL if available
+        if resource_id:
+            user_info = get_current_user(DEFAULT_API_URL)
+            username = user_info.get("username") if user_info else None
+            frontend_url = derive_frontend_url(DEFAULT_API_URL)
+            public_url, internal_url = build_urls(
+                url_resource_type, resource_id, final_slug, username, frontend_url
+            )
+            tui.info(f"View at: {public_url or internal_url}")
+
+    except httpx.HTTPStatusError as e:
+        error_msg = _get_user_friendly_error(e)
+        if "already exists" in error_msg.lower():
+            tui.error(
+                f"A {resource_type} with slug '{final_slug}' already exists. "
+                "Use a different --handle value."
+            )
+        else:
+            tui.error(f"Error uploading {resource_type}: {error_msg}")
+        sys.exit(1)
+    except Exception as e:
+        tui.error(f"Error uploading {resource_type}: {str(e)}")
+        sys.exit(1)
+
+
+@upload.command("dataset")
+@click.argument("file", type=click.Path(exists=True))
+@click.option("--handle", help="Dataset handle (e.g., username/dataset-name)")
+@click.option("--name", help="Display name for the dataset")
+@click.option("--description", help="Description for the dataset")
+@click.option(
+    "--tags", multiple=True, help="Tags for the dataset (can be specified multiple times)"
+)
+@click.option(
+    "--config",
+    "config_file",
+    type=click.Path(exists=True),
+    help="Config file with upload settings",
+)
+def upload_dataset_cmd(
+    file: str,
+    handle: str | None,
+    name: str | None,
+    description: str | None,
+    tags: tuple[str, ...],
+    config_file: str | None,
+) -> None:
+    """Upload a dataset to DeepFabric Cloud.
+
+    FILE is the path to the JSONL dataset file.
+
+    Examples:
+
+        deepfabric upload dataset my-dataset.jsonl --handle myuser/my-dataset
+
+        deepfabric upload dataset output.jsonl --config config.yaml
+    """
+    trace(
+        "cli_upload_dataset",
+        {"has_config": config_file is not None, "has_handle": handle is not None},
+    )
+    _upload_to_cloud(
+        file=file,
+        resource_type="dataset",
+        handle=handle,
+        name=name,
+        description=description,
+        tags=list(tags) if tags else None,
+        config_file=config_file,
+    )
+
+
+@upload.command("graph")
+@click.argument("file", type=click.Path(exists=True))
+@click.option("--handle", help="Graph handle (e.g., username/graph-name)")
+@click.option("--name", help="Display name for the graph")
+@click.option("--description", help="Description for the graph")
+@click.option(
+    "--config",
+    "config_file",
+    type=click.Path(exists=True),
+    help="Config file with upload settings",
+)
+def upload_graph_cmd(
+    file: str,
+    handle: str | None,
+    name: str | None,
+    description: str | None,
+    config_file: str | None,
+) -> None:
+    """Upload a topic graph to DeepFabric Cloud.
+
+    FILE is the path to the JSON graph file.
+
+    Examples:
+
+        deepfabric upload graph topic_graph.json --handle myuser/my-graph
+
+        deepfabric upload graph graph.json --config config.yaml
+    """
+    trace(
+        "cli_upload_graph",
+        {"has_config": config_file is not None, "has_handle": handle is not None},
+    )
+    _upload_to_cloud(
+        file=file,
+        resource_type="graph",
+        handle=handle,
+        name=name,
+        description=description,
+        tags=None,
+        config_file=config_file,
+    )
+
+
 @cli.command()
 @click.argument("graph_file", type=click.Path(exists=True))
 @click.option(
@@ -846,8 +1120,10 @@ def info() -> None:
         ("generate", "Generate training data from configuration"),
         ("validate", "Validate a configuration file"),
         ("visualize", "Create SVG visualization of a topic graph"),
-        ("upload", "Upload dataset to Hugging Face Hub"),
+        ("upload-hf", "Upload dataset to Hugging Face Hub"),
         ("upload-kaggle", "Upload dataset to Kaggle"),
+        ("evaluate", "Evaluate a fine-tuned model on tool-calling tasks"),
+        ("import-tools", "Import tool definitions from external sources"),
         ("info", "Show this information"),
     ]
     for cmd, desc in commands:
@@ -863,7 +1139,7 @@ def info() -> None:
        tui.console.print(f"  [yellow]{var}[/yellow] - {desc}")

    tui.console.print(
-        "\nFor more information, visit: [link]https://github.com/
+        "\nFor more information, visit: [link]https://github.com/always-further/deepfabric[/link]"
    )

    except Exception as e:
@@ -979,7 +1255,7 @@ def evaluate(

     # Create inference configuration
     inference_config = InferenceConfig(
-
+        model=model_path,
         adapter_path=adapter_path,
         backend=cast(Literal["transformers", "ollama"], backend),
         temperature=temperature,
@@ -1094,8 +1370,11 @@ def evaluate(
         handle_error(click.get_current_context(), e)


-# Register the auth command
-
+# Register the auth and upload command groups
+# EXPERIMENTAL: Only enable cloud features if explicitly opted in
+if get_bool_env("EXPERIMENTAL_DF"):
+    cli.add_command(auth_group)
+    cli.add_command(upload)


 @cli.command("import-tools")

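Because the auth and upload groups are only registered when EXPERIMENTAL_DF is set at import time, a default install keeps the 4.4.1 command surface. A hedged way to exercise the gated group without touching a real account, using click's standard test runner; the truthy spelling "1" is an assumption about what get_bool_env accepts:

    import os

    from click.testing import CliRunner

    # The flag must be set before deepfabric.cli is imported, since the command
    # group is registered at module import time (see the hunk above).
    os.environ["EXPERIMENTAL_DF"] = "1"  # accepted truthy spelling is an assumption

    from deepfabric.cli import cli

    runner = CliRunner()
    result = runner.invoke(cli, ["upload", "--help"])
    print(result.output)  # should list the "dataset" and "graph" subcommands
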