PyPI - hydraflow - Versions diffs - 0.12.4__tar.gz → 0.13.0__tar.gz - Mend

hydraflow 0.12.4__tar.gz → 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

{hydraflow-0.12.4 → hydraflow-0.13.0}/.github/workflows/ci.yaml RENAMED Viewed

@@ -14,7 +14,7 @@ env:
   FORCE_COLOR: "1"
 jobs:
-  run:
+  ci:
     name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
     runs-on: ${{ matrix.os }}
     strategy:
@@ -29,7 +29,6 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          allow-prereleases: true
       - name: Install uv and ruff
         run: pip install uv ruff
       - name: Install the project

{hydraflow-0.12.4 → hydraflow-0.13.0}/.github/workflows/docs.yaml RENAMED Viewed

@@ -1,25 +1,29 @@
 name: Documentation
 on:
   push:
     branches: [main]
-    tags: ["*"]
-permissions:
-  contents: write
+    tags:
+      - "[0-9]+.[0-9]+.[0-9]+"
 jobs:
-  deploy:
-    name: Documentation
+  docs:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
     steps:
       - uses: actions/checkout@v4
       - name: Configure Git Credentials
         run: |
           git config user.name github-actions[bot]
           git config user.email 41898282+github-actions[bot]@users.noreply.github.com
-      - name: Set up Python 3.11
+      - name: Set up Python 3.13
         uses: actions/setup-python@v5
         with:
-          python-version: 3.11
-      - name: Install package
-        run: pip install -e . mkapi markdown-exec[ansi]
+          python-version: 3.13
+      - name: Install uv
+        run: pip install uv
+      - name: Install the project
+        run: uv sync --group docs
       - name: Deploy documentation
-        run: mkdocs gh-deploy --force
+        run: uv run mkdocs gh-deploy --force

{hydraflow-0.12.4 → hydraflow-0.13.0}/.github/workflows/publish.yaml RENAMED Viewed

@@ -7,7 +7,6 @@ on:
 jobs:
   publish:
-    name: Publish
     runs-on: ubuntu-latest
     permissions:
       id-token: write

{hydraflow-0.12.4 → hydraflow-0.13.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.12.4
+Version: 0.13.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.12.4 → hydraflow-0.13.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.12.4"
+version = "0.13.0"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = { file = "LICENSE" }
@@ -92,6 +92,7 @@ ignore = [
   "S603",
   "SIM102",
   "SIM108",
+  "SIM115",
   "TRY003",
 ]

hydraflow-0.13.0/src/hydraflow/cli.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""Hydraflow CLI."""
+from __future__ import annotations
+import shlex
+from typing import Annotated
+import typer
+from rich.console import Console
+from typer import Argument, Option
+# Typer application object; the @app.command / @app.callback functions below attach to it.
+# add_completion=False is passed so Typer does not add its shell-completion options.
+app = typer.Typer(add_completion=False)
+# Rich console created at import time (not referenced by the commands visible in this diff).
+console = Console()
+# ignore_unknown_options: option-like tokens that this command does not define
+# are kept as plain arguments (collected in `args`) instead of causing a usage error.
+@app.command(context_settings={"ignore_unknown_options": True})
+def run(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
+    *,
+    args: Annotated[
+        list[str] | None,
+        Argument(help="Arguments to pass to the job.", show_default=False),
+    ] = None,
+    dry_run: Annotated[
+        bool,
+        Option("--dry-run", help="Perform a dry run."),
+    ] = False,
+) -> None:
+    """Run a job."""
+    # NOTE: the docstring above is deliberately left short; Typer shows it as the command's help text.
+    # Imported inside the command, so these modules load only when `run` is invoked.
+    from hydraflow.executor.io import get_job
+    from hydraflow.executor.job import iter_batches, iter_calls, iter_runs
+    # No extra arguments given -> work with an empty list.
+    args = args or []
+    job = get_job(name)
+    # A job is executed either as a shell command (`job.run`) or as a Python
+    # function call (`job.call`); `run` wins when both are set. The extra CLI
+    # arguments are appended to the shlex-split command string.
+    if job.run:
+        args = [*shlex.split(job.run), *args]
+        it = iter_runs(args, iter_batches(job), dry_run=dry_run)
+    elif job.call:
+        args = [*shlex.split(job.call), *args]
+        it = iter_calls(args, iter_batches(job), dry_run=dry_run)
+    else:
+        # Neither `run` nor `call` is set: report and exit with status 1.
+        typer.echo(f"No command found in job: {job.name}.")
+        raise typer.Exit(1)
+    # The MLflow experiment (named after the job) is only set for real executions.
+    if not dry_run:
+        import mlflow
+        mlflow.set_experiment(job.name)
+    # `it` is a lazy generator: each iteration step performs one subprocess run or
+    # function call (or, in a dry run, only builds the task). Outside a dry run the
+    # yielded task is not inspected here.
+    for task in it:  # jobs will be executed here
+        if job.run and dry_run:
+            typer.echo(shlex.join(task.args))
+        elif job.call and dry_run:
+            # Rebinds the local `args`; output looks like funcname(['arg1', 'arg2']).
+            funcname, *args = task.args
+            arg = ", ".join(f"{arg!r}" for arg in args)
+            typer.echo(f"{funcname}([{arg}])")
+# ignore_unknown_options: option-like tokens that this command does not define
+# are kept as plain arguments (collected in `args`) instead of causing a usage error.
+@app.command(context_settings={"ignore_unknown_options": True})
+def submit(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
+    *,
+    args: Annotated[
+        list[str] | None,
+        Argument(help="Arguments to pass to the job.", show_default=False),
+    ] = None,
+    dry_run: Annotated[
+        bool,
+        Option("--dry-run", help="Perform a dry run."),
+    ] = False,
+) -> None:
+    """Submit a job."""
+    # NOTE: the docstring above is deliberately left short; Typer shows it as the command's help text.
+    # The local import below binds `submit` to hydraflow.executor.job.submit inside
+    # this function, shadowing this command's own name for the rest of the body.
+    from hydraflow.executor.io import get_job
+    from hydraflow.executor.job import iter_batches, submit
+    args = args or []
+    job = get_job(name)
+    # Only `job.run` is consulted here; a job without it cannot be submitted.
+    if not job.run:
+        typer.echo(f"No run found in job: {job.name}.")
+        raise typer.Exit(1)
+    # The MLflow experiment (named after the job) is only set for real executions.
+    if not dry_run:
+        import mlflow
+        mlflow.set_experiment(job.name)
+    args = [*shlex.split(job.run), *args]
+    result = submit(args, iter_batches(job), dry_run=dry_run)
+    # In a dry run executor.job.submit returns (command, text): the command's last
+    # element is the path of the batch file and `text` holds one batch per line.
+    # Print the command once per batch, with the file path replaced by that
+    # batch's arguments.
+    if dry_run and isinstance(result, tuple):
+        for line in result[1].splitlines():
+            args = shlex.split(line)
+            typer.echo(shlex.join([*result[0][:-1], *args]))
+@app.command()
+def show(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)] = "",
+) -> None:
+    """Show the hydraflow config."""
+    # NOTE: the docstring above is deliberately left short; Typer shows it as the command's help text.
+    from omegaconf import OmegaConf
+    from hydraflow.executor.io import get_job, load_config
+    # An empty `name` (the default) selects the whole loaded config; otherwise
+    # only the named job is shown.
+    if name:
+        cfg = get_job(name)
+    else:
+        cfg = load_config()
+    # The selected object is rendered as YAML text.
+    typer.echo(OmegaConf.to_yaml(cfg))
+# invoke_without_command=True: the callback also runs when no subcommand is
+# given, which is what lets a bare `--version` invocation reach it.
+@app.callback(invoke_without_command=True)
+def callback(
+    *,
+    version: Annotated[
+        bool,
+        Option("--version", help="Show the version and exit."),
+    ] = False,
+) -> None:
+    # No docstring is added on purpose: Typer would display it as the application's help text.
+    if version:
+        import importlib.metadata
+        # The version is read from the installed distribution metadata of "hydraflow".
+        typer.echo(f"hydraflow {importlib.metadata.version('hydraflow')}")
+        # Raised without arguments; presumably exits with Typer's default status 0 - TODO confirm.
+        raise typer.Exit

{hydraflow-0.12.4 → hydraflow-0.13.0}/src/hydraflow/core/io.py RENAMED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import fnmatch
 import shutil
 import urllib.parse
 import urllib.request
@@ -152,21 +153,6 @@ def remove_run(run: Run | Iterable[Run]) -> None:
     shutil.rmtree(get_artifact_dir(run).parent)
-def get_root_dir(uri: str | Path | None = None) -> Path:
-    """Get the root directory for the MLflow tracking server."""
-    import mlflow
-    if uri is not None:
-        return Path(uri).absolute()
-    uri = mlflow.get_tracking_uri()
-    if uri.startswith("file:"):
-        return file_uri_to_path(uri)
-    return Path(uri).absolute()
 def get_experiment_name(path: Path) -> str | None:
     """Get the experiment name from the meta file."""
     metafile = path / "meta.yaml"
@@ -195,50 +181,49 @@ def predicate_experiment_dir(
         return True
     if isinstance(experiment_names, list):
-        return name in experiment_names
+        return any(fnmatch.fnmatch(name, e) for e in experiment_names)
     return experiment_names(name)
 def iter_experiment_dirs(
+    root_dir: str | Path,
     experiment_names: str | list[str] | Callable[[str], bool] | None = None,
-    root_dir: str | Path | None = None,
 ) -> Iterator[Path]:
     """Iterate over the experiment directories in the root directory."""
     if isinstance(experiment_names, str):
         experiment_names = [experiment_names]
-    root_dir = get_root_dir(root_dir)
-    for path in root_dir.iterdir():
+    for path in Path(root_dir).iterdir():
         if predicate_experiment_dir(path, experiment_names):
             yield path
 def iter_run_dirs(
+    root_dir: str | Path,
     experiment_names: str | list[str] | Callable[[str], bool] | None = None,
-    root_dir: str | Path | None = None,
 ) -> Iterator[Path]:
     """Iterate over the run directories in the root directory."""
-    for experiment_dir in iter_experiment_dirs(experiment_names, root_dir):
+    for experiment_dir in iter_experiment_dirs(root_dir, experiment_names):
         for path in experiment_dir.iterdir():
             if path.is_dir() and (path / "artifacts").exists():
                 yield path
 def iter_artifacts_dirs(
+    root_dir: str | Path,
     experiment_names: str | list[str] | Callable[[str], bool] | None = None,
-    root_dir: str | Path | None = None,
 ) -> Iterator[Path]:
     """Iterate over the artifacts directories in the root directory."""
-    for path in iter_run_dirs(experiment_names, root_dir):
+    for path in iter_run_dirs(root_dir, experiment_names):
         yield path / "artifacts"
 def iter_artifact_paths(
+    root_dir: str | Path,
     artifact_path: str | Path,
     experiment_names: str | list[str] | Callable[[str], bool] | None = None,
-    root_dir: str | Path | None = None,
 ) -> Iterator[Path]:
     """Iterate over the artifact paths in the root directory."""
-    for path in iter_artifacts_dirs(experiment_names, root_dir):
+    for path in iter_artifacts_dirs(root_dir, experiment_names):
         yield path / artifact_path

hydraflow-0.13.0/src/hydraflow/executor/job.py ADDED Viewed

@@ -0,0 +1,222 @@
+"""Job execution and argument handling for HydraFlow.
+This module provides functionality for executing jobs in HydraFlow, including:
+- Argument parsing and expansion for job steps
+- Batch processing of Hydra configurations
+- Execution of jobs via shell commands or Python functions
+The module supports two execution modes:
+1. Shell command execution
+2. Python function calls
+Each job can consist of multiple steps, and each step can have its own
+arguments and configurations that will be expanded into multiple runs.
+"""
+from __future__ import annotations
+import importlib
+import shlex
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from subprocess import CompletedProcess
+from tempfile import NamedTemporaryFile
+from typing import TYPE_CHECKING, overload
+import ulid
+from .parser import collect, expand
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterable, Iterator
+    from subprocess import CompletedProcess
+    from typing import Any
+    from .conf import Job
+def iter_args(batch: str, args: str) -> Iterator[list[str]]:
+    """Iterate over combinations generated from parsed arguments.
+    Generate all possible combinations of arguments by parsing and
+    expanding each one, yielding them as an iterator.
+    Args:
+        batch (str): The batch to parse.
+        args (str): The arguments to parse.
+    Yields:
+        list[str]: One expanded combination of `batch` followed by the
+            collected `args`.
+    """
+    # `collect` and `expand` come from the sibling `.parser` module (not shown
+    # here). `args` is collected once and appended unchanged to every
+    # combination that `expand` produces from `batch`.
+    args_ = collect(args)
+    for batch_ in expand(batch):
+        yield [*batch_, *args_]
+def iter_batches(job: Job) -> Iterator[list[str]]:
+    """Generate Hydra application arguments for a job.
+    This function generates a list of Hydra application arguments
+    for each batch of a given job, including the job name and the
+    root directory for the sweep.
+    Args:
+        job (Job): The job to generate the Hydra configuration for.
+    Yields:
+        list[str]: ``["--multirun", *args, job_name, sweep_dir, *configs]``
+            for every argument combination of every step.
+    """
+    job_name = f"hydra.job.name={job.name}"
+    job_configs = shlex.split(job.with_)
+    for step in job.steps:
+        # A step-level `with_` replaces (does not extend) the job-level one; the
+        # job-level value is used only when the step's value splits to an empty list.
+        configs = shlex.split(step.with_) or job_configs
+        for args in iter_args(step.batch, step.args):
+            # A new ULID per yielded batch gives each batch its own sweep
+            # directory under multirun/.
+            sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ULID()}"
+            yield ["--multirun", *args, job_name, sweep_dir, *configs]
+@dataclass
+class Task:
+    """An executed task."""
+    # NOTE: iter_runs / iter_calls also yield a bare Task (no result) for each
+    # batch in dry-run mode, i.e. for tasks that were not executed.
+    # Full command: executable or function name followed by all of its arguments.
+    args: list[str]
+    # Number of batches being processed.
+    total: int
+    # 1-based position of this task among the batches.
+    completed: int
+@dataclass
+class Run(Task):
+    """An executed run."""
+    # CompletedProcess returned by subprocess.run for this run's command.
+    result: CompletedProcess
+@dataclass
+class Call(Task):
+    """An executed call."""
+    # Return value of the called function.
+    # `Any` is imported only under TYPE_CHECKING; because the module uses
+    # `from __future__ import annotations`, the annotation stays a string and
+    # is not evaluated when the class is created.
+    result: Any
+# Overload: without `dry_run` every yielded item is typed as a Run.
+@overload
+def iter_runs(args: list[str], iterable: Iterable[list[str]]) -> Iterator[Run]: ...
+# Overload: with `dry_run` the items may be plain Task objects.
+@overload
+def iter_runs(
+    args: list[str],
+    iterable: Iterable[list[str]],
+    *,
+    dry_run: bool = False,
+) -> Iterator[Task | Run]: ...
+def iter_runs(
+    args: list[str],
+    iterable: Iterable[list[str]],
+    *,
+    dry_run: bool = False,
+) -> Iterator[Task | Run]:
+    """Execute multiple runs of a job using shell commands.
+    Args:
+        args (list[str]): The command; the first element is the executable
+            and the rest are placed before each batch's arguments.
+        iterable (Iterable[list[str]]): Batches of arguments; one
+            subprocess is started per batch.
+        dry_run (bool): If True, build the commands without executing them.
+    Yields:
+        Task | Run: A `Run` holding the `CompletedProcess` for each executed
+            batch, or a plain `Task` for each batch in dry-run mode.
+    """
+    executable, *args = args
+    # On Windows a literal "python" is replaced by the interpreter running this process.
+    if executable == "python" and sys.platform == "win32":
+        executable = sys.executable
+    # Materialised so the total is known up front; `iterable` is fully consumed
+    # before the first run starts.
+    iterable = list(iterable)
+    total = len(iterable)
+    for completed, args_ in enumerate(iterable, 1):
+        cmd = [executable, *args, *args_]
+        if dry_run:
+            yield Task(cmd, total, completed)
+        else:
+            # check=False: a non-zero exit status does not raise; it is
+            # available to the caller through `result`.
+            result = subprocess.run(cmd, check=False)
+            yield Run(cmd, total, completed, result)
+# Overload: without `dry_run` every yielded item is typed as a Call.
+@overload
+def iter_calls(args: list[str], iterable: Iterable[list[str]]) -> Iterator[Call]: ...
+# Overload: with `dry_run` the items may be plain Task objects.
+@overload
+def iter_calls(
+    args: list[str],
+    iterable: Iterable[list[str]],
+    *,
+    dry_run: bool = False,
+) -> Iterator[Task | Call]: ...
+def iter_calls(
+    args: list[str],
+    iterable: Iterable[list[str]],
+    *,
+    dry_run: bool = False,
+) -> Iterator[Task | Call]:
+    """Execute multiple calls of a job using Python functions.
+    Args:
+        args (list[str]): The dotted function path followed by arguments
+            that are placed before each batch's arguments.
+        iterable (Iterable[list[str]]): Batches of arguments; the function
+            is called once per batch.
+        dry_run (bool): If True, build the tasks without calling the function.
+    Yields:
+        Task | Call: A `Call` holding the function's return value for each
+            executed batch, or a plain `Task` for each batch in dry-run mode.
+    Raises:
+        ValueError: If the function path cannot be resolved by `get_callable`.
+    """
+    funcname, *args = args
+    # The function is resolved (and its module imported) even in dry-run mode.
+    func = get_callable(funcname)
+    # Materialised so the total is known up front; `iterable` is fully consumed
+    # before the first call is made.
+    iterable = list(iterable)
+    total = len(iterable)
+    for completed, args_ in enumerate(iterable, 1):
+        cmd = [funcname, *args, *args_]
+        if dry_run:
+            yield Task(cmd, total, completed)
+        else:
+            # The function receives a single positional argument: the list of
+            # all arguments (without the function name).
+            result = func([*args, *args_])
+            yield Call(cmd, total, completed, result)
+def submit(
+    args: list[str],
+    iterable: Iterable[list[str]],
+    *,
+    dry_run: bool = False,
+) -> CompletedProcess | tuple[list[str], str]:
+    """Submit entire job using a shell command.
+    All batches are written to a temporary file in the current working
+    directory, one shell-quoted line per batch, and the file's path is
+    appended as the last argument of the command.
+    Args:
+        args (list[str]): The command; the first element is the executable.
+        iterable (Iterable[list[str]]): Batches of arguments to write to
+            the file.
+        dry_run (bool): If True, return the command and file content
+            instead of running the command.
+    Returns:
+        CompletedProcess | tuple[list[str], str]: The result of
+            `subprocess.run`, or in dry-run mode the tuple ``(cmd, text)``.
+    """
+    executable, *args = args
+    # On Windows a literal "python" is replaced by the interpreter running this process.
+    if executable == "python" and sys.platform == "win32":
+        executable = sys.executable
+    # The temporary file is only used to reserve a unique path: it is closed at
+    # once and written through `Path` below. Presumably this is because Windows
+    # does not allow reopening a NamedTemporaryFile that is still open - TODO confirm.
+    temp = NamedTemporaryFile(dir=Path.cwd(), delete=False)  # for Windows
+    file = Path(temp.name)
+    temp.close()
+    # The loop variable `args` is local to the generator expression, so the
+    # outer `args` used for `cmd` below is unaffected.
+    text = "\n".join(shlex.join(args) for args in iterable)
+    file.write_text(text)
+    cmd = [executable, *args, file.as_posix()]
+    try:
+        if dry_run:
+            return cmd, text
+        # check=False: a non-zero exit status does not raise.
+        return subprocess.run(cmd, check=False)
+    finally:
+        # The file is removed on every path, so in dry-run mode the last element
+        # of the returned `cmd` names a file that no longer exists.
+        file.unlink(missing_ok=True)
+def get_callable(name: str) -> Callable:
+    """Get a callable from a function name.
+    Args:
+        name (str): Dotted path of the form ``package.module.function``.
+    Returns:
+        Callable: The attribute found under the last path component in the
+            imported module. It is not checked to be callable.
+    Raises:
+        ValueError: If `name` contains no dot, or if the module cannot be
+            imported or does not have the attribute.
+    """
+    if "." not in name:
+        msg = f"Invalid function path: {name}."
+        raise ValueError(msg)
+    try:
+        # Everything before the last dot is the module, the rest the attribute name.
+        module_name, func_name = name.rsplit(".", 1)
+        module = importlib.import_module(module_name)
+        return getattr(module, func_name)
+    # ModuleNotFoundError is a subclass of ImportError, so listing it is redundant.
+    except (ImportError, AttributeError, ModuleNotFoundError) as e:
+        msg = f"Failed to import or find function: {name}"
+        raise ValueError(msg) from e

{hydraflow-0.12.4 → hydraflow-0.13.0}/tests/cli/conftest.py RENAMED Viewed

@@ -10,3 +10,5 @@ def setup(chdir):
     copy(src, src.name)
     src = Path(__file__).parent / "app.py"
     copy(src, src.name)
+    src = Path(__file__).parent / "submit.py"
+    copy(src, src.name)

{hydraflow-0.12.4 → hydraflow-0.13.0}/tests/cli/hydraflow.yaml RENAMED Viewed

@@ -20,3 +20,23 @@ jobs:
       - batch: name=b
         args: count=11:14
         with: hydra/launcher=joblib hydra.launcher.n_jobs=4
+  echo:
+    call: typer.echo a b c
+    steps:
+      - batch: name=a,b
+        args: count=1:3
+      - batch: name=c,d
+        args: count=4:6
+  submit:
+    run: python submit.py
+    steps:
+      - batch: name=a,b
+        args: count=1
+      - batch: name=c
+        args: count=5
+      - batch: name=d
+        args: count=6
+  error:
+    steps:
+      - batch: name=a
+        args: count=1:3

hydraflow-0.13.0/tests/cli/submit.py ADDED Viewed

@@ -0,0 +1,17 @@
+from __future__ import annotations
+import shlex
+import subprocess
+import sys
+from pathlib import Path
+def main():
+    """Run app.py once for each line of the file named by the last command-line argument."""
+    file = Path(sys.argv[-1])
+    for line in file.read_text().splitlines():
+        # Each line holds the shell-quoted arguments of one run.
+        args = shlex.split(line)
+        # check=False: a failing run does not raise, so the remaining lines are still processed.
+        subprocess.run([sys.executable, "app.py", *args], check=False)
+# Run only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()

hydraflow 0.12.4__tar.gz → 0.13.0__tar.gz

hydraflow 0.12.4__tar.gz → 0.13.0__tar.gz