PyPI - hydraflow - Versions diffs - 0.8.0__tar.gz → 0.9.0__tar.gz - Mend

hydraflow 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

{hydraflow-0.8.0 → hydraflow-0.9.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.8.0
+Version: 0.9.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -41,6 +41,7 @@ Requires-Dist: mlflow>=2.15
 Requires-Dist: omegaconf
 Requires-Dist: rich
 Requires-Dist: typer
+Requires-Dist: ulid
 Description-Content-Type: text/markdown
 # Hydraflow
@@ -93,31 +94,29 @@ pip install hydraflow
 Here is a simple example to get you started with Hydraflow:
 ```python
-import hydra
-import hydraflow
-import mlflow
+from __future__ import annotations
 from dataclasses import dataclass
-from hydra.core.config_store import ConfigStore
 from pathlib import Path
+from typing import TYPE_CHECKING
-@dataclass
-class MySQLConfig:
-    host: str = "localhost"
-    port: int = 3306
+import hydraflow
-cs = ConfigStore.instance()
-cs.store(name="config", node=MySQLConfig)
+if TYPE_CHECKING:
+    from mlflow.entities import Run
+@dataclass
+class Config:
+    count: int = 1
+    name: str = "a"
-@hydra.main(config_name="config", version_base=None)
-def my_app(cfg: MySQLConfig) -> None:
-    # Set experiment by Hydra job name.
-    hydraflow.set_experiment()
-    # Automatically log Hydra config as params.
-    with hydraflow.start_run(cfg):
-        # Your app code below.
+@hydraflow.main(Config)
+def app(run: Run, cfg: Config):
+    """Your app code here."""
 if __name__ == "__main__":
-    my_app()
+    app()
 ```

{hydraflow-0.8.0 → hydraflow-0.9.0}/README.md RENAMED Viewed

@@ -48,31 +48,29 @@ pip install hydraflow
 Here is a simple example to get you started with Hydraflow:
 ```python
-import hydra
-import hydraflow
-import mlflow
+from __future__ import annotations
 from dataclasses import dataclass
-from hydra.core.config_store import ConfigStore
 from pathlib import Path
+from typing import TYPE_CHECKING
-@dataclass
-class MySQLConfig:
-    host: str = "localhost"
-    port: int = 3306
+import hydraflow
-cs = ConfigStore.instance()
-cs.store(name="config", node=MySQLConfig)
+if TYPE_CHECKING:
+    from mlflow.entities import Run
+@dataclass
+class Config:
+    count: int = 1
+    name: str = "a"
-@hydra.main(config_name="config", version_base=None)
-def my_app(cfg: MySQLConfig) -> None:
-    # Set experiment by Hydra job name.
-    hydraflow.set_experiment()
-    # Automatically log Hydra config as params.
-    with hydraflow.start_run(cfg):
-        # Your app code below.
+@hydraflow.main(Config)
+def app(run: Run, cfg: Config):
+    """Your app code here."""
 if __name__ == "__main__":
-    my_app()
+    app()
 ```

{hydraflow-0.8.0 → hydraflow-0.9.0}/docs/usage/quickstart.md RENAMED Viewed

@@ -12,16 +12,6 @@ There are two main steps to using Hydraflow:
 --8<-- "apps/quickstart.py"
 ```
-### Set the MLflow experiment
-[`hydraflow.set_experiment`][] sets the MLflow experiment using the Hydra job name.
-Optionally, it can also set the tracking URI with `uri` argument.
-For example,
-```python
-hydraflow.set_experiment(uri="sqlite:///mlruns.db")
-```
 ### Start a new MLflow run
 [`hydraflow.start_run`][] starts a new MLflow run that logs the Hydra configuration.
@@ -64,10 +54,8 @@ $ python apps/quickstart.py -m width=400,600 height=100,200,300
 ### Run collection
 ```pycon exec="1" source="console" session="quickstart"
->>> import mlflow
->>> mlflow.set_experiment("quickstart")
 >>> import hydraflow
->>> rc = hydraflow.list_runs()
+>>> rc = hydraflow.list_runs("quickstart")
 >>> print(rc)
 ```
@@ -107,16 +95,6 @@ $ python apps/quickstart.py -m width=400,600 height=100,200,300
 >>> print(filtered)
 ```
-```pycon exec="1" source="console" session="quickstart"
->>> run = rc.find(height=100)
->>> print(run.data.params)
-```
-```pycon exec="1" source="console" session="quickstart"
->>> run = rc.find_last(height=100)
->>> print(run.data.params)
-```
 ### Group runs
 ```pycon exec="1" source="console" session="quickstart"

{hydraflow-0.8.0 → hydraflow-0.9.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.8.0"
+version = "0.9.0"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = { file = "LICENSE" }
@@ -19,7 +19,14 @@ classifiers = [
   "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.10"
-dependencies = ["hydra-core>=1.3", "mlflow>=2.15", "omegaconf", "rich", "typer"]
+dependencies = [
+  "hydra-core>=1.3",
+  "mlflow>=2.15",
+  "omegaconf",
+  "rich",
+  "typer",
+  "ulid",
+]
 [project.urls]
 Documentation = "https://daizutabi.github.io/hydraflow/"
@@ -44,6 +51,7 @@ addopts = [
   "--cov=hydraflow",
   "--cov-report=lcov:lcov.info",
   "--dist=loadgroup",
+  "--doctest-modules",
   "-n8",
 ]
 filterwarnings = [
@@ -67,6 +75,7 @@ ignore = [
   "ANN003",
   "ANN401",
   "B904",
+  "D104",
   "D105",
   "D107",
   "D203",
@@ -79,13 +88,15 @@ ignore = [
   "PLR0913",
   "PLR1704",
   "PLR2004",
+  "S603",
   "SIM102",
   "SIM108",
   "TRY003",
 ]
 [tool.ruff.lint.per-file-ignores]
-"tests/*" = ["A001", "ANN", "ARG", "D", "FBT", "PD", "PLR", "PT", "S", "SLF"]
 "apps/*.py" = ["D", "G", "INP"]
-"src/hydraflow/main.py" = ["ANN201", "D401"]
 "src/hydraflow/cli.py" = ["ANN", "D"]
+"src/hydraflow/core/main.py" = ["ANN201", "D401"]
+"src/hydraflow/executor/conf.py" = ["ANN", "D"]
+"tests/*" = ["A001", "ANN", "ARG", "D", "FBT", "PD", "PLR", "PT", "S", "SLF"]

{hydraflow-0.8.0 → hydraflow-0.9.0}/src/hydraflow/__init__.py RENAMED Viewed

@@ -1,16 +1,16 @@
 """Integrate Hydra and MLflow to manage and track machine learning experiments."""
-from hydraflow.context import chdir_artifact, log_run, start_run
-from hydraflow.main import main
-from hydraflow.mlflow import list_run_ids, list_run_paths, list_runs
-from hydraflow.run_collection import RunCollection
-from hydraflow.utils import (
+from hydraflow.core.context import chdir_artifact, log_run, start_run
+from hydraflow.core.io import (
     get_artifact_dir,
     get_artifact_path,
     get_hydra_output_dir,
     load_config,
     remove_run,
 )
+from hydraflow.core.main import main
+from hydraflow.core.mlflow import list_run_ids, list_run_paths, list_runs
+from hydraflow.entities.run_collection import RunCollection
 __all__ = [
     "RunCollection",

hydraflow-0.9.0/src/hydraflow/cli.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Hydraflow CLI."""
+from __future__ import annotations
+from typing import TYPE_CHECKING, Annotated
+import typer
+from rich.console import Console
+from typer import Argument, Option
+from hydraflow.executor.io import load_config
+if TYPE_CHECKING:
+    from hydraflow.executor.job import Job
+app = typer.Typer(add_completion=False)
+console = Console()
+def get_job(name: str) -> Job:
+    cfg = load_config()
+    job = cfg.jobs[name]
+    if not job.name:
+        job.name = name
+    return job
+@app.command()
+def run(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
+) -> None:
+    """Run a job."""
+    import mlflow
+    from hydraflow.executor.job import multirun
+    job = get_job(name)
+    mlflow.set_experiment(job.name)
+    multirun(job)
+@app.command()
+def show(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
+) -> None:
+    """Show a job."""
+    from hydraflow.executor.job import show
+    job = get_job(name)
+    show(job)
+@app.callback(invoke_without_command=True)
+def callback(
+    *,
+    version: Annotated[
+        bool,
+        Option("--version", help="Show the version and exit."),
+    ] = False,
+) -> None:
+    if version:
+        import importlib.metadata
+        typer.echo(f"hydraflow {importlib.metadata.version('hydraflow')}")
+        raise typer.Exit

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/core}/context.py RENAMED Viewed

@@ -12,8 +12,9 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
-from hydraflow.mlflow import log_params, log_text
-from hydraflow.utils import get_artifact_dir
+from hydraflow.core.io import get_artifact_dir
+from .mlflow import log_params, log_text
 if TYPE_CHECKING:
     from collections.abc import Iterator

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/core}/main.py RENAMED Viewed

@@ -7,6 +7,7 @@ management.
 The main functionality is provided through the `main` decorator, which can be
 used to wrap experiment entry points. This decorator handles:
 - Configuration management via Hydra
 - Experiment tracking via MLflow
 - Run deduplication based on configurations
@@ -44,11 +45,12 @@ from mlflow.entities import RunStatus
 from omegaconf import OmegaConf
 import hydraflow
-from hydraflow.utils import file_uri_to_path
+from hydraflow.core.io import file_uri_to_path
 if TYPE_CHECKING:
     from collections.abc import Callable
     from pathlib import Path
+    from typing import Any
     from mlflow.entities import Run
@@ -115,7 +117,7 @@ def main(
     return decorator
-def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | None:
+def get_run_id(uri: str, config: Any, overrides: list[str] | None) -> str | None:
     """Try to get the run ID for the given configuration.
     If the run is not found, the function will return None.
@@ -137,7 +139,7 @@ def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | N
     return None
-def equals(run_dir: Path, config: object, overrides: list[str] | None) -> bool:
+def equals(run_dir: Path, config: Any, overrides: list[str] | None) -> bool:
     """Check if the run directory matches the given configuration or overrides.
     Args:

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/core}/mlflow.py RENAMED Viewed

@@ -13,9 +13,10 @@ import joblib
 import mlflow
 import mlflow.artifacts
-from hydraflow.config import iter_params
-from hydraflow.run_collection import RunCollection
-from hydraflow.utils import file_uri_to_path, get_artifact_dir
+from hydraflow.core.io import file_uri_to_path, get_artifact_dir
+from hydraflow.entities.run_collection import RunCollection
+from .config import iter_params
 if TYPE_CHECKING:
     from pathlib import Path

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/entities}/run_collection.py RENAMED Viewed

@@ -25,12 +25,13 @@ from typing import TYPE_CHECKING, Any, overload
 from mlflow.entities import RunStatus
-import hydraflow.param
-from hydraflow.config import iter_params, select_config, select_overrides
-from hydraflow.param import get_params, get_values
-from hydraflow.run_data import RunCollectionData
-from hydraflow.run_info import RunCollectionInfo
-from hydraflow.utils import load_config
+import hydraflow.core.param
+from hydraflow.core.config import iter_params, select_config, select_overrides
+from hydraflow.core.io import load_config
+from hydraflow.core.param import get_params, get_values
+from .run_data import RunCollectionData
+from .run_info import RunCollectionInfo
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -478,7 +479,7 @@ def _param_matches(run: Run, key: str, value: Any) -> bool:
     if param == "None":
         return value is None or value == "None"
-    return hydraflow.param.match(param, value)
+    return hydraflow.core.param.match(param, value)
 def filter_runs(

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/entities}/run_data.py RENAMED Viewed

@@ -6,14 +6,14 @@ from typing import TYPE_CHECKING
 from pandas import DataFrame
-from hydraflow.config import iter_params
-from hydraflow.utils import load_config
+from hydraflow.core.config import iter_params
+from hydraflow.core.io import load_config
 if TYPE_CHECKING:
     from collections.abc import Iterable
     from typing import Any
-    from hydraflow.run_collection import RunCollection
+    from .run_collection import RunCollection
 class RunCollectionData:

{hydraflow-0.8.0/src/hydraflow → hydraflow-0.9.0/src/hydraflow/entities}/run_info.py RENAMED Viewed

@@ -4,12 +4,12 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
-from hydraflow.utils import get_artifact_dir
+from hydraflow.core.io import get_artifact_dir
 if TYPE_CHECKING:
     from pathlib import Path
-    from hydraflow.run_collection import RunCollection
+    from .run_collection import RunCollection
 class RunCollectionInfo:

hydraflow-0.9.0/src/hydraflow/executor/conf.py ADDED Viewed

@@ -0,0 +1,23 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+@dataclass
+class Step:
+    args: str = ""
+    batch: str = ""
+    options: str = ""
+@dataclass
+class Job:
+    name: str = ""
+    run: str = ""
+    call: str = ""
+    steps: list[Step] = field(default_factory=list)
+@dataclass
+class HydraflowConf:
+    jobs: dict[str, Job] = field(default_factory=dict)

hydraflow-0.9.0/src/hydraflow/executor/io.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""Hydraflow jobs IO."""
+from __future__ import annotations
+from pathlib import Path
+from omegaconf import OmegaConf
+from .conf import HydraflowConf
+def find_config_file() -> Path | None:
+    """Find the hydraflow config file."""
+    if Path("hydraflow.yaml").exists():
+        return Path("hydraflow.yaml")
+    if Path("hydraflow.yml").exists():
+        return Path("hydraflow.yml")
+    return None
+def load_config() -> HydraflowConf:
+    """Load the hydraflow config."""
+    schema = OmegaConf.structured(HydraflowConf)
+    path = find_config_file()
+    if path is None:
+        return schema
+    cfg = OmegaConf.load(path)
+    return OmegaConf.merge(schema, cfg)  # type: ignore

hydraflow-0.9.0/src/hydraflow/executor/job.py ADDED Viewed

@@ -0,0 +1,152 @@
+"""Job execution and argument handling for HydraFlow.
+This module provides functionality for executing jobs in HydraFlow, including:
+- Argument parsing and expansion for job steps
+- Batch processing of Hydra configurations
+- Execution of jobs via shell commands or Python functions
+The module supports two execution modes:
+1. Shell command execution
+2. Python function calls
+Each job can consist of multiple steps, and each step can have its own
+arguments and options that will be expanded into multiple runs.
+"""
+from __future__ import annotations
+import importlib
+import shlex
+import subprocess
+from subprocess import CalledProcessError
+from typing import TYPE_CHECKING
+import ulid
+from .parser import collect, expand
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from .conf import Job, Step
+def iter_args(step: Step) -> Iterator[list[str]]:
+    """Iterate over combinations generated from parsed arguments.
+    Generate all possible combinations of arguments by parsing and
+    expanding each one, yielding them as an iterator.
+    Args:
+        step (Step): The step to parse.
+    Yields:
+        list[str]: a list of the parsed argument combinations.
+    """
+    args = collect(step.args)
+    options = [o for o in step.options.split(" ") if o]
+    for batch in expand(step.batch):
+        yield [*options, *sorted([*batch, *args])]
+def iter_batches(job: Job) -> Iterator[list[str]]:
+    """Generate Hydra application arguments for a job.
+    This function generates a list of Hydra application arguments
+    for a given job, including the job name and the root directory
+    for the sweep.
+    Args:
+        job (Job): The job to generate the Hydra configuration for.
+    Returns:
+        list[str]: A list of Hydra configuration strings.
+    """
+    job_name = f"hydra.job.name={job.name}"
+    for step in job.steps:
+        for args in iter_args(step):
+            sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ulid()}"
+            yield ["--multirun", sweep_dir, job_name, *args]
+def multirun(job: Job) -> None:
+    """Execute multiple runs of a job using either shell commands or Python functions.
+    This function processes a job configuration and executes it in one of two modes:
+    1. Shell command mode (job.run): Executes shell commands with the generated
+       arguments
+    2. Python function mode (job.call): Calls a Python function with the generated
+       arguments
+    Args:
+        job (Job): The job configuration containing run parameters and steps.
+    Raises:
+        RuntimeError: If a shell command fails or if a function call encounters
+            an error.
+        ValueError: If the Python function path is invalid or the function cannot
+            be imported.
+    """
+    it = iter_batches(job)
+    if job.run:
+        base_cmds = shlex.split(job.run)
+        for args in it:
+            cmds = [*base_cmds, *args]
+            try:
+                subprocess.run(cmds, check=True)
+            except CalledProcessError as e:
+                msg = f"Command failed with exit code {e.returncode}"
+                raise RuntimeError(msg) from e
+    elif job.call:
+        if "." not in job.call:
+            msg = f"Invalid function path: {job.call}."
+            msg += " Expected format: 'package.module.function'"
+            raise ValueError(msg)
+        try:
+            module_name, func_name = job.call.rsplit(".", 1)
+            module = importlib.import_module(module_name)
+            func = getattr(module, func_name)
+        except (ImportError, AttributeError, ModuleNotFoundError) as e:
+            msg = f"Failed to import or find function: {job.call}"
+            raise ValueError(msg) from e
+        for args in it:
+            try:
+                func(*args)
+            except Exception as e:  # noqa: PERF203
+                msg = f"Function call '{job.call}' failed with args: {args}"
+                raise RuntimeError(msg) from e
+def show(job: Job) -> None:
+    """Show the job configuration.
+    This function shows the job configuration for a given job.
+    Args:
+        job (Job): The job configuration to show.
+    """
+    it = iter_batches(job)
+    if job.run:
+        base_cmds = shlex.split(job.run)
+        for args in it:
+            cmds = " ".join([*base_cmds, *args])
+            print(cmds)  # noqa: T201
+    elif job.call:
+        print(f"call: {job.call}")  # noqa: T201
+        for args in it:
+            print(f"args: {args}")  # noqa: T201

hydraflow 0.8.0__tar.gz → 0.9.0__tar.gz

hydraflow 0.8.0__tar.gz → 0.9.0__tar.gz