PyPI - hydraflow - Versions diffs - 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

hydraflow 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) [hide / show]

hydraflow/__init__.py +5 -20
hydraflow/cli.py +31 -39
hydraflow/core/__init__.py +0 -0
hydraflow/{config.py → core/config.py} +10 -27
hydraflow/{context.py → core/context.py} +8 -50
hydraflow/{utils.py → core/io.py} +19 -28
hydraflow/core/main.py +164 -0
hydraflow/core/mlflow.py +168 -0
hydraflow/{param.py → core/param.py} +2 -2
hydraflow/entities/__init__.py +0 -0
hydraflow/{run_collection.py → entities/run_collection.py} +18 -163
hydraflow/{run_data.py → entities/run_data.py} +5 -3
hydraflow/{run_info.py → entities/run_info.py} +2 -2
hydraflow/executor/__init__.py +0 -0
hydraflow/executor/conf.py +23 -0
hydraflow/executor/io.py +34 -0
hydraflow/executor/job.py +152 -0
hydraflow/executor/parser.py +397 -0
{hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/METADATA +18 -19
hydraflow-0.9.0.dist-info/RECORD +24 -0
hydraflow/main.py +0 -54
hydraflow/mlflow.py +0 -280
hydraflow-0.7.5.dist-info/RECORD +0 -17
{hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/WHEEL +0 -0
{hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/entry_points.txt +0 -0
{hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -1,25 +1,16 @@
 """Integrate Hydra and MLflow to manage and track machine learning experiments."""
-from hydraflow.config import select_config, select_overrides
-from hydraflow.context import chdir_artifact, log_run, start_run
-from hydraflow.main import main
-from hydraflow.mlflow import (
-    list_run_ids,
-    list_run_paths,
-    list_runs,
-    search_runs,
-    set_experiment,
-)
-from hydraflow.run_collection import RunCollection
-from hydraflow.utils import (
+from hydraflow.core.context import chdir_artifact, log_run, start_run
+from hydraflow.core.io import (
     get_artifact_dir,
     get_artifact_path,
     get_hydra_output_dir,
-    get_overrides,
     load_config,
-    load_overrides,
     remove_run,
 )
+from hydraflow.core.main import main
+from hydraflow.core.mlflow import list_run_ids, list_run_paths, list_runs
+from hydraflow.entities.run_collection import RunCollection
 __all__ = [
     "RunCollection",
@@ -27,18 +18,12 @@ __all__ = [
     "get_artifact_dir",
     "get_artifact_path",
     "get_hydra_output_dir",
-    "get_overrides",
     "list_run_ids",
     "list_run_paths",
     "list_runs",
     "load_config",
-    "load_overrides",
     "log_run",
     "main",
     "remove_run",
-    "search_runs",
-    "select_config",
-    "select_overrides",
-    "set_experiment",
     "start_run",
 ]

hydraflow/cli.py CHANGED Viewed

@@ -2,41 +2,54 @@
 from __future__ import annotations
-from pathlib import Path
-from typing import Annotated
+from typing import TYPE_CHECKING, Annotated
 import typer
-from omegaconf import DictConfig, OmegaConf
 from rich.console import Console
 from typer import Argument, Option
+from hydraflow.executor.io import load_config
+if TYPE_CHECKING:
+    from hydraflow.executor.job import Job
 app = typer.Typer(add_completion=False)
 console = Console()
+def get_job(name: str) -> Job:
+    cfg = load_config()
+    job = cfg.jobs[name]
+    if not job.name:
+        job.name = name
+    return job
 @app.command()
 def run(
-    names: Annotated[
-        list[str] | None,
-        Argument(help="Job names.", show_default=False),
-    ] = None,
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
 ) -> None:
-    """Run jobs."""
-    typer.echo(names)
+    """Run a job."""
+    import mlflow
-    cfg = load_config()
-    typer.echo(cfg)
+    from hydraflow.executor.job import multirun
+    job = get_job(name)
+    mlflow.set_experiment(job.name)
+    multirun(job)
 @app.command()
-def show() -> None:
-    """Show the config."""
-    from rich.syntax import Syntax
+def show(
+    name: Annotated[str, Argument(help="Job name.", show_default=False)],
+) -> None:
+    """Show a job."""
+    from hydraflow.executor.job import show
-    cfg = load_config()
-    code = OmegaConf.to_yaml(cfg)
-    syntax = Syntax(code, "yaml")
-    console.print(syntax)
+    job = get_job(name)
+    show(job)
 @app.callback(invoke_without_command=True)
@@ -52,24 +65,3 @@ def callback(
         typer.echo(f"hydraflow {importlib.metadata.version('hydraflow')}")
         raise typer.Exit
-def find_config() -> Path:
-    if Path("hydraflow.yaml").exists():
-        return Path("hydraflow.yaml")
-    if Path("hydraflow.yml").exists():
-        return Path("hydraflow.yml")
-    typer.echo("No config file found.")
-    raise typer.Exit(code=1)
-def load_config() -> DictConfig:
-    cfg = OmegaConf.load(find_config())
-    if isinstance(cfg, DictConfig):
-        return cfg
-    typer.echo("Invalid config file.")
-    raise typer.Exit(code=1)

hydraflow/core/__init__.py ADDED Viewed

File without changes

hydraflow/{config.py → core/config.py} RENAMED Viewed

@@ -6,35 +6,19 @@ from typing import TYPE_CHECKING
 from omegaconf import DictConfig, ListConfig, OmegaConf
-from hydraflow.utils import get_overrides
 if TYPE_CHECKING:
     from collections.abc import Iterator
     from typing import Any
-def collect_params(config: object) -> dict[str, Any]:
-    """Iterate over parameters and collect them into a dictionary.
-    Args:
-        config (object): The configuration object to iterate over.
-        prefix (str): The prefix to prepend to the parameter keys.
-    Returns:
-        dict[str, Any]: A dictionary of collected parameters.
-    """
-    return dict(iter_params(config))
-def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     """Recursively iterate over the parameters in the given configuration object.
     This function traverses the configuration object and yields key-value pairs
     representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config (object): The configuration object to iterate over. This can be a
+        config (Any): The configuration object to iterate over. This can be a
             dictionary, list, DictConfig, or ListConfig.
         prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
@@ -50,7 +34,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
         config = _from_dotlist(config)
     if not isinstance(config, DictConfig | ListConfig):
-        config = OmegaConf.create(config)  # type: ignore
+        config = OmegaConf.create(config)
     yield from _iter_params(config, prefix)
@@ -65,7 +49,7 @@ def _from_dotlist(config: list[str]) -> dict[str, str]:
     return result
-def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     if isinstance(config, DictConfig):
         for key, value in config.items():
             if _is_param(value):
@@ -83,12 +67,12 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
                 yield from _iter_params(value, f"{prefix}{index}.")
-def _is_param(value: object) -> bool:
+def _is_param(value: Any) -> bool:
     """Check if the given value is a parameter."""
     if isinstance(value, DictConfig):
         return False
-    if isinstance(value, ListConfig):  # noqa: SIM102
+    if isinstance(value, ListConfig):
         if any(isinstance(v, DictConfig | ListConfig) for v in value):
             return False
@@ -103,14 +87,14 @@ def _convert(value: Any) -> Any:
     return value
-def select_config(config: object, names: list[str]) -> dict[str, Any]:
+def select_config(config: Any, names: list[str]) -> dict[str, Any]:
     """Select the given parameters from the configuration object.
     This function selects the given parameters from the configuration object
     and returns a new configuration object containing only the selected parameters.
     Args:
-        config (object): The configuration object to select parameters from.
+        config (Any): The configuration object to select parameters from.
         names (list[str]): The names of the parameters to select.
     Returns:
@@ -120,7 +104,7 @@ def select_config(config: object, names: list[str]) -> dict[str, Any]:
     if not isinstance(config, DictConfig):
         config = OmegaConf.structured(config)
-    return {name: _get(config, name) for name in names}  # type: ignore
+    return {name: _get(config, name) for name in names}
 def _get(config: DictConfig, name: str) -> Any:
@@ -132,8 +116,7 @@ def _get(config: DictConfig, name: str) -> Any:
     return _get(config.get(prefix), name)
-def select_overrides(config: object) -> dict[str, Any]:
+def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
     """Select the given overrides from the configuration object."""
-    overrides = get_overrides()
     names = [override.split("=")[0].strip() for override in overrides]
     return select_config(config, names)

hydraflow/{context.py → core/context.py} RENAMED Viewed

@@ -12,8 +12,9 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
-from hydraflow.mlflow import log_params
-from hydraflow.utils import get_artifact_dir
+from hydraflow.core.io import get_artifact_dir
+from .mlflow import log_params, log_text
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -55,11 +56,11 @@ def log_run(
         log_params(config, synchronous=synchronous)
     hc = HydraConfig.get()
-    output_dir = Path(hc.runtime.output_dir)
+    hydra_dir = Path(hc.runtime.output_dir)
     # Save '.hydra' config directory.
-    output_subdir = output_dir / (hc.output_subdir or "")
-    mlflow.log_artifacts(output_subdir.as_posix(), hc.output_subdir)
+    hydra_subdir = hydra_dir / (hc.output_subdir or "")
+    mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
     try:
         yield
@@ -70,43 +71,14 @@ def log_run(
         raise
     finally:
-        log_text(output_dir)
-def log_text(directory: Path, pattern: str = "*.log") -> None:
-    """Log text files in the given directory as artifacts.
-    Append the text files to the existing text file in the artifact directory.
-    Args:
-        directory (Path): The directory to find the logs in.
-        pattern (str): The pattern to match the logs.
-    """
-    artifact_dir = get_artifact_dir()
-    for file in directory.glob(pattern):
-        if not file.is_file():
-            continue
-        file_artifact = artifact_dir / file.name
-        if file_artifact.exists():
-            text = file_artifact.read_text()
-            if not text.endswith("\n"):
-                text += "\n"
-        else:
-            text = ""
-        text += file.read_text()
-        mlflow.log_text(text, file.name)
+        log_text(hydra_dir)
 @contextmanager
-def start_run(  # noqa: PLR0913
+def start_run(
     config: object,
     *,
     chdir: bool = False,
-    run: Run | None = None,
     run_id: str | None = None,
     experiment_id: str | None = None,
     run_name: str | None = None,
@@ -126,7 +98,6 @@ def start_run(  # noqa: PLR0913
         config (object): The configuration object to log parameters from.
         chdir (bool): Whether to change the current working directory to the
             artifact directory of the current run. Defaults to False.
-        run (Run | None): The existing run. Defaults to None.
         run_id (str | None): The existing run ID. Defaults to None.
         experiment_id (str | None): The experiment ID. Defaults to None.
         run_name (str | None): The name of the run. Defaults to None.
@@ -142,20 +113,7 @@ def start_run(  # noqa: PLR0913
     Yields:
         Run: An MLflow Run object representing the started run.
-    Example:
-        with start_run(config) as run:
-            # Perform operations within the MLflow run context
-            pass
-    See Also:
-        - `mlflow.start_run`: The MLflow function to start a run directly.
-        - `log_run`: A context manager to log parameters and manage the MLflow
-           run context.
     """
-    if run:
-        run_id = run.info.run_id
     with (
         mlflow.start_run(
             run_id=run_id,

hydraflow/{utils.py → core/io.py} RENAMED Viewed

@@ -12,46 +12,42 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
 from mlflow.entities import Run
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import DictConfig, ListConfig, OmegaConf
 if TYPE_CHECKING:
     from collections.abc import Iterable
-def get_artifact_dir(run: Run | None = None, uri: str | None = None) -> Path:
+def file_uri_to_path(uri: str) -> Path:
+    """Convert a file URI to a local path."""
+    if not uri.startswith("file:"):
+        return Path(uri)
+    path = urllib.parse.urlparse(uri).path
+    return Path(urllib.request.url2pathname(path))  # for Windows
+def get_artifact_dir(run: Run | None = None) -> Path:
     """Retrieve the artifact directory for the given run.
     This function uses MLflow to get the artifact directory for the given run.
     Args:
         run (Run | None): The run object. Defaults to None.
-        uri (str | None): The URI of the artifact. Defaults to None.
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
-    if run is not None and uri is not None:
-        raise ValueError("Cannot provide both run and uri")
-    if run is None and uri is None:
+    if run is None:
         uri = mlflow.get_artifact_uri()
-    elif run:
+    else:
         uri = run.info.artifact_uri
     if not isinstance(uri, str):
         raise NotImplementedError
-    if uri.startswith("file:"):
-        return file_uri_to_path(uri)
-    return Path(uri)
-def file_uri_to_path(uri: str) -> Path:
-    """Convert a file URI to a local path."""
-    path = urllib.parse.urlparse(uri).path
-    return Path(urllib.request.url2pathname(path))  # for Windows
+    return file_uri_to_path(uri)
 def get_artifact_path(run: Run | None, path: str) -> Path:
@@ -123,12 +119,7 @@ def load_config(run: Run) -> DictConfig:
     return OmegaConf.load(path)  # type: ignore
-def get_overrides() -> list[str]:
-    """Retrieve the overrides for the current run."""
-    return list(HydraConfig.get().overrides.task)  # ListConifg -> list
-def load_overrides(run: Run) -> list[str]:
+def load_overrides(run: Run) -> ListConfig:
     """Load the overrides for a given run.
     This function loads the overrides for the provided Run instance
@@ -137,15 +128,15 @@ def load_overrides(run: Run) -> list[str]:
     `.hydra/overrides.yaml` is not found in the run's artifact directory.
     Args:
-        run (Run): The Run instance for which to load the overrides.
+        run (Run): The Run instance for which to load the configuration.
     Returns:
-        The loaded overrides as a list of strings. Returns an empty list
-        if the overrides file is not found.
+        The loaded configuration as a DictConfig object. Returns an empty
+        DictConfig if the configuration file is not found.
     """
     path = get_artifact_dir(run) / ".hydra/overrides.yaml"
-    return [str(x) for x in OmegaConf.load(path)]
+    return OmegaConf.load(path)  # type: ignore
 def remove_run(run: Run | Iterable[Run]) -> None:

hydraflow/core/main.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""Integration of MLflow experiment tracking with Hydra configuration management.
+This module provides decorators and utilities to seamlessly combine Hydra's
+configuration management with MLflow's experiment tracking capabilities. It
+enables automatic run deduplication, configuration storage, and experiment
+management.
+The main functionality is provided through the `main` decorator, which can be
+used to wrap experiment entry points. This decorator handles:
+- Configuration management via Hydra
+- Experiment tracking via MLflow
+- Run deduplication based on configurations
+- Working directory management
+- Automatic configuration storage
+Example:
+    ```python
+    from dataclasses import dataclass
+    from mlflow.entities import Run
+    @dataclass
+    class Config:
+        learning_rate: float
+        batch_size: int
+    @main(Config)
+    def train(run: Run, config: Config):
+        # Your training code here
+        pass
+    ```
+"""
+from __future__ import annotations
+from functools import wraps
+from typing import TYPE_CHECKING, TypeVar
+import hydra
+import mlflow
+from hydra.core.config_store import ConfigStore
+from hydra.core.hydra_config import HydraConfig
+from mlflow.entities import RunStatus
+from omegaconf import OmegaConf
+import hydraflow
+from hydraflow.core.io import file_uri_to_path
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from pathlib import Path
+    from typing import Any
+    from mlflow.entities import Run
+FINISHED = RunStatus.to_string(RunStatus.FINISHED)
+T = TypeVar("T")
+def main(
+    node: T | type[T],
+    config_name: str = "config",
+    *,
+    chdir: bool = False,
+    force_new_run: bool = False,
+    match_overrides: bool = False,
+    rerun_finished: bool = False,
+):
+    """Decorator for configuring and running MLflow experiments with Hydra.
+    This decorator combines Hydra configuration management with MLflow experiment
+    tracking. It automatically handles run deduplication and configuration storage.
+    Args:
+        node: Configuration node class or instance defining the structure of the
+            configuration.
+        config_name: Name of the configuration. Defaults to "config".
+        chdir: If True, changes working directory to the artifact directory
+            of the run. Defaults to False.
+        force_new_run: If True, always creates a new MLflow run instead of
+            reusing existing ones. Defaults to False.
+        match_overrides: If True, matches runs based on Hydra CLI overrides
+            instead of full config. Defaults to False.
+        rerun_finished: If True, allows rerunning completed runs. Defaults to
+            False.
+    """
+    def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
+        ConfigStore.instance().store(config_name, node)
+        @hydra.main(config_name=config_name, version_base=None)
+        @wraps(app)
+        def inner_decorator(config: T) -> None:
+            hc = HydraConfig.get()
+            experiment = mlflow.set_experiment(hc.job.name)
+            if force_new_run:
+                run_id = None
+            else:
+                uri = experiment.artifact_location
+                overrides = hc.overrides.task if match_overrides else None
+                run_id = get_run_id(uri, config, overrides)
+                if run_id and not rerun_finished:
+                    run = mlflow.get_run(run_id)
+                    if run.info.status == FINISHED:
+                        return
+            with hydraflow.start_run(config, run_id=run_id, chdir=chdir) as run:
+                app(run, config)
+        return inner_decorator
+    return decorator
+def get_run_id(uri: str, config: Any, overrides: list[str] | None) -> str | None:
+    """Try to get the run ID for the given configuration.
+    If the run is not found, the function will return None.
+    Args:
+        uri (str): The URI of the experiment.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        The run ID for the given configuration or overrides. Returns None if
+        no run ID is found.
+    """
+    for run_dir in file_uri_to_path(uri).iterdir():
+        if run_dir.is_dir() and equals(run_dir, config, overrides):
+            return run_dir.name
+    return None
+def equals(run_dir: Path, config: Any, overrides: list[str] | None) -> bool:
+    """Check if the run directory matches the given configuration or overrides.
+    Args:
+        run_dir (Path): The run directory.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        True if the run directory matches the given configuration or overrides,
+        False otherwise.
+    """
+    if overrides is None:
+        path = run_dir / "artifacts/.hydra/config.yaml"
+    else:
+        path = run_dir / "artifacts/.hydra/overrides.yaml"
+        config = overrides
+    if not path.exists():
+        return False
+    return OmegaConf.load(path) == config

hydraflow 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

hydraflow 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl