PyPI - hydraflow - Versions diffs - 0.7.5__tar.gz → 0.8.0__tar.gz - Mend

hydraflow 0.7.5 (tar.gz) → 0.8.0 (tar.gz)

Files changed (83) hide show

{hydraflow-0.7.5 → hydraflow-0.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.7.5
+Version: 0.8.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -108,7 +108,7 @@ class MySQLConfig:
 cs = ConfigStore.instance()
 cs.store(name="config", node=MySQLConfig)
-@hydra.main(version_base=None, config_name="config")
+@hydra.main(config_name="config", version_base=None)
 def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()

{hydraflow-0.7.5 → hydraflow-0.8.0}/README.md RENAMED Viewed

@@ -63,7 +63,7 @@ class MySQLConfig:
 cs = ConfigStore.instance()
 cs.store(name="config", node=MySQLConfig)
-@hydra.main(version_base=None, config_name="config")
+@hydra.main(config_name="config", version_base=None)
 def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()

{hydraflow-0.7.5 → hydraflow-0.8.0}/apps/quickstart.py RENAMED Viewed

@@ -2,7 +2,9 @@ import logging
 from dataclasses import dataclass
 import hydra
+import mlflow
 from hydra.core.config_store import ConfigStore
+from hydra.core.hydra_config import HydraConfig
 import hydraflow
@@ -19,9 +21,10 @@ cs = ConfigStore.instance()
 cs.store(name="config", node=Config)
-@hydra.main(version_base=None, config_name="config")
+@hydra.main(config_name="config", version_base=None)
 def app(cfg: Config) -> None:
-    hydraflow.set_experiment()
+    hc = HydraConfig.get()
+    mlflow.set_experiment(hc.job.name)
     with hydraflow.start_run(cfg):
         log.info(f"{cfg.width=}, {cfg.height=}")

{hydraflow-0.7.5 → hydraflow-0.8.0}/docs/usage/quickstart.md RENAMED Viewed

@@ -117,18 +117,6 @@ $ python apps/quickstart.py -m width=400,600 height=100,200,300
 >>> print(run.data.params)
 ```
-### Map runs
-```pycon exec="1" source="console" session="quickstart"
->>> params = rc.map(lambda x: x.data.params)
->>> for p in params:
-...     print(p)
-```
-```pycon exec="1" source="console" session="quickstart"
->>> list(rc.map_id(print))
-```
 ### Group runs
 ```pycon exec="1" source="console" session="quickstart"

{hydraflow-0.7.5 → hydraflow-0.8.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.7.5"
+version = "0.8.0"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = { file = "LICENSE" }
@@ -72,24 +72,20 @@ ignore = [
   "D203",
   "D213",
   "EM101",
+  "FBT001",
+  "FBT002",
   "PGH003",
+  "PLR0911",
+  "PLR0913",
   "PLR1704",
+  "PLR2004",
+  "SIM102",
+  "SIM108",
   "TRY003",
 ]
 [tool.ruff.lint.per-file-ignores]
-"tests/*" = [
-  "A001",
-  "ANN",
-  "ARG",
-  "D",
-  "FBT",
-  "PLR",
-  "PT",
-  "S",
-  "SIM108",
-  "SLF",
-]
+"tests/*" = ["A001", "ANN", "ARG", "D", "FBT", "PD", "PLR", "PT", "S", "SLF"]
 "apps/*.py" = ["D", "G", "INP"]
-"src/hydraflow/main.py" = ["ANN201", "D401", "PLR0913"]
+"src/hydraflow/main.py" = ["ANN201", "D401"]
 "src/hydraflow/cli.py" = ["ANN", "D"]

{hydraflow-0.7.5 → hydraflow-0.8.0}/src/hydraflow/__init__.py RENAMED Viewed

@@ -1,23 +1,14 @@
 """Integrate Hydra and MLflow to manage and track machine learning experiments."""
-from hydraflow.config import select_config, select_overrides
 from hydraflow.context import chdir_artifact, log_run, start_run
 from hydraflow.main import main
-from hydraflow.mlflow import (
-    list_run_ids,
-    list_run_paths,
-    list_runs,
-    search_runs,
-    set_experiment,
-)
+from hydraflow.mlflow import list_run_ids, list_run_paths, list_runs
 from hydraflow.run_collection import RunCollection
 from hydraflow.utils import (
     get_artifact_dir,
     get_artifact_path,
     get_hydra_output_dir,
-    get_overrides,
     load_config,
-    load_overrides,
     remove_run,
 )
@@ -27,18 +18,12 @@ __all__ = [
     "get_artifact_dir",
     "get_artifact_path",
     "get_hydra_output_dir",
-    "get_overrides",
     "list_run_ids",
     "list_run_paths",
     "list_runs",
     "load_config",
-    "load_overrides",
     "log_run",
     "main",
     "remove_run",
-    "search_runs",
-    "select_config",
-    "select_overrides",
-    "set_experiment",
     "start_run",
 ]

{hydraflow-0.7.5 → hydraflow-0.8.0}/src/hydraflow/config.py RENAMED Viewed

@@ -6,35 +6,19 @@ from typing import TYPE_CHECKING
 from omegaconf import DictConfig, ListConfig, OmegaConf
-from hydraflow.utils import get_overrides
 if TYPE_CHECKING:
     from collections.abc import Iterator
     from typing import Any
-def collect_params(config: object) -> dict[str, Any]:
-    """Iterate over parameters and collect them into a dictionary.
-    Args:
-        config (object): The configuration object to iterate over.
-        prefix (str): The prefix to prepend to the parameter keys.
-    Returns:
-        dict[str, Any]: A dictionary of collected parameters.
-    """
-    return dict(iter_params(config))
-def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     """Recursively iterate over the parameters in the given configuration object.
     This function traverses the configuration object and yields key-value pairs
     representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config (object): The configuration object to iterate over. This can be a
+        config (Any): The configuration object to iterate over. This can be a
             dictionary, list, DictConfig, or ListConfig.
         prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
@@ -50,7 +34,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
         config = _from_dotlist(config)
     if not isinstance(config, DictConfig | ListConfig):
-        config = OmegaConf.create(config)  # type: ignore
+        config = OmegaConf.create(config)
     yield from _iter_params(config, prefix)
@@ -65,7 +49,7 @@ def _from_dotlist(config: list[str]) -> dict[str, str]:
     return result
-def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     if isinstance(config, DictConfig):
         for key, value in config.items():
             if _is_param(value):
@@ -83,12 +67,12 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
                 yield from _iter_params(value, f"{prefix}{index}.")
-def _is_param(value: object) -> bool:
+def _is_param(value: Any) -> bool:
     """Check if the given value is a parameter."""
     if isinstance(value, DictConfig):
         return False
-    if isinstance(value, ListConfig):  # noqa: SIM102
+    if isinstance(value, ListConfig):
         if any(isinstance(v, DictConfig | ListConfig) for v in value):
             return False
@@ -103,14 +87,14 @@ def _convert(value: Any) -> Any:
     return value
-def select_config(config: object, names: list[str]) -> dict[str, Any]:
+def select_config(config: Any, names: list[str]) -> dict[str, Any]:
     """Select the given parameters from the configuration object.
     This function selects the given parameters from the configuration object
     and returns a new configuration object containing only the selected parameters.
     Args:
-        config (object): The configuration object to select parameters from.
+        config (Any): The configuration object to select parameters from.
         names (list[str]): The names of the parameters to select.
     Returns:
@@ -120,7 +104,7 @@ def select_config(config: object, names: list[str]) -> dict[str, Any]:
     if not isinstance(config, DictConfig):
         config = OmegaConf.structured(config)
-    return {name: _get(config, name) for name in names}  # type: ignore
+    return {name: _get(config, name) for name in names}
 def _get(config: DictConfig, name: str) -> Any:
@@ -132,8 +116,7 @@ def _get(config: DictConfig, name: str) -> Any:
     return _get(config.get(prefix), name)
-def select_overrides(config: object) -> dict[str, Any]:
+def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
     """Select the given overrides from the configuration object."""
-    overrides = get_overrides()
     names = [override.split("=")[0].strip() for override in overrides]
     return select_config(config, names)

{hydraflow-0.7.5 → hydraflow-0.8.0}/src/hydraflow/context.py RENAMED Viewed

@@ -12,7 +12,7 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
-from hydraflow.mlflow import log_params
+from hydraflow.mlflow import log_params, log_text
 from hydraflow.utils import get_artifact_dir
 if TYPE_CHECKING:
@@ -55,11 +55,11 @@ def log_run(
         log_params(config, synchronous=synchronous)
     hc = HydraConfig.get()
-    output_dir = Path(hc.runtime.output_dir)
+    hydra_dir = Path(hc.runtime.output_dir)
     # Save '.hydra' config directory.
-    output_subdir = output_dir / (hc.output_subdir or "")
-    mlflow.log_artifacts(output_subdir.as_posix(), hc.output_subdir)
+    hydra_subdir = hydra_dir / (hc.output_subdir or "")
+    mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
     try:
         yield
@@ -70,43 +70,14 @@ def log_run(
         raise
     finally:
-        log_text(output_dir)
-def log_text(directory: Path, pattern: str = "*.log") -> None:
-    """Log text files in the given directory as artifacts.
-    Append the text files to the existing text file in the artifact directory.
-    Args:
-        directory (Path): The directory to find the logs in.
-        pattern (str): The pattern to match the logs.
-    """
-    artifact_dir = get_artifact_dir()
-    for file in directory.glob(pattern):
-        if not file.is_file():
-            continue
-        file_artifact = artifact_dir / file.name
-        if file_artifact.exists():
-            text = file_artifact.read_text()
-            if not text.endswith("\n"):
-                text += "\n"
-        else:
-            text = ""
-        text += file.read_text()
-        mlflow.log_text(text, file.name)
+        log_text(hydra_dir)
 @contextmanager
-def start_run(  # noqa: PLR0913
+def start_run(
     config: object,
     *,
     chdir: bool = False,
-    run: Run | None = None,
     run_id: str | None = None,
     experiment_id: str | None = None,
     run_name: str | None = None,
@@ -126,7 +97,6 @@ def start_run(  # noqa: PLR0913
         config (object): The configuration object to log parameters from.
         chdir (bool): Whether to change the current working directory to the
             artifact directory of the current run. Defaults to False.
-        run (Run | None): The existing run. Defaults to None.
         run_id (str | None): The existing run ID. Defaults to None.
         experiment_id (str | None): The experiment ID. Defaults to None.
         run_name (str | None): The name of the run. Defaults to None.
@@ -142,20 +112,7 @@ def start_run(  # noqa: PLR0913
     Yields:
         Run: An MLflow Run object representing the started run.
-    Example:
-        with start_run(config) as run:
-            # Perform operations within the MLflow run context
-            pass
-    See Also:
-        - `mlflow.start_run`: The MLflow function to start a run directly.
-        - `log_run`: A context manager to log parameters and manage the MLflow
-           run context.
     """
-    if run:
-        run_id = run.info.run_id
     with (
         mlflow.start_run(
             run_id=run_id,

hydraflow-0.8.0/src/hydraflow/main.py ADDED Viewed

@@ -0,0 +1,162 @@
+"""Integration of MLflow experiment tracking with Hydra configuration management.
+This module provides decorators and utilities to seamlessly combine Hydra's
+configuration management with MLflow's experiment tracking capabilities. It
+enables automatic run deduplication, configuration storage, and experiment
+management.
+The main functionality is provided through the `main` decorator, which can be
+used to wrap experiment entry points. This decorator handles:
+- Configuration management via Hydra
+- Experiment tracking via MLflow
+- Run deduplication based on configurations
+- Working directory management
+- Automatic configuration storage
+Example:
+    ```python
+    from dataclasses import dataclass
+    from mlflow.entities import Run
+    @dataclass
+    class Config:
+        learning_rate: float
+        batch_size: int
+    @main(Config)
+    def train(run: Run, config: Config):
+        # Your training code here
+        pass
+    ```
+"""
+from __future__ import annotations
+from functools import wraps
+from typing import TYPE_CHECKING, TypeVar
+import hydra
+import mlflow
+from hydra.core.config_store import ConfigStore
+from hydra.core.hydra_config import HydraConfig
+from mlflow.entities import RunStatus
+from omegaconf import OmegaConf
+import hydraflow
+from hydraflow.utils import file_uri_to_path
+if TYPE_CHECKING:
+    from collections.abc import Callable
+    from pathlib import Path
+    from mlflow.entities import Run
+# String form of MLflow's FINISHED run status; compared against `run.info.status`.
+FINISHED = RunStatus.to_string(RunStatus.FINISHED)
+# Type variable for the configuration type shared by `main` and the wrapped app.
+T = TypeVar("T")
+def main(
+    node: T | type[T],
+    config_name: str = "config",
+    *,
+    chdir: bool = False,
+    force_new_run: bool = False,
+    match_overrides: bool = False,
+    rerun_finished: bool = False,
+):
+    """Decorator for configuring and running MLflow experiments with Hydra.
+    This decorator combines Hydra configuration management with MLflow experiment
+    tracking. It automatically handles run deduplication and configuration storage.
+    The wrapped function is invoked as `app(run, config)` inside a
+    `hydraflow.start_run` context. Unless `force_new_run` is set, an existing
+    run is looked up with `get_run_id` under the experiment's artifact location
+    and reused; if that run has already finished, the app is not called at all
+    unless `rerun_finished` is set.
+    Args:
+        node: Configuration node class or instance defining the structure of the
+            configuration.
+        config_name: Name of the configuration. Defaults to "config".
+        chdir: If True, changes working directory to the artifact directory
+            of the run. Defaults to False.
+        force_new_run: If True, always creates a new MLflow run instead of
+            reusing existing ones. Defaults to False.
+        match_overrides: If True, matches runs based on Hydra CLI overrides
+            instead of full config. Defaults to False.
+        rerun_finished: If True, allows rerunning completed runs. Defaults to
+            False.
+    Returns:
+        A decorator that accepts `app(run, config)` and returns the entry point
+        produced by `hydra.main`.
+    """
+    def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
+        # Register the node under `config_name`, the same name given to
+        # `hydra.main` below.
+        ConfigStore.instance().store(config_name, node)
+        @hydra.main(config_name=config_name, version_base=None)
+        @wraps(app)
+        def inner_decorator(config: T) -> None:
+            hc = HydraConfig.get()
+            # The Hydra job name doubles as the MLflow experiment name.
+            experiment = mlflow.set_experiment(hc.job.name)
+            if force_new_run:
+                # No run ID is passed on, so `start_run` is not pointed at an
+                # existing run.
+                run_id = None
+            else:
+                uri = experiment.artifact_location
+                # Compare either the task overrides or the whole config,
+                # depending on `match_overrides`.
+                overrides = hc.overrides.task if match_overrides else None
+                run_id = get_run_id(uri, config, overrides)
+                if run_id and not rerun_finished:
+                    run = mlflow.get_run(run_id)
+                    # A matching run that already finished is skipped entirely.
+                    if run.info.status == FINISHED:
+                        return
+            with hydraflow.start_run(config, run_id=run_id, chdir=chdir) as run:
+                app(run, config)
+        return inner_decorator
+    return decorator
+def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | None:
+    """Try to get the run ID for the given configuration.
+    The experiment location is converted to a local path with
+    `file_uri_to_path` and each subdirectory is checked with `equals`. The name
+    of the first matching subdirectory is returned as the run ID.
+    If the run is not found, or the experiment directory does not exist,
+    the function will return None.
+    Args:
+        uri (str): The URI of the experiment.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        The run ID for the given configuration or overrides. Returns None if
+        no run ID is found.
+    """
+    experiment_dir = file_uri_to_path(uri)
+    # `iterdir` raises on a missing directory; a missing directory simply means
+    # there is no stored run to match.
+    if not experiment_dir.is_dir():
+        return None
+    for run_dir in experiment_dir.iterdir():
+        if run_dir.is_dir() and equals(run_dir, config, overrides):
+            return run_dir.name
+    return None
+def equals(run_dir: Path, config: object, overrides: list[str] | None) -> bool:
+    """Tell whether a run directory was produced by the given config or overrides.
+    When `overrides` is None the stored Hydra config file is compared with
+    `config`; otherwise the stored overrides file is compared with `overrides`.
+    Args:
+        run_dir (Path): The run directory.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        True if the stored file exists and equals the expected value,
+        False otherwise.
+    """
+    if overrides is None:
+        stored, expected = run_dir / "artifacts/.hydra/config.yaml", config
+    else:
+        stored, expected = run_dir / "artifacts/.hydra/overrides.yaml", overrides
+    # The existence check short-circuits so a missing file is never loaded.
+    return stored.exists() and OmegaConf.load(stored) == expected

hydraflow-0.8.0/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,167 @@
+"""Integration of MLflow experiment tracking with Hydra configuration management.
+This module provides functions to log parameters from Hydra configuration objects
+to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
+configuration management with MLflow's experiment tracking capabilities.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import joblib
+import mlflow
+import mlflow.artifacts
+from hydraflow.config import iter_params
+from hydraflow.run_collection import RunCollection
+from hydraflow.utils import file_uri_to_path, get_artifact_dir
+if TYPE_CHECKING:
+    from pathlib import Path
+    from typing import Any
+def log_params(config: Any, *, synchronous: bool | None = None) -> None:
+    """Record every parameter of a configuration object in MLflow.
+    The configuration is walked with `iter_params`, and each key-value pair it
+    yields is passed to `mlflow.log_param`.
+    Args:
+        config (Any): The configuration object to log the parameters from.
+        synchronous (bool | None): Forwarded to `mlflow.log_param` to choose
+            synchronous logging. Defaults to None.
+    """
+    for name, param in iter_params(config):
+        mlflow.log_param(name, param, synchronous=synchronous)
+def log_text(from_dir: Path, pattern: str = "*.log") -> None:
+    """Log text files in the given directory as artifacts.
+    Append the text files to the existing text file in the artifact directory.
+    For every regular file in `from_dir` matching `pattern`, the content of an
+    already-logged artifact with the same name (if any) is used as a prefix, a
+    trailing newline is ensured, and the combined text is logged again under
+    the same file name.
+    Args:
+        from_dir (Path): The directory to find the logs in.
+        pattern (str): The pattern to match the logs.
+    """
+    artifact_dir = get_artifact_dir()
+    for file in from_dir.glob(pattern):
+        if not file.is_file():
+            continue
+        file_artifact = artifact_dir / file.name
+        if file_artifact.exists():
+            # `mlflow.log_text` writes artifacts as UTF-8, so read the previous
+            # artifact back as UTF-8 rather than with the platform default.
+            text = file_artifact.read_text(encoding="utf-8")
+            if not text.endswith("\n"):
+                text += "\n"
+        else:
+            text = ""
+        # The source log keeps the platform default encoding it was read with
+        # before this change.
+        text += file.read_text()
+        mlflow.log_text(text, file.name)
+def list_run_paths(
+    experiment_names: str | list[str] | None = None,
+    *other: str,
+) -> list[Path]:
+    """Collect the run directories of the given experiments.
+    Each experiment is looked up by name and every subdirectory of its artifact
+    location is treated as a run path. When no experiment names are given
+    (None), all experiments except the "Default" experiment are searched.
+    Names that do not resolve to an experiment are ignored.
+    Args:
+        experiment_names (str | list[str] | None): A single experiment name or
+            a list of experiment names. If None is provided, the function will
+            search all experiments except the "Default" experiment.
+        *other (str): The parts of the run directory to join.
+    Returns:
+        list[Path]: A list of run paths for the specified experiments, each
+        joined with `other` when given.
+    """
+    if experiment_names is None:
+        names = [e.name for e in mlflow.search_experiments() if e.name != "Default"]
+    elif isinstance(experiment_names, str):
+        names = [experiment_names]
+    else:
+        names = experiment_names
+    found: list[Path] = []
+    for name in names:
+        experiment = mlflow.get_experiment_by_name(name)
+        if not experiment:
+            continue
+        location = experiment.artifact_location
+        if not isinstance(location, str):
+            continue
+        root = file_uri_to_path(location)
+        found.extend(p for p in root.iterdir() if p.is_dir())
+    return [p.joinpath(*other) for p in found] if other else found
+def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
+    """List all run IDs for the specified experiments.
+    This function retrieves all run paths via `list_run_paths` and returns the
+    directory name of each one as the run ID.
+    If no experiment names are provided (None), the function will search all
+    runs for all experiments except the "Default" experiment.
+    Args:
+        experiment_names (str | list[str] | None): A single experiment name or
+            a list of experiment names. If None is provided, the function will
+            search all experiments except the "Default" experiment.
+    Returns:
+        list[str]: A list of run IDs for the specified experiments.
+    """
+    # Use the full directory name, as `get_run_id` does; `stem` would drop
+    # everything after the last dot.
+    return [run_path.name for run_path in list_run_paths(experiment_names)]
+def list_runs(
+    experiment_names: str | list[str] | None = None,
+    n_jobs: int = 0,
+) -> RunCollection:
+    """Fetch the runs of the given experiments as a `RunCollection`.
+    Run IDs are obtained from `list_run_ids` and each run is fetched with
+    `mlflow.get_run`. When no experiment names are given (None), all
+    experiments except the "Default" experiment are searched.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
+    Args:
+        experiment_names (str | list[str] | None): A single experiment name or
+            a list of experiment names. If None is provided, the function will
+            search all experiments except the "Default" experiment.
+        n_jobs (int): The number of jobs to retrieve runs in parallel. With 0
+            the runs are fetched one by one without joblib.
+    Returns:
+        RunCollection: A `RunCollection` instance containing the runs for the
+        specified experiments.
+    """
+    run_ids = list_run_ids(experiment_names)
+    if n_jobs != 0:
+        # Fetch through a joblib thread pool.
+        tasks = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
+        fetched = joblib.Parallel(n_jobs, backend="threading")(tasks)
+    else:
+        fetched = [mlflow.get_run(run_id) for run_id in run_ids]
+    ordered = sorted(fetched, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(ordered)  # type: ignore

hydraflow 0.7.5__tar.gz → 0.8.0__tar.gz

hydraflow 0.7.5 (tar.gz) → 0.8.0 (tar.gz)