PyPI - hydraflow - Versions diffs - 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl - Mend

hydraflow 0.7.4-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

hydraflow/__init__.py +3 -10
hydraflow/config.py +10 -27
hydraflow/context.py +6 -49
hydraflow/main.py +128 -20
hydraflow/mlflow.py +93 -151
hydraflow/param.py +2 -2
hydraflow/run_collection.py +10 -156
hydraflow/run_data.py +4 -2
hydraflow/utils.py +19 -28
{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/METADATA +3 -3
hydraflow-0.8.0.dist-info/RECORD +17 -0
hydraflow-0.7.4.dist-info/RECORD +0 -17
{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/WHEEL +0 -0
{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/entry_points.txt +0 -0
{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -1,17 +1,14 @@
 """Integrate Hydra and MLflow to manage and track machine learning experiments."""
-from hydraflow.config import select_config, select_overrides
 from hydraflow.context import chdir_artifact, log_run, start_run
 from hydraflow.main import main
-from hydraflow.mlflow import list_runs, search_runs, set_experiment
+from hydraflow.mlflow import list_run_ids, list_run_paths, list_runs
 from hydraflow.run_collection import RunCollection
 from hydraflow.utils import (
     get_artifact_dir,
     get_artifact_path,
     get_hydra_output_dir,
-    get_overrides,
     load_config,
-    load_overrides,
     remove_run,
 )
@@ -21,16 +18,12 @@ __all__ = [
     "get_artifact_dir",
     "get_artifact_path",
     "get_hydra_output_dir",
-    "get_overrides",
+    "list_run_ids",
+    "list_run_paths",
     "list_runs",
     "load_config",
-    "load_overrides",
     "log_run",
     "main",
     "remove_run",
-    "search_runs",
-    "select_config",
-    "select_overrides",
-    "set_experiment",
     "start_run",
 ]

hydraflow/config.py CHANGED Viewed

@@ -6,35 +6,19 @@ from typing import TYPE_CHECKING
 from omegaconf import DictConfig, ListConfig, OmegaConf
-from hydraflow.utils import get_overrides
 if TYPE_CHECKING:
     from collections.abc import Iterator
     from typing import Any
-def collect_params(config: object) -> dict[str, Any]:
-    """Iterate over parameters and collect them into a dictionary.
-    Args:
-        config (object): The configuration object to iterate over.
-        prefix (str): The prefix to prepend to the parameter keys.
-    Returns:
-        dict[str, Any]: A dictionary of collected parameters.
-    """
-    return dict(iter_params(config))
-def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     """Recursively iterate over the parameters in the given configuration object.
     This function traverses the configuration object and yields key-value pairs
     representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config (object): The configuration object to iterate over. This can be a
+        config (Any): The configuration object to iterate over. This can be a
             dictionary, list, DictConfig, or ListConfig.
         prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
@@ -50,7 +34,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
         config = _from_dotlist(config)
     if not isinstance(config, DictConfig | ListConfig):
-        config = OmegaConf.create(config)  # type: ignore
+        config = OmegaConf.create(config)
     yield from _iter_params(config, prefix)
@@ -65,7 +49,7 @@ def _from_dotlist(config: list[str]) -> dict[str, str]:
     return result
-def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
     if isinstance(config, DictConfig):
         for key, value in config.items():
             if _is_param(value):
@@ -83,12 +67,12 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
                 yield from _iter_params(value, f"{prefix}{index}.")
-def _is_param(value: object) -> bool:
+def _is_param(value: Any) -> bool:
     """Check if the given value is a parameter."""
     if isinstance(value, DictConfig):
         return False
-    if isinstance(value, ListConfig):  # noqa: SIM102
+    if isinstance(value, ListConfig):
         if any(isinstance(v, DictConfig | ListConfig) for v in value):
             return False
@@ -103,14 +87,14 @@ def _convert(value: Any) -> Any:
     return value
-def select_config(config: object, names: list[str]) -> dict[str, Any]:
+def select_config(config: Any, names: list[str]) -> dict[str, Any]:
     """Select the given parameters from the configuration object.
     This function selects the given parameters from the configuration object
     and returns a new configuration object containing only the selected parameters.
     Args:
-        config (object): The configuration object to select parameters from.
+        config (Any): The configuration object to select parameters from.
         names (list[str]): The names of the parameters to select.
     Returns:
@@ -120,7 +104,7 @@ def select_config(config: object, names: list[str]) -> dict[str, Any]:
     if not isinstance(config, DictConfig):
         config = OmegaConf.structured(config)
-    return {name: _get(config, name) for name in names}  # type: ignore
+    return {name: _get(config, name) for name in names}
 def _get(config: DictConfig, name: str) -> Any:
@@ -132,8 +116,7 @@ def _get(config: DictConfig, name: str) -> Any:
     return _get(config.get(prefix), name)
-def select_overrides(config: object) -> dict[str, Any]:
+def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
     """Select the given overrides from the configuration object."""
-    overrides = get_overrides()
     names = [override.split("=")[0].strip() for override in overrides]
     return select_config(config, names)

hydraflow/context.py CHANGED Viewed

@@ -12,7 +12,7 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
-from hydraflow.mlflow import log_params
+from hydraflow.mlflow import log_params, log_text
 from hydraflow.utils import get_artifact_dir
 if TYPE_CHECKING:
@@ -55,11 +55,11 @@ def log_run(
         log_params(config, synchronous=synchronous)
     hc = HydraConfig.get()
-    output_dir = Path(hc.runtime.output_dir)
+    hydra_dir = Path(hc.runtime.output_dir)
     # Save '.hydra' config directory.
-    output_subdir = output_dir / (hc.output_subdir or "")
-    mlflow.log_artifacts(output_subdir.as_posix(), hc.output_subdir)
+    hydra_subdir = hydra_dir / (hc.output_subdir or "")
+    mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
     try:
         yield
@@ -70,43 +70,14 @@ def log_run(
         raise
     finally:
-        log_text(output_dir)
-def log_text(directory: Path, pattern: str = "*.log") -> None:
-    """Log text files in the given directory as artifacts.
-    Append the text files to the existing text file in the artifact directory.
-    Args:
-        directory (Path): The directory to find the logs in.
-        pattern (str): The pattern to match the logs.
-    """
-    artifact_dir = get_artifact_dir()
-    for file in directory.glob(pattern):
-        if not file.is_file():
-            continue
-        file_artifact = artifact_dir / file.name
-        if file_artifact.exists():
-            text = file_artifact.read_text()
-            if not text.endswith("\n"):
-                text += "\n"
-        else:
-            text = ""
-        text += file.read_text()
-        mlflow.log_text(text, file.name)
+        log_text(hydra_dir)
 @contextmanager
-def start_run(  # noqa: PLR0913
+def start_run(
     config: object,
     *,
     chdir: bool = False,
-    run: Run | None = None,
     run_id: str | None = None,
     experiment_id: str | None = None,
     run_name: str | None = None,
@@ -126,7 +97,6 @@ def start_run(  # noqa: PLR0913
         config (object): The configuration object to log parameters from.
         chdir (bool): Whether to change the current working directory to the
             artifact directory of the current run. Defaults to False.
-        run (Run | None): The existing run. Defaults to None.
         run_id (str | None): The existing run ID. Defaults to None.
         experiment_id (str | None): The experiment ID. Defaults to None.
         run_name (str | None): The name of the run. Defaults to None.
@@ -142,20 +112,7 @@ def start_run(  # noqa: PLR0913
     Yields:
         Run: An MLflow Run object representing the started run.
-    Example:
-        with start_run(config) as run:
-            # Perform operations within the MLflow run context
-            pass
-    See Also:
-        - `mlflow.start_run`: The MLflow function to start a run directly.
-        - `log_run`: A context manager to log parameters and manage the MLflow
-           run context.
     """
-    if run:
-        run_id = run.info.run_id
     with (
         mlflow.start_run(
             run_id=run_id,

hydraflow/main.py CHANGED Viewed

@@ -1,54 +1,162 @@
-"""main decorator."""
+"""Integration of MLflow experiment tracking with Hydra configuration management.
+This module provides decorators and utilities to seamlessly combine Hydra's
+configuration management with MLflow's experiment tracking capabilities. It
+enables automatic run deduplication, configuration storage, and experiment
+management.
+The main functionality is provided through the `main` decorator, which can be
+used to wrap experiment entry points. This decorator handles:
+- Configuration management via Hydra
+- Experiment tracking via MLflow
+- Run deduplication based on configurations
+- Working directory management
+- Automatic configuration storage
+Example:
+    ```python
+    from dataclasses import dataclass
+    from mlflow.entities import Run
+    @dataclass
+    class Config:
+        learning_rate: float
+        batch_size: int
+    @main(Config)
+    def train(run: Run, config: Config):
+        # Your training code here
+        pass
+    ```
+"""
 from __future__ import annotations
 from functools import wraps
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, TypeVar
 import hydra
+import mlflow
 from hydra.core.config_store import ConfigStore
+from hydra.core.hydra_config import HydraConfig
 from mlflow.entities import RunStatus
+from omegaconf import OmegaConf
 import hydraflow
+from hydraflow.utils import file_uri_to_path
 if TYPE_CHECKING:
     from collections.abc import Callable
+    from pathlib import Path
     from mlflow.entities import Run
 FINISHED = RunStatus.to_string(RunStatus.FINISHED)
+T = TypeVar("T")
 def main(
-    node: Any,
+    node: T | type[T],
     config_name: str = "config",
     *,
     chdir: bool = False,
     force_new_run: bool = False,
-    skip_finished: bool = True,
+    match_overrides: bool = False,
+    rerun_finished: bool = False,
 ):
-    """Main decorator."""
-    def decorator(app: Callable[[Run, Any], None]) -> Callable[[], None]:
-        ConfigStore.instance().store(name=config_name, node=node)
+    """Decorator for configuring and running MLflow experiments with Hydra.
+    This decorator combines Hydra configuration management with MLflow experiment
+    tracking. It automatically handles run deduplication and configuration storage.
+    Args:
+        node: Configuration node class or instance defining the structure of the
+            configuration.
+        config_name: Name of the configuration. Defaults to "config".
+        chdir: If True, changes working directory to the artifact directory
+            of the run. Defaults to False.
+        force_new_run: If True, always creates a new MLflow run instead of
+            reusing existing ones. Defaults to False.
+        match_overrides: If True, matches runs based on Hydra CLI overrides
+            instead of full config. Defaults to False.
+        rerun_finished: If True, allows rerunning completed runs. Defaults to
+            False.
+    """
+    def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
+        ConfigStore.instance().store(config_name, node)
+        @hydra.main(config_name=config_name, version_base=None)
         @wraps(app)
-        @hydra.main(version_base=None, config_name=config_name)
-        def inner_app(cfg: object) -> None:
-            hydraflow.set_experiment()
+        def inner_decorator(config: T) -> None:
+            hc = HydraConfig.get()
+            experiment = mlflow.set_experiment(hc.job.name)
             if force_new_run:
-                run = None
+                run_id = None
             else:
-                rc = hydraflow.search_runs()
-                run = rc.try_get(cfg, override=True)
+                uri = experiment.artifact_location
+                overrides = hc.overrides.task if match_overrides else None
+                run_id = get_run_id(uri, config, overrides)
-                if skip_finished and run and run.info.status == FINISHED:
-                    return
+                if run_id and not rerun_finished:
+                    run = mlflow.get_run(run_id)
+                    if run.info.status == FINISHED:
+                        return
-            with hydraflow.start_run(cfg, run=run, chdir=chdir) as run:
-                app(run, cfg)
+            with hydraflow.start_run(config, run_id=run_id, chdir=chdir) as run:
+                app(run, config)
-        return inner_app
+        return inner_decorator
     return decorator
+def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | None:
+    """Try to get the run ID for the given configuration.
+    If the run is not found, the function will return None.
+    Args:
+        uri (str): The URI of the experiment.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        The run ID for the given configuration or overrides. Returns None if
+        no run ID is found.
+    """
+    for run_dir in file_uri_to_path(uri).iterdir():
+        if run_dir.is_dir() and equals(run_dir, config, overrides):
+            return run_dir.name
+    return None
+def equals(run_dir: Path, config: object, overrides: list[str] | None) -> bool:
+    """Check if the run directory matches the given configuration or overrides.
+    Args:
+        run_dir (Path): The run directory.
+        config (object): The configuration object.
+        overrides (list[str] | None): The task overrides.
+    Returns:
+        True if the run directory matches the given configuration or overrides,
+        False otherwise.
+    """
+    if overrides is None:
+        path = run_dir / "artifacts/.hydra/config.yaml"
+    else:
+        path = run_dir / "artifacts/.hydra/overrides.yaml"
+        config = overrides
+    if not path.exists():
+        return False
+    return OmegaConf.load(path) == config

hydraflow/mlflow.py CHANGED Viewed

@@ -1,17 +1,8 @@
-"""Provide functionality to log parameters from Hydra configuration objects.
+"""Integration of MLflow experiment tracking with Hydra configuration management.
 This module provides functions to log parameters from Hydra configuration objects
 to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
 configuration management with MLflow's experiment tracking capabilities.
-Key Features:
-- **Experiment Management**: Set experiment names and tracking URIs using Hydra
-  configuration details.
-- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
-  supporting both synchronous and asynchronous logging.
-- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
-  multiple MLflow runs, providing methods to filter and retrieve runs based on
-  various criteria.
 """
 from __future__ import annotations
@@ -21,149 +12,132 @@ from typing import TYPE_CHECKING
 import joblib
 import mlflow
 import mlflow.artifacts
-from hydra.core.hydra_config import HydraConfig
-from mlflow.entities import ViewType
-from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
 from hydraflow.config import iter_params
 from hydraflow.run_collection import RunCollection
-from hydraflow.utils import get_artifact_dir
+from hydraflow.utils import file_uri_to_path, get_artifact_dir
 if TYPE_CHECKING:
     from pathlib import Path
-    from mlflow.entities.experiment import Experiment
+    from typing import Any
-def set_experiment(
-    prefix: str = "",
-    suffix: str = "",
-    uri: str | Path | None = None,
-    name: str | None = None,
-) -> Experiment:
-    """Set the experiment name and tracking URI optionally.
+def log_params(config: Any, *, synchronous: bool | None = None) -> None:
+    """Log the parameters from the given configuration object.
-    This function sets the experiment name by combining the given prefix,
-    the job name from HydraConfig, and the given suffix. Optionally, it can
-    also set the tracking URI.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
     Args:
-        prefix (str): The prefix to prepend to the experiment name.
-        suffix (str): The suffix to append to the experiment name.
-        uri (str | Path | None): The tracking URI to use. Defaults to None.
-        name (str | None): The name of the experiment. Defaults to None.
+        config (Any): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
+            Defaults to None.
-    Returns:
-        Experiment: An instance of `mlflow.entities.Experiment` representing
-        the new active experiment.
+    """
+    for key, value in iter_params(config):
+        mlflow.log_param(key, value, synchronous=synchronous)
+def log_text(from_dir: Path, pattern: str = "*.log") -> None:
+    """Log text files in the given directory as artifacts.
+    Append the text files to the existing text file in the artifact directory.
+    Args:
+        from_dir (Path): The directory to find the logs in.
+        pattern (str): The pattern to match the logs.
     """
-    if uri is not None:
-        mlflow.set_tracking_uri(uri)
+    artifact_dir = get_artifact_dir()
-    if name is not None:
-        return mlflow.set_experiment(name)
+    for file in from_dir.glob(pattern):
+        if not file.is_file():
+            continue
-    hc = HydraConfig.get()
-    name = f"{prefix}{hc.job.name}{suffix}"
-    return mlflow.set_experiment(name)
+        file_artifact = artifact_dir / file.name
+        if file_artifact.exists():
+            text = file_artifact.read_text()
+            if not text.endswith("\n"):
+                text += "\n"
+        else:
+            text = ""
+        text += file.read_text()
+        mlflow.log_text(text, file.name)
-def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    """Log the parameters from the given configuration object.
-    This method logs the parameters from the provided configuration object
-    using MLflow. It iterates over the parameters and logs them using the
-    `mlflow.log_param` method.
+def list_run_paths(
+    experiment_names: str | list[str] | None = None,
+    *other: str,
+) -> list[Path]:
+    """List all run paths for the specified experiments.
+    This function retrieves all run paths for the given list of experiment names.
+    If no experiment names are provided (None), the function will search all runs
+    for all experiments except the "Default" experiment.
     Args:
-        config (object): The configuration object to log the parameters from.
-        synchronous (bool | None): Whether to log the parameters synchronously.
-            Defaults to None.
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None is provided, the function will search all runs
+            for all experiments except the "Default" experiment.
+        *other (str): The parts of the run directory to join.
+    Returns:
+        list[Path]: A list of run paths for the specified experiments.
     """
-    for key, value in iter_params(config):
-        mlflow.log_param(key, value, synchronous=synchronous)
+    if isinstance(experiment_names, str):
+        experiment_names = [experiment_names]
+    elif experiment_names is None:
+        experiments = mlflow.search_experiments()
+        experiment_names = [e.name for e in experiments if e.name != "Default"]
+    run_paths: list[Path] = []
-def search_runs(  # noqa: PLR0913
-    *,
-    experiment_ids: list[str] | None = None,
-    filter_string: str = "",
-    run_view_type: int = ViewType.ACTIVE_ONLY,
-    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
-    order_by: list[str] | None = None,
-    search_all_experiments: bool = False,
-    experiment_names: list[str] | None = None,
-) -> RunCollection:
-    """Search for Runs that fit the specified criteria.
+    for name in experiment_names:
+        if experiment := mlflow.get_experiment_by_name(name):
+            uri = experiment.artifact_location
-    This function wraps the `mlflow.search_runs` function and returns the
-    results as a `RunCollection` object. It allows for flexible searching of
-    MLflow runs based on various criteria.
+            if isinstance(uri, str):
+                path = file_uri_to_path(uri)
+                run_paths.extend(p for p in path.iterdir() if p.is_dir())
-    Note:
-        The returned runs are sorted by their start time in ascending order.
+    if other:
+        return [p.joinpath(*other) for p in run_paths]
+    return run_paths
+def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
+    """List all run IDs for the specified experiments.
+    This function retrieves all runs for the given list of experiment names.
+    If no experiment names are provided (None), the function will search all
+    runs for all experiments except the "Default" experiment.
     Args:
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``None`` will default to the active experiment if ``experiment_names``
-            is ``None`` or ``[]``.
-        filter_string (str): Filter query string, defaults to searching all
-            runs.
-        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
-            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
-        max_results (int): The maximum number of runs to put in the dataframe.
-            Default is 100,000 to avoid causing out-of-memory issues on the user's
-            machine.
-        order_by (list[str] | None): List of columns to order by (e.g.,
-            "metrics.rmse"). The ``order_by`` column can contain an optional
-            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
-            ordering is to sort by ``start_time DESC``, then ``run_id``.
-            ``start_time DESC``, then ``run_id``.
-        search_all_experiments (bool): Boolean specifying whether all
-            experiments should be searched. Only honored if ``experiment_ids``
-            is ``[]`` or ``None``.
-        experiment_names (list[str] | None): List of experiment names. Search
-            can work with experiment IDs or experiment names, but not both in
-            the same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_ids`` is also not ``None`` or ``[]``.
-            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_ids`` is ``None``
-            or ``[]``.
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None is provided, the function will search all runs
+            for all experiments except the "Default" experiment.
     Returns:
-        A `RunCollection` object containing the search results.
+        list[str]: A list of run IDs for the specified experiments.
     """
-    runs = mlflow.search_runs(
-        experiment_ids=experiment_ids,
-        filter_string=filter_string,
-        run_view_type=run_view_type,
-        max_results=max_results,
-        order_by=order_by,
-        output_format="list",
-        search_all_experiments=search_all_experiments,
-        experiment_names=experiment_names,
-    )
-    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
-    return RunCollection(runs)  # type: ignore
+    return [run_path.stem for run_path in list_run_paths(experiment_names)]
 def list_runs(
     experiment_names: str | list[str] | None = None,
     n_jobs: int = 0,
-    status: str | list[str] | int | list[int] | None = None,
 ) -> RunCollection:
     """List all runs for the specified experiments.
     This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), it defaults to searching all runs
-    for the currently active experiment. If an empty list is provided, the function
-    will search all runs for all experiments except the "Default" experiment.
+    If no experiment names are provided (None), the function will search all runs
+    for all experiments except the "Default" experiment.
     The function returns the results as a `RunCollection` object.
     Note:
@@ -171,55 +145,23 @@ def list_runs(
     Args:
         experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None or an empty list is provided, the function will
-            search the currently active experiment or all experiments except
-            the "Default" experiment.
-        n_jobs (int): The number of jobs to run in parallel. If 0, the function
-            will search runs sequentially.
-        status (str | list[str] | int | list[int] | None): The status of the runs
-            to filter.
+            for runs. If None is provided, the function will search all runs
+            for all experiments except the "Default" experiment.
+        n_jobs (int): The number of jobs to retrieve runs in parallel.
     Returns:
         RunCollection: A `RunCollection` instance containing the runs for the
         specified experiments.
     """
-    rc = _list_runs(experiment_names, n_jobs)
-    if status is None:
-        return rc
-    return rc.filter(status=status)
-def _list_runs(
-    experiment_names: str | list[str] | None = None,
-    n_jobs: int = 0,
-) -> RunCollection:
-    if isinstance(experiment_names, str):
-        experiment_names = [experiment_names]
-    elif experiment_names == []:
-        experiments = mlflow.search_experiments()
-        experiment_names = [e.name for e in experiments if e.name != "Default"]
+    run_ids = list_run_ids(experiment_names)
     if n_jobs == 0:
-        return search_runs(experiment_names=experiment_names)
-    if experiment_names is None:
-        experiment_id = _get_experiment_id()
-        experiment_names = [mlflow.get_experiment(experiment_id).name]
-    run_ids = []
-    for name in experiment_names:
-        if experiment := mlflow.get_experiment_by_name(name):
-            uri = experiment.artifact_location
+        runs = [mlflow.get_run(run_id) for run_id in run_ids]
-            if isinstance(uri, str):
-                path = get_artifact_dir(uri=uri)
-                run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
+    else:
+        it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
+        runs = joblib.Parallel(n_jobs, backend="threading")(it)
-    it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
-    runs = joblib.Parallel(n_jobs, prefer="threads")(it)
     runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
     return RunCollection(runs)  # type: ignore

hydraflow/param.py CHANGED Viewed

@@ -18,7 +18,7 @@ if TYPE_CHECKING:
     from mlflow.entities import Run
-def match(param: str, value: Any) -> bool:  # noqa: PLR0911
+def match(param: str, value: Any) -> bool:
     """Check if the string matches the specified value.
     Args:
@@ -68,7 +68,7 @@ def _match_list(param: str, value: list) -> bool | None:
 def _match_tuple(param: str, value: tuple) -> bool | None:
-    if len(value) != 2:  # noqa: PLR2004
+    if len(value) != 2:
         return None
     if any(param.startswith(x) for x in ["[", "(", "{"]):

hydraflow/run_collection.py CHANGED Viewed

@@ -21,7 +21,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
+from typing import TYPE_CHECKING, Any, overload
 from mlflow.entities import RunStatus
@@ -34,15 +34,9 @@ from hydraflow.utils import load_config
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
-    from pathlib import Path
     from typing import Any
     from mlflow.entities.run import Run
-    from omegaconf import DictConfig
-T = TypeVar("T")
-P = ParamSpec("P")
 @dataclass
@@ -124,11 +118,6 @@ class RunCollection:
         runs = [run for run in self._runs if run not in other._runs]  # noqa: SLF001
         return self.__class__(runs)
-    @classmethod
-    def from_list(cls, runs: list[Run]) -> RunCollection:
-        """Create a `RunCollection` instance from a list of MLflow `Run` instances."""
-        return cls(runs)
     @property
     def info(self) -> RunCollectionInfo:
         """An instance of `RunCollectionInfo`."""
@@ -139,26 +128,6 @@ class RunCollection:
         """An instance of `RunCollectionData`."""
         return self._data
-    def take(self, n: int) -> RunCollection:
-        """Take the first n runs from the collection.
-        If n is negative, the method returns the last n runs
-        from the collection.
-        Args:
-            n (int): The number of runs to take. If n is negative, the method
-            returns the last n runs from the collection.
-        Returns:
-            A new `RunCollection` instance containing the first n runs if n is
-            positive, or the last n runs if n is negative.
-        """
-        if n < 0:
-            return self.__class__(self._runs[n:])
-        return self.__class__(self._runs[:n])
     def one(self) -> Run:
         """Get the only `Run` instance in the collection.
@@ -238,8 +207,8 @@ class RunCollection:
         self,
         config: object | Callable[[Run], bool] | None = None,
         *,
-        override: bool = False,
         select: list[str] | None = None,
+        overrides: list[str] | None = None,
         status: str | list[str] | int | list[int] | None = None,
         **kwargs,
     ) -> RunCollection:
@@ -264,9 +233,9 @@ class RunCollection:
                 to filter the runs. This can be any object that provides key-value
                 pairs through the `iter_params` function, or a callable that
                 takes a `Run` object and returns a boolean value.
-            override (bool): If True, override the configuration object with the
-                provided key-value pairs.
             select (list[str] | None): The list of parameters to select.
+            overrides (list[str] | None): The list of overrides to filter the
+                runs.
             status (str | list[str] | int | list[int] | None): The status of the
                 runs to filter.
             **kwargs: Additional key-value pairs to filter the runs.
@@ -279,8 +248,8 @@ class RunCollection:
             filter_runs(
                 self._runs,
                 config,
-                override=override,
                 select=select,
+                overrides=overrides,
                 status=status,
                 **kwargs,
             ),
@@ -400,121 +369,6 @@ class RunCollection:
         return params
-    def map(
-        self,
-        func: Callable[Concatenate[Run, P], T],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Iterator[T]:
-        """Return an iterator of results by applying a function to each run.
-        This method iterates over each run in the collection and applies the
-        provided function to it, along with any additional arguments and
-        keyword arguments.
-        Args:
-            func (Callable[[Run, P], T]): A function that takes a run and
-                additional arguments and returns a result.
-            *args: Additional arguments to pass to the function.
-            **kwargs: Additional keyword arguments to pass to the function.
-        Yields:
-            Results obtained by applying the function to each run in the collection.
-        """
-        return (func(run, *args, **kwargs) for run in self)
-    def map_id(
-        self,
-        func: Callable[Concatenate[str, P], T],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Iterator[T]:
-        """Return an iterator of results by applying a function to each run id.
-        Args:
-            func (Callable[[str, P], T]): A function that takes a run id and returns a
-                result.
-            *args: Additional arguments to pass to the function.
-            **kwargs: Additional keyword arguments to pass to the function.
-        Yields:
-            Results obtained by applying the function to each run id in the
-            collection.
-        """
-        return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
-    def map_config(
-        self,
-        func: Callable[Concatenate[DictConfig, P], T],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Iterator[T]:
-        """Return an iterator of results by applying a function to each run config.
-        Args:
-            func (Callable[[DictConfig, P], T]): A function that takes a run
-                configuration and returns a result.
-            *args: Additional arguments to pass to the function.
-            **kwargs: Additional keyword arguments to pass to the function.
-        Yields:
-            Results obtained by applying the function to each run configuration
-            in the collection.
-        """
-        return (func(load_config(run), *args, **kwargs) for run in self)
-    def map_uri(
-        self,
-        func: Callable[Concatenate[str | None, P], T],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Iterator[T]:
-        """Return an iterator of results by applying a function to each artifact URI.
-        Iterate over each run in the collection, retrieves the artifact URI, and
-        apply the provided function to it. If a run does not have an artifact
-        URI, None is passed to the function.
-        Args:
-            func (Callable[[str | None, P], T]): A function that takes an
-                artifact URI (string or None) and returns a result.
-            *args: Additional arguments to pass to the function.
-            **kwargs: Additional keyword arguments to pass to the function.
-        Yields:
-            Results obtained by applying the function to each artifact URI in the
-            collection.
-        """
-        return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
-    def map_dir(
-        self,
-        func: Callable[Concatenate[Path, P], T],
-        *args: P.args,
-        **kwargs: P.kwargs,
-    ) -> Iterator[T]:
-        """Return an iterator of results by applying a function to each artifact dir.
-        Iterate over each run in the collection, downloads the artifact
-        directory, and apply the provided function to the directory path.
-        Args:
-            func (Callable[[Path, P], T]): A function that takes an artifact directory
-                path (string) and returns a result.
-            *args: Additional arguments to pass to the function.
-            **kwargs: Additional keyword arguments to pass to the function.
-        Yields:
-            Results obtained by applying the function to each artifact directory
-            in the collection.
-        """
-        return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)  # noqa: A001
     def groupby(
         self,
         names: str | list[str],
@@ -631,8 +485,8 @@ def filter_runs(
     runs: list[Run],
     config: object | Callable[[Run], bool] | None = None,
     *,
-    override: bool = False,
     select: list[str] | None = None,
+    overrides: list[str] | None = None,
     status: str | list[str] | int | list[int] | None = None,
     **kwargs,
 ) -> list[Run]:
@@ -658,10 +512,10 @@ def filter_runs(
             that provides key-value pairs through the `iter_params` function.
             This can also be a callable that takes a `Run` object and returns
             a boolean value. Defaults to None.
-        override (bool, optional): If True, filter the runs based on
-            the overrides. Defaults to False.
         select (list[str] | None, optional): The list of parameters to select.
             Defaults to None.
+        overrides (list[str] | None, optional): The list of overrides to filter the
+            runs. Defaults to None.
         status (str | list[str] | RunStatus | list[RunStatus] | None, optional): The
             status of the runs to filter. Defaults to None.
         **kwargs: Additional key-value pairs to filter the runs.
@@ -674,8 +528,8 @@ def filter_runs(
         runs = [run for run in runs if config(run)]
     else:
-        if override:
-            config = select_overrides(config)
+        if overrides:
+            config = select_overrides(config, overrides)
         elif select:
             config = select_config(config, select)

hydraflow/run_data.py CHANGED Viewed

@@ -6,7 +6,8 @@ from typing import TYPE_CHECKING
 from pandas import DataFrame
-from hydraflow.config import collect_params
+from hydraflow.config import iter_params
+from hydraflow.utils import load_config
 if TYPE_CHECKING:
     from collections.abc import Iterable
@@ -39,7 +40,8 @@ class RunCollectionData:
             A DataFrame containing the runs' configurations.
         """
-        return DataFrame(self._runs.map_config(collect_params))
+        values = [dict(iter_params(load_config(r))) for r in self._runs]
+        return DataFrame(values)
 def _to_dict(it: Iterable[dict[str, Any]]) -> dict[str, list[Any]]:

hydraflow/utils.py CHANGED Viewed

@@ -12,46 +12,42 @@ import mlflow
 import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
 from mlflow.entities import Run
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import DictConfig, ListConfig, OmegaConf
 if TYPE_CHECKING:
     from collections.abc import Iterable
-def get_artifact_dir(run: Run | None = None, uri: str | None = None) -> Path:
+def file_uri_to_path(uri: str) -> Path:
+    """Convert a file URI to a local path."""
+    if not uri.startswith("file:"):
+        return Path(uri)
+    path = urllib.parse.urlparse(uri).path
+    return Path(urllib.request.url2pathname(path))  # for Windows
+def get_artifact_dir(run: Run | None = None) -> Path:
     """Retrieve the artifact directory for the given run.
     This function uses MLflow to get the artifact directory for the given run.
     Args:
         run (Run | None): The run object. Defaults to None.
-        uri (str | None): The URI of the artifact. Defaults to None.
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
-    if run is not None and uri is not None:
-        raise ValueError("Cannot provide both run and uri")
-    if run is None and uri is None:
+    if run is None:
         uri = mlflow.get_artifact_uri()
-    elif run:
+    else:
         uri = run.info.artifact_uri
     if not isinstance(uri, str):
         raise NotImplementedError
-    if uri.startswith("file:"):
-        return file_uri_to_path(uri)
-    return Path(uri)
-def file_uri_to_path(uri: str) -> Path:
-    """Convert a file URI to a local path."""
-    path = urllib.parse.urlparse(uri).path
-    return Path(urllib.request.url2pathname(path))  # for Windows
+    return file_uri_to_path(uri)
 def get_artifact_path(run: Run | None, path: str) -> Path:
@@ -123,12 +119,7 @@ def load_config(run: Run) -> DictConfig:
     return OmegaConf.load(path)  # type: ignore
-def get_overrides() -> list[str]:
-    """Retrieve the overrides for the current run."""
-    return list(HydraConfig.get().overrides.task)  # ListConifg -> list
-def load_overrides(run: Run) -> list[str]:
+def load_overrides(run: Run) -> ListConfig:
     """Load the overrides for a given run.
     This function loads the overrides for the provided Run instance
@@ -137,15 +128,15 @@ def load_overrides(run: Run) -> list[str]:
     `.hydra/overrides.yaml` is not found in the run's artifact directory.
     Args:
-        run (Run): The Run instance for which to load the overrides.
+        run (Run): The Run instance for which to load the configuration.
     Returns:
-        The loaded overrides as a list of strings. Returns an empty list
-        if the overrides file is not found.
+        The loaded configuration as a DictConfig object. Returns an empty
+        DictConfig if the configuration file is not found.
     """
     path = get_artifact_dir(run) / ".hydra/overrides.yaml"
-    return [str(x) for x in OmegaConf.load(path)]
+    return OmegaConf.load(path)  # type: ignore
 def remove_run(run: Run | Iterable[Run]) -> None:

{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.7.4
+Version: 0.8.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -55,7 +55,7 @@ Description-Content-Type: text/markdown
 [pypi-v-link]: https://pypi.org/project/hydraflow/
 [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
 [python-v-link]: https://pypi.org/project/hydraflow
-[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
 [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
 [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
 [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
@@ -108,7 +108,7 @@ class MySQLConfig:
 cs = ConfigStore.instance()
 cs.store(name="config", node=MySQLConfig)
-@hydra.main(version_base=None, config_name="config")
+@hydra.main(config_name="config", version_base=None)
 def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()

hydraflow-0.8.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+hydraflow/__init__.py,sha256=yp4LT1FDYPIduR6PqJNuSm9kztVCpL1P0zcPHWGvaJU,712
+hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
+hydraflow/config.py,sha256=SJzjgsO_kzB78_whJ3lmy7GlZvTvwZONH1BJBn8zCuI,3817
+hydraflow/context.py,sha256=H5xeNbhMS23U-epsucprl5G3lbOR1aO9nDES4QGLWNk,4747
+hydraflow/main.py,sha256=O5ETCMCg12zXoaYlZMHcM4IYAs6GVTkADrmEssrtjkk,4994
+hydraflow/mlflow.py,sha256=pRRsBaBBH4cfzSko-8mmo5bV04GGklxoO0kORkInypM,5663
+hydraflow/param.py,sha256=LHU9j9_7oA99igasoOyKofKClVr9FmGA3UABJ-KmyS0,4538
+hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/run_collection.py,sha256=rtH1cglSlK3QFg9hhifo9lzjDa9veHpoyYxEOmIEM84,19646
+hydraflow/run_data.py,sha256=S2NNFtA1TleqpgeK4mIn1YY8YbWJFyhF7wXR5NWeYLk,1604
+hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
+hydraflow/utils.py,sha256=T4ESiepEcqR-FZlo_m7VTBEFMwalrqPI8eFKPagvv3Q,4402
+hydraflow-0.8.0.dist-info/METADATA,sha256=J1ilgG7L4A8OvzgZSNycp0YgyHk5e8_gwTr9NN82Ejk,4767
+hydraflow-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hydraflow-0.8.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
+hydraflow-0.8.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.8.0.dist-info/RECORD,,

hydraflow-0.7.4.dist-info/RECORD DELETED Viewed

@@ -1,17 +0,0 @@
-hydraflow/__init__.py,sha256=rujOGabEPPhPfyqTHynem3unqIEQ1haTWWSMuu2LuoQ,898
-hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
-hydraflow/config.py,sha256=MNX9da5bPVDcjnpji7Cm9ndK6ura92pt361m4PRh6_E,4326
-hydraflow/context.py,sha256=3xfKhMozkKFqtWeOp9Gie0A5o5URMta4US6iVD5TcLU,6002
-hydraflow/main.py,sha256=hroncI_SNpNgEtdxLgzI397J5S2Amv7J0atnPxwBePM,1314
-hydraflow/mlflow.py,sha256=imD3XL0RTlpnKrkyvO8FNy_Bv6hwSfLiOu1yJuL40ck,8773
-hydraflow/param.py,sha256=yu1aMNXRLegXGDL-68vwIkfeDF9CaU784WZENGLwl7Q,4572
-hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydraflow/run_collection.py,sha256=YCWg5Dz1j49xB2LA75onq5wsAeQQbifXpG4yPUwRN4I,24776
-hydraflow/run_data.py,sha256=dpyyfnuH9mCtIZeigMo1iFQo9bafMdEL4i4uI2l0UqY,1525
-hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
-hydraflow/utils.py,sha256=a9i5PEJn8Ssowv9dqHadAihZXlsqtVjHZ9MZvkPq1bY,4747
-hydraflow-0.7.4.dist-info/METADATA,sha256=GTJi5z8TTIwPy6qpscw-t3Mb1V-GOR0iYU_IB-DB-UE,4766
-hydraflow-0.7.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hydraflow-0.7.4.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
-hydraflow-0.7.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.7.4.dist-info/RECORD,,

{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow-0.7.4-py3-none-any.whl → hydraflow-0.8.0-py3-none-any.whl

hydraflow-0.7.4-py3-none-any.whl → hydraflow-0.8.0-py3-none-any.whl