PyPI - hydraflow - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

hydraflow 0.2.6 (py3-none-any.whl) → 0.2.8 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

hydraflow/__init__.py +4 -4
hydraflow/context.py +24 -8
hydraflow/info.py +116 -0
hydraflow/mlflow.py +101 -50
hydraflow/progress.py +87 -12
hydraflow/{runs.py → run_collection.py} +117 -154
{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/METADATA +1 -1
hydraflow-0.2.8.dist-info/RECORD +12 -0
hydraflow-0.2.6.dist-info/RECORD +0 -11
{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/WHEEL +0 -0
{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from .context import chdir_artifact, log_run, start_run, watch
-from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
-from .runs import (
-    RunCollection,
+from .info import get_artifact_dir, get_hydra_output_dir, load_config
+from .mlflow import (
     list_runs,
-    load_config,
     search_runs,
+    set_experiment,
 )
+from .run_collection import RunCollection
 __all__ = [
     "RunCollection",

hydraflow/context.py CHANGED Viewed

@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
-from watchdog.events import FileModifiedEvent, FileSystemEventHandler
+from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
 from watchdog.observers import Observer
-from hydraflow.mlflow import get_artifact_dir, log_params
+from hydraflow.info import get_artifact_dir
+from hydraflow.mlflow import log_params
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -68,7 +69,7 @@ def log_run(
         mlflow.log_artifact(local_path)
     try:
-        with watch(log_artifact, output_dir):
+        with watch(log_artifact, output_dir, ignore_log=False):
             yield
     except Exception as e:
@@ -140,9 +141,11 @@ def start_run(
 @contextmanager
 def watch(
-    func: Callable[[Path], None],
+    callback: Callable[[Path], None],
     dir: Path | str = "",
     timeout: int = 60,
+    ignore_patterns: list[str] | None = None,
+    ignore_log: bool = True,
 ) -> Iterator[None]:
     """
     Watch the given directory for changes and call the provided function
@@ -154,7 +157,7 @@ def watch(
     period or until the context is exited.
     Args:
-        func (Callable[[Path], None]): The function to call when a change is
+        callback (Callable[[Path], None]): The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
         dir (Path | str): The directory to watch. If not specified,
@@ -174,7 +177,7 @@ def watch(
     if isinstance(dir, Path):
         dir = dir.as_posix()
-    handler = Handler(func)
+    handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
     observer = Observer()
     observer.schedule(handler, dir, recursive=True)
     observer.start()
@@ -198,10 +201,23 @@ def watch(
         observer.join()
-class Handler(FileSystemEventHandler):
-    def __init__(self, func: Callable[[Path], None]) -> None:
+class Handler(PatternMatchingEventHandler):
+    def __init__(
+        self,
+        func: Callable[[Path], None],
+        ignore_patterns: list[str] | None = None,
+        ignore_log: bool = True,
+    ) -> None:
         self.func = func
+        if ignore_log:
+            if ignore_patterns:
+                ignore_patterns.append("*.log")
+            else:
+                ignore_patterns = ["*.log"]
+        super().__init__(ignore_patterns=ignore_patterns)
     def on_modified(self, event: FileModifiedEvent) -> None:
         file = Path(str(event.src_path))
         if file.is_file():

hydraflow/info.py ADDED Viewed

@@ -0,0 +1,116 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
+from omegaconf import DictConfig, OmegaConf
+if TYPE_CHECKING:
+    from mlflow.entities import Run
+    from hydraflow.run_collection import RunCollection
+class RunCollectionInfo:
+    def __init__(self, runs: RunCollection):
+        self._runs = runs
+    @property
+    def run_id(self) -> list[str]:
+        return [run.info.run_id for run in self._runs]
+    @property
+    def params(self) -> list[dict[str, str]]:
+        return [run.data.params for run in self._runs]
+    @property
+    def metrics(self) -> list[dict[str, float]]:
+        return [run.data.metrics for run in self._runs]
+    @property
+    def artifact_uri(self) -> list[str | None]:
+        return [run.info.artifact_uri for run in self._runs]
+    @property
+    def artifact_dir(self) -> list[Path]:
+        return [get_artifact_dir(run) for run in self._runs]
+    @property
+    def config(self) -> list[DictConfig]:
+        return [load_config(run) for run in self._runs]
+def get_artifact_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the artifact directory for the given run.
+    This function uses MLflow to get the artifact directory for the given run.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    if run is None:
+        uri = mlflow.get_artifact_uri()
+    else:
+        uri = artifact_utils.get_artifact_uri(run.info.run_id)
+    return Path(mlflow.artifacts.download_artifacts(uri))
+def get_hydra_output_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the Hydra output directory for the given run.
+    This function returns the Hydra output directory. If no run is provided,
+    it retrieves the output directory from the current Hydra configuration.
+    If a run is provided, it retrieves the artifact path for the run, loads
+    the Hydra configuration from the downloaded artifacts, and returns the
+    output directory specified in that configuration.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        Path: The path to the Hydra output directory.
+    Raises:
+        FileNotFoundError: If the Hydra configuration file is not found
+            in the artifacts.
+    """
+    if run is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
+def load_config(run: Run) -> DictConfig:
+    """
+    Load the configuration for a given run.
+    This function loads the configuration for the provided Run instance
+    by downloading the configuration file from the MLflow artifacts and
+    loading it using OmegaConf. The file is read from
+    `.hydra/config.yaml` in the run's artifact directory; no fallback
+    is applied here if that file is missing.
+    Args:
+        run (Run): The Run instance for which to load the configuration.
+    Returns:
+        The loaded configuration as a DictConfig object.
+    """
+    path = get_artifact_dir(run) / ".hydra/config.yaml"
+    return OmegaConf.load(path)  # type: ignore

hydraflow/mlflow.py CHANGED Viewed

@@ -1,6 +1,20 @@
 """
-This module provides functionality to log parameters from Hydra
-configuration objects and set up experiments using MLflow.
+This module provides functionality to log parameters from Hydra configuration objects
+and set up experiments using MLflow. It includes methods for managing experiments,
+searching for runs, and logging parameters and artifacts.
+Key Features:
+- **Experiment Management**: Set and manage MLflow experiments with customizable names
+  based on Hydra configuration.
+- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
+  MLflow, ensuring that all relevant information is captured during experiments.
+- **Run Search**: Search for runs based on various criteria, allowing for flexible
+  retrieval of experiment results.
+- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
+  easy access to outputs generated during experiments.
+This module is designed to integrate seamlessly with Hydra, providing a robust
+solution for tracking machine learning experiments and their associated metadata.
 """
 from __future__ import annotations
@@ -10,10 +24,11 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
-from mlflow.tracking import artifact_utils
-from omegaconf import OmegaConf
+from mlflow.entities import ViewType
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from hydraflow.config import iter_params
+from hydraflow.run_collection import RunCollection
 if TYPE_CHECKING:
     from mlflow.entities.experiment import Experiment
@@ -25,7 +40,7 @@ def set_experiment(
     uri: str | Path | None = None,
 ) -> Experiment:
     """
-    Set the experiment name and tracking URI optionally.
+    Sets the experiment name and tracking URI optionally.
     This function sets the experiment name by combining the given prefix,
     the job name from HydraConfig, and the given suffix. Optionally, it can
@@ -65,60 +80,96 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
         mlflow.log_param(key, value, synchronous=synchronous)
-def get_artifact_dir(
-    artifact_path: str | None = None,
-    *,
-    run_id: str | None = None,
-) -> Path:
+def search_runs(
+    experiment_ids: list[str] | None = None,
+    filter_string: str = "",
+    run_view_type: int = ViewType.ACTIVE_ONLY,
+    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
+    order_by: list[str] | None = None,
+    search_all_experiments: bool = False,
+    experiment_names: list[str] | None = None,
+) -> RunCollection:
     """
-    Get the artifact directory for the given artifact path.
+    Search for Runs that fit the specified criteria.
-    This function retrieves the artifact URI for the specified artifact path
-    using MLflow, downloads the artifacts to a local directory, and returns
-    the path to that directory.
+    This function wraps the `mlflow.search_runs` function and returns the
+    results as a `RunCollection` object. It allows for flexible searching of
+    MLflow runs based on various criteria.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
     Args:
-        artifact_path (str | None): The artifact path for which to get the
-            directory. Defaults to None.
-        run_id (str | None): The run ID for which to get the artifact directory.
+        experiment_ids (list[str] | None): List of experiment IDs. Search can
+            work with experiment IDs or experiment names, but not both in the
+            same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_names`` is also not ``None`` or ``[]``.
+            ``None`` will default to the active experiment if ``experiment_names``
+            is ``None`` or ``[]``.
+        filter_string (str): Filter query string, defaults to searching all
+            runs.
+        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
+            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
+        max_results (int): The maximum number of runs to put in the dataframe.
+            Default is 100,000 to avoid causing out-of-memory issues on the user's
+            machine.
+        order_by (list[str] | None): List of columns to order by (e.g.,
+            "metrics.rmse"). The ``order_by`` column can contain an optional
+            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
+            ordering is to sort by ``start_time DESC``, then ``run_id``.
+        search_all_experiments (bool): Boolean specifying whether all
+            experiments should be searched. Only honored if ``experiment_ids``
+            is ``[]`` or ``None``.
+        experiment_names (list[str] | None): List of experiment names. Search
+            can work with experiment IDs or experiment names, but not both in
+            the same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_ids`` is also not ``None`` or ``[]``.
+            ``None`` will default to the active experiment if
+            ``experiment_ids`` is ``None`` or ``[]``.
     Returns:
-        The local path to the directory where the artifacts are downloaded.
+        A `RunCollection` object containing the search results.
     """
-    if run_id is None:
-        uri = mlflow.get_artifact_uri(artifact_path)
-    else:
-        uri = artifact_utils.get_artifact_uri(run_id, artifact_path)
-    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
-    return Path(dir)
-def get_hydra_output_dir(*, run_id: str | None = None) -> Path:
-    if run_id is None:
-        hc = HydraConfig.get()
-        return Path(hc.runtime.output_dir)
-    path = get_artifact_dir(run_id=run_id) / ".hydra/hydra.yaml"
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
+    runs = mlflow.search_runs(
+        experiment_ids=experiment_ids,
+        filter_string=filter_string,
+        run_view_type=run_view_type,
+        max_results=max_results,
+        order_by=order_by,
+        output_format="list",
+        search_all_experiments=search_all_experiments,
+        experiment_names=experiment_names,
+    )
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore
+def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
+    """
+    List all runs for the specified experiments.
-    raise FileNotFoundError
+    This function retrieves all runs for the given list of experiment names.
+    If no experiment names are provided (None), it defaults to searching all runs
+    for the currently active experiment. If an empty list is provided, the function
+    will search all runs for all experiments except the "Default" experiment.
+    The function returns the results as a `RunCollection` object.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
-# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-#     """
-#     Log the Hydra output directory.
+    Args:
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None or an empty list is provided, the function will
+            search the currently active experiment or all experiments except
+            the "Default" experiment.
-#     Args:
-#         run: The run object.
+    Returns:
+        A `RunCollection` object containing the runs for the specified experiments.
+    """
+    if experiment_names == []:
+        experiments = mlflow.search_experiments()
+        experiment_names = [e.name for e in experiments if e.name != "Default"]
-#     Returns:
-#         None
-#     """
-#     output_dir = get_hydra_output_dir(run)
-#     run_id = run if isinstance(run, str) else run.info.run_id
-#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
+    return search_runs(experiment_names=experiment_names)

hydraflow/progress.py CHANGED Viewed

@@ -3,27 +3,57 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 import joblib
-from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
+from rich.progress import Progress
 if TYPE_CHECKING:
     from collections.abc import Iterable
+    from rich.progress import ProgressColumn
-def progress(
-    *iterables: Iterable[int | tuple[int, int]],
+def multi_task_progress(
+    iterables: Iterable[Iterable[int | tuple[int, int]]],
+    *columns: ProgressColumn | str,
     n_jobs: int = -1,
-    task_name: str = "#{:0>3}",
-    main_task_name: str = "main",
+    description: str = "#{:0>3}",
+    main_description: str = "main",
+    transient: bool | None = None,
+    **kwargs,
 ) -> None:
-    with Progress(
-        SpinnerColumn(),
-        *Progress.get_default_columns(),
-        TimeElapsedColumn(),
-    ) as progress:
+    """
+    Render auto-updating progress bars for multiple tasks concurrently.
+    Args:
+        iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
+            iterables, each representing a task. Each iterable can yield
+            integers (completed) or tuples of integers (completed, total).
+        *columns (ProgressColumn | str): Additional columns to display in the
+            progress bars.
+        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to
+            -1, which means using all processors.
+        description (str, optional): Format string for describing tasks. Defaults to
+            "#{:0>3}".
+        main_description (str, optional): Description for the main task.
+            Defaults to "main".
+        transient (bool | None, optional): Whether to remove the progress bar
+            after completion. Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+    Returns:
+        None
+    """
+    if not columns:
+        columns = Progress.get_default_columns()
+    iterables = list(iterables)
+    with Progress(*columns, transient=transient or False, **kwargs) as progress:
         n = len(iterables)
-        task_main = progress.add_task(main_task_name, total=None) if n > 1 else None
-        tasks = [progress.add_task(task_name.format(i), start=False, total=None) for i in range(n)]
+        task_main = progress.add_task(main_description, total=None) if n > 1 else None
+        tasks = [
+            progress.add_task(description.format(i), start=False, total=None) for i in range(n)
+        ]
         total = {}
         completed = {}
@@ -48,9 +78,54 @@ def progress(
                     c = sum(completed.values())
                     progress.update(task_main, total=t, completed=c)
+            if transient or n > 1:
+                progress.remove_task(tasks[i])
         if n > 1:
             it = (joblib.delayed(func)(i) for i in range(n))
             joblib.Parallel(n_jobs, prefer="threads")(it)
         else:
             func(0)
+if __name__ == "__main__":
+    import random
+    import time
+    from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
+    from hydraflow.progress import multi_task_progress
+    def task(total):
+        for i in range(total or 90):
+            if total is None:
+                yield i
+            else:
+                yield i, total
+            time.sleep(random.random() / 30)
+    def multi_task_progress_test(unknown_total: bool):
+        tasks = [task(random.randint(80, 100)) for _ in range(4)]
+        if unknown_total:
+            tasks = [task(None), *tasks, task(None)]
+        columns = [
+            SpinnerColumn(),
+            *Progress.get_default_columns(),
+            MofNCompleteColumn(),
+            TimeElapsedColumn(),
+        ]
+        kwargs = {}
+        if unknown_total:
+            kwargs["main_description"] = "unknown"
+        multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
+    multi_task_progress_test(False)
+    multi_task_progress_test(True)
+    multi_task_progress([task(100)])
+    multi_task_progress([task(None)], description="unknown")
+    multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
+    multi_task_progress([task(100)], description="transient", transient=True)

hydraflow/{runs.py → run_collection.py} RENAMED Viewed

@@ -1,126 +1,47 @@
 """
-This module provides functionality for managing and interacting with MLflow
-runs. It includes the `RunCollection` class and various methods to filter
-runs, retrieve run information, log artifacts, and load configurations.
+This module provides functionality for managing and interacting with MLflow runs.
+It includes the `RunCollection` class, which serves as a container for multiple MLflow
+run objects, and various methods to filter, retrieve, and manipulate these runs.
+Key Features:
+- **Run Management**: The `RunCollection` class allows for easy management of multiple
+  MLflow runs, providing methods to access, filter, and sort runs based on various
+  criteria.
+- **Filtering**: The module supports filtering runs based on specific configurations
+  and parameters, enabling users to easily find runs that match certain conditions.
+- **Retrieval**: Users can retrieve specific runs, including the first, last, or any
+  run that matches a given configuration.
+- **Artifact Handling**: The module provides methods to access and manipulate the
+  artifacts associated with each run, including retrieving artifact URIs and directories.
+The `RunCollection` class is designed to work seamlessly with the MLflow tracking
+API, providing a robust solution for managing machine learning experiment runs and
+their associated metadata. This module is particularly useful for data scientists and
+machine learning engineers who need to track and analyze the results of their experiments
+efficiently.
 """
 from __future__ import annotations
-from dataclasses import dataclass
-from functools import cache
+from dataclasses import dataclass, field
 from itertools import chain
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
-import mlflow
-from mlflow.artifacts import download_artifacts
-from mlflow.entities import ViewType
 from mlflow.entities.run import Run
-from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
-from omegaconf import DictConfig, OmegaConf
 from hydraflow.config import iter_params
+from hydraflow.info import RunCollectionInfo
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
+    from pathlib import Path
     from typing import Any
-def search_runs(
-    experiment_ids: list[str] | None = None,
-    filter_string: str = "",
-    run_view_type: int = ViewType.ACTIVE_ONLY,
-    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
-    order_by: list[str] | None = None,
-    search_all_experiments: bool = False,
-    experiment_names: list[str] | None = None,
-) -> RunCollection:
-    """
-    Search for Runs that fit the specified criteria.
-    This function wraps the `mlflow.search_runs` function and returns the
-    results as a `RunCollection` object. It allows for flexible searching of
-    MLflow runs based on various criteria.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``None`` will default to the active experiment if ``experiment_names``
-            is ``None`` or ``[]``.
-        filter_string (str): Filter query string, defaults to searching all
-            runs.
-        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
-            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
-        max_results (int): The maximum number of runs to put in the dataframe.
-            Default is 100,000 to avoid causing out-of-memory issues on the user's
-            machine.
-        order_by (list[str] | None): List of columns to order by (e.g.,
-            "metrics.rmse"). The ``order_by`` column can contain an optional
-            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
-            ordering is to sort by ``start_time DESC``, then ``run_id``.
-            ``start_time DESC``, then ``run_id``.
-        search_all_experiments (bool): Boolean specifying whether all
-            experiments should be searched. Only honored if ``experiment_ids``
-            is ``[]`` or ``None``.
-        experiment_names (list[str] | None): List of experiment names. Search
-            can work with experiment IDs or experiment names, but not both in
-            the same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_ids`` is also not ``None`` or ``[]``.
-            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_ids`` is ``None``
-            or ``[]``.
-    Returns:
-        A `RunCollection` object containing the search results.
-    """
-    runs = mlflow.search_runs(
-        experiment_ids=experiment_ids,
-        filter_string=filter_string,
-        run_view_type=run_view_type,
-        max_results=max_results,
-        order_by=order_by,
-        output_format="list",
-        search_all_experiments=search_all_experiments,
-        experiment_names=experiment_names,
-    )
-    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
-    return RunCollection(runs)  # type: ignore
-def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
-    """
-    List all runs for the specified experiments.
-    This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), it defaults to searching all runs
-    for the currently active experiment. If an empty list is provided, the function
-    will search all runs for all experiments except the "Default" experiment.
-    The function returns the results as a `RunCollection` object.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None or an empty list is provided, the function will
-            search the currently active experiment or all experiments except
-            the "Default" experiment.
-    Returns:
-        A `RunCollection` object containing the runs for the specified experiments.
-    """
-    if experiment_names == []:
-        experiments = mlflow.search_experiments()
-        experiment_names = [e.name for e in experiments if e.name != "Default"]
-    return search_runs(experiment_names=experiment_names)
+    from omegaconf import DictConfig
 T = TypeVar("T")
+P = ParamSpec("P")
 @dataclass
@@ -130,11 +51,22 @@ class RunCollection:
     This class provides methods to interact with the runs, such as filtering,
     retrieving specific runs, and accessing run information.
+    Key Features:
+    - Filtering: Easily filter runs based on various criteria.
+    - Retrieval: Access specific runs by index or through methods.
+    - Metadata: Access run metadata and associated information.
     """
     _runs: list[Run]
     """A list of MLflow Run objects."""
+    _info: RunCollectionInfo = field(init=False)
+    """A `RunCollectionInfo` view over this collection, set in `__post_init__`."""
+    def __post_init__(self):
+        self._info = RunCollectionInfo(self)
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
@@ -150,6 +82,10 @@ class RunCollection:
     def __contains__(self, run: Run) -> bool:
         return run in self._runs
+    @property
+    def info(self) -> RunCollectionInfo:
+        return self._info
     def sort(
         self,
         key: Callable[[Run], Any] | None = None,
@@ -411,52 +347,81 @@ class RunCollection:
         """
         return get_param_dict(self._runs)
-    def map(self, func: Callable[[Run], T]) -> Iterator[T]:
+    def map(
+        self,
+        func: Callable[Concatenate[Run, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each run in the collection and return an iterator of
         results.
+        This method iterates over each run in the collection and applies the
+        provided function to it, along with any additional arguments and
+        keyword arguments.
         Args:
-            func (Callable[[Run], T]): A function that takes a run and returns a
-                result.
+            func (Callable[[Run, P], T]): A function that takes a run and
+                additional arguments and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
         Yields:
-            Results obtained by applying the function to each run in the
-            collection.
+            Results obtained by applying the function to each run in the collection.
         """
-        return (func(run) for run in self._runs)
+        return (func(run, *args, **kwargs) for run in self)
-    def map_run_id(self, func: Callable[[str], T]) -> Iterator[T]:
+    def map_run_id(
+        self,
+        func: Callable[Concatenate[str, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each run id in the collection and return an iterator
         of results.
         Args:
-            func (Callable[[str], T]): A function that takes a run id and returns a
+            func (Callable[[str, P], T]): A function that takes a run id and returns a
                 result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
         Yields:
             Results obtained by applying the function to each run id in the
             collection.
         """
-        return (func(run.info.run_id) for run in self._runs)
+        return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
-    def map_config(self, func: Callable[[DictConfig], T]) -> Iterator[T]:
+    def map_config(
+        self,
+        func: Callable[Concatenate[DictConfig, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each run configuration in the collection and return
         an iterator of results.
         Args:
-            func (Callable[[DictConfig], T]): A function that takes a run
+            func (Callable[[DictConfig, P], T]): A function that takes a run
                 configuration and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
         Yields:
             Results obtained by applying the function to each run configuration
             in the collection.
         """
-        return (func(load_config(run)) for run in self._runs)
+        return (func(config, *args, **kwargs) for config in self.info.config)
-    def map_uri(self, func: Callable[[str | None], T]) -> Iterator[T]:
+    def map_uri(
+        self,
+        func: Callable[Concatenate[str | None, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each artifact URI in the collection and return an
         iterator of results.
@@ -466,16 +431,23 @@ class RunCollection:
         have an artifact URI, None is passed to the function.
         Args:
-            func (Callable[[str | None], T]): A function that takes an
-            artifact URI (string or None) and returns a result.
+            func (Callable[[str | None, P], T]): A function that takes an
+                artifact URI (string or None) and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
         Yields:
             Results obtained by applying the function to each artifact URI in the
             collection.
         """
-        return (func(run.info.artifact_uri) for run in self._runs)
+        return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
-    def map_dir(self, func: Callable[[str], T]) -> Iterator[T]:
+    def map_dir(
+        self,
+        func: Callable[Concatenate[Path, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each artifact directory in the collection and return
         an iterator of results.
@@ -485,14 +457,16 @@ class RunCollection:
         path.
         Args:
-            func (Callable[[str], T]): A function that takes an artifact directory
+            func (Callable[[Path, P], T]): A function that takes an artifact directory
                 path (string) and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
         Yields:
             Results obtained by applying the function to each artifact directory
             in the collection.
         """
-        return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
+        return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
     def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
         """
@@ -519,6 +493,25 @@ class RunCollection:
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
+    def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
+        """
+        Group runs by specified parameter names.
+        This method groups the runs in the collection based on the values of the
+        specified parameters. Each unique combination of parameter values will
+        form a separate RunCollection in the returned list.
+        Args:
+            *names (str | list[str]): The names of the parameters to group by.
+                This can be a single parameter name or multiple names provided
+                as separate arguments or as a list.
+        Returns:
+            list[RunCollection]: A list of RunCollection objects, where each
+            object contains runs that match the specified parameter values.
+        """
+        return list(self.group_by(*names).values())
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
@@ -858,33 +851,3 @@ def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
         params[name] = sorted(set(it))
     return params
-def load_config(run: Run) -> DictConfig:
-    """
-    Load the configuration for a given run.
-    This function loads the configuration for the provided Run instance
-    by downloading the configuration file from the MLflow artifacts and
-    loading it using OmegaConf. It returns an empty config if
-    `.hydra/config.yaml` is not found in the run's artifact directory.
-    Args:
-        run (Run): The Run instance for which to load the configuration.
-    Returns:
-        The loaded configuration as a DictConfig object. Returns an empty
-        DictConfig if the configuration file is not found.
-    """
-    run_id = run.info.run_id
-    return _load_config(run_id)
-@cache
-def _load_config(run_id: str) -> DictConfig:
-    try:
-        path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
-    except OSError:
-        return DictConfig({})
-    return OmegaConf.load(path)  # type: ignore

{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.6
+Version: 0.2.8
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

hydraflow-0.2.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+hydraflow/__init__.py,sha256=K2xXk5Za_9OkiRmbsgkuWn7EMaTcQOVCPFs5oTP_QFw,483
+hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
+hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
+hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
+hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
+hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
+hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
+hydraflow/run_collection.py,sha256=Ge-PAsoQBbn7cuow0DYMf5SoBmIXUfZ9ftufN_75Pw8,29963
+hydraflow-0.2.8.dist-info/METADATA,sha256=9CF5S8LdmDUx4sihDqVRvwLLk34FNBmy_Vv6BVoahoc,4181
+hydraflow-0.2.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.2.8.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.2.8.dist-info/RECORD,,

hydraflow-0.2.6.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-hydraflow/__init__.py,sha256=l5BrZAfpJHFkQnDRuETZVjDTntMmzOI3CUwnsm2fGzk,460
-hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
-hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
-hydraflow/context.py,sha256=8Qn99yCSkCarDDthQ6hjgW80CBBIg0H7fnLvtw4ZXo8,7248
-hydraflow/mlflow.py,sha256=gGr0fvFEllduA-ByHMeEamM39zVY_30tjtEbkSZ4lHA,3659
-hydraflow/progress.py,sha256=dReFp-AfBuYpjGQnqRmkwPcoyFfe2WCgkklXuo9ZjNg,1709
-hydraflow/runs.py,sha256=TETX54OVJPJLi6rjpNcsXAhXH2Q9unhjXhGkOtFtHng,31559
-hydraflow-0.2.6.dist-info/METADATA,sha256=yOEx7M9jM5M7MNkLOZShO-DexNqXzIHjSkqbxcNMHQ0,4181
-hydraflow-0.2.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.2.6.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.2.6.dist-info/RECORD,,

{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.2.6.dist-info → hydraflow-0.2.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

hydraflow 0.2.6-py3-none-any.whl → 0.2.8-py3-none-any.whl