PyPI - hydraflow - Versions diffs - 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

hydraflow 0.2.17py3-none-any.whl → 0.3.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

hydraflow/__init__.py +4 -1
hydraflow/asyncio.py +13 -11
hydraflow/config.py +17 -6
hydraflow/context.py +16 -16
hydraflow/mlflow.py +36 -23
hydraflow/param.py +11 -0
hydraflow/progress.py +7 -18
hydraflow/run_collection.py +138 -74
hydraflow/run_data.py +56 -0
hydraflow/{info.py → run_info.py} +12 -37
{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/METADATA +2 -1
hydraflow-0.3.0.dist-info/RECORD +15 -0
hydraflow-0.2.17.dist-info/RECORD +0 -14
{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/WHEEL +0 -0
{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
+"""Provide a collection of MLflow runs."""
 from .context import chdir_artifact, log_run, start_run, watch
-from .info import get_artifact_dir, get_hydra_output_dir, load_config
 from .mlflow import (
     list_runs,
     search_runs,
@@ -7,6 +8,8 @@ from .mlflow import (
 )
 from .progress import multi_tasks_progress, parallel_progress
 from .run_collection import RunCollection
+from .run_data import load_config
+from .run_info import get_artifact_dir, get_hydra_output_dir
 __all__ = [
     "RunCollection",

hydraflow/asyncio.py CHANGED Viewed

@@ -1,3 +1,5 @@
+"""Provide functionality for running commands and monitoring file changes."""
 from __future__ import annotations
 import asyncio
@@ -27,8 +29,7 @@ async def execute_command(
     stderr: Callable[[str], None] | None = None,
     stop_event: asyncio.Event,
 ) -> int:
-    """
-    Runs a command asynchronously and pass the output to callback functions.
+    """Run a command asynchronously and pass the output to callback functions.
     Args:
         program (str): The program to run.
@@ -39,6 +40,7 @@ async def execute_command(
     Returns:
         int: The return code of the process.
     """
     try:
         process = await asyncio.create_subprocess_exec(
@@ -68,13 +70,13 @@ async def process_stream(
     stream: StreamReader | None,
     callback: Callable[[str], None] | None,
 ) -> None:
-    """
-    Reads a stream asynchronously and pass each line to a callback function.
+    """Read a stream asynchronously and pass each line to a callback function.
     Args:
         stream (StreamReader | None): The stream to read from.
         callback (Callable[[str], None] | None): The callback function to handle
         each line.
     """
     if stream is None or callback is None:
         return
@@ -93,9 +95,7 @@ async def monitor_file_changes(
     stop_event: asyncio.Event,
     **awatch_kwargs,
 ) -> None:
-    """
-    Watches for file changes in specified paths and pass the changes to a
-    callback function.
+    """Watch file changes in specified paths and pass the changes to a callback.
     Args:
         paths (list[str | Path]): List of paths to monitor for changes.
@@ -103,6 +103,7 @@ async def monitor_file_changes(
         function to handle file changes.
         stop_event (asyncio.Event): Event to signal when to stop watching.
         **awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
     """
     str_paths = [str(path) for path in paths]
     try:
@@ -127,8 +128,7 @@ async def run_and_monitor(
     paths: list[str | Path] | None = None,
     **awatch_kwargs,
 ) -> int:
-    """
-    Runs a command and optionally watch for file changes concurrently.
+    """Run a command and optionally watch for file changes concurrently.
     Args:
         program (str): The program to run.
@@ -138,6 +138,8 @@ async def run_and_monitor(
         watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
         file changes.
         paths (list[str | Path] | None): List of paths to monitor for changes.
+        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
     """
     stop_event = asyncio.Event()
     run_task = asyncio.create_task(
@@ -184,8 +186,7 @@ def run(
     paths: list[str | Path] | None = None,
     **awatch_kwargs,
 ) -> int:
-    """
-    Run a command synchronously and optionally watch for file changes.
+    """Run a command synchronously and optionally watch for file changes.
     This function is a synchronous wrapper around the asynchronous
     `run_and_monitor` function. It runs a specified command and optionally
@@ -208,6 +209,7 @@ def run(
     Returns:
         int: The return code of the process.
     """
     if watch and not paths:
         paths = [Path.cwd()]

hydraflow/config.py CHANGED Viewed

@@ -1,7 +1,4 @@
-"""
-This module provides functionality for working with configuration
-objects using the OmegaConf library.
-"""
+"""Provide functionality for working with configuration objects using OmegaConf."""
 from __future__ import annotations
@@ -14,9 +11,22 @@ if TYPE_CHECKING:
     from typing import Any
-def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+def collect_params(config: object) -> dict[str, Any]:
+    """Iterate over parameters and collect them into a dictionary.
+    Args:
+        config (object): The configuration object to iterate over. Parameter
+            keys are collected without a prefix.
+    Returns:
+        dict[str, Any]: A dictionary of collected parameters.
     """
-    Recursively iterate over the parameters in the given configuration object.
+    return dict(iter_params(config))
+def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+    """Recursively iterate over the parameters in the given configuration object.
     This function traverses the configuration object and yields key-value pairs
     representing the parameters. The keys are prefixed with the provided prefix.
@@ -29,6 +39,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     Yields:
         Key-value pairs representing the parameters in the configuration object.
     """
     if config is None:
         return

hydraflow/context.py CHANGED Viewed

@@ -1,7 +1,4 @@
-"""
-This module provides context managers to log parameters and manage the MLflow
-run context.
-"""
+"""Provide context managers to log parameters and manage the MLflow run context."""
 from __future__ import annotations
@@ -17,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
 from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
 from watchdog.observers import Observer
-from hydraflow.info import get_artifact_dir
 from hydraflow.mlflow import log_params
+from hydraflow.run_info import get_artifact_dir
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -34,9 +31,7 @@ def log_run(
     *,
     synchronous: bool | None = None,
 ) -> Iterator[None]:
-    """
-    Log the parameters from the given configuration object and manage the MLflow
-    run context.
+    """Log the parameters from the given configuration object.
     This context manager logs the parameters from the provided configuration object
     using MLflow. It also manages the MLflow run context, ensuring that artifacts
@@ -56,6 +51,7 @@ def log_run(
             # Perform operations within the MLflow run context
             pass
         ```
     """
     log_params(config, synchronous=synchronous)
@@ -98,8 +94,7 @@ def start_run(  # noqa: PLR0913
     log_system_metrics: bool | None = None,
     synchronous: bool | None = None,
 ) -> Iterator[Run]:
-    """
-    Start an MLflow run and log parameters using the provided configuration object.
+    """Start an MLflow run and log parameters using the provided configuration object.
     This context manager starts an MLflow run and logs parameters using the specified
     configuration object. It ensures that the run is properly closed after completion.
@@ -130,6 +125,7 @@ def start_run(  # noqa: PLR0913
         - `mlflow.start_run`: The MLflow function to start a run directly.
         - `log_run`: A context manager to log parameters and manage the MLflow
            run context.
     """
     with (
         mlflow.start_run(
@@ -156,9 +152,7 @@ def watch(
     ignore_patterns: list[str] | None = None,
     ignore_log: bool = True,
 ) -> Iterator[None]:
-    """
-    Watch the given directory for changes and call the provided function
-    when a change is detected.
+    """Watch the given directory for changes.
     This context manager sets up a file system watcher on the specified directory.
     When a file modification is detected, the provided function is called with
@@ -173,6 +167,9 @@ def watch(
             the current MLflow artifact URI is used. Defaults to "".
         timeout (int): The timeout period in seconds for the watcher
             to run after the context is exited. Defaults to 60.
+        ignore_patterns (list[str] | None): A list of glob patterns to ignore.
+            Defaults to None.
+        ignore_log (bool): Whether to ignore log files. Defaults to True.
     Yields:
         None
@@ -183,6 +180,7 @@ def watch(
             # Perform operations while watching the directory for changes
             pass
         ```
     """
     dir = dir or get_artifact_dir()  # noqa: A001
     if isinstance(dir, Path):
@@ -214,6 +212,8 @@ def watch(
 class Handler(PatternMatchingEventHandler):
+    """Monitor file changes and call the given function when a change is detected."""
     def __init__(
         self,
         func: Callable[[Path], None],
@@ -232,6 +232,7 @@ class Handler(PatternMatchingEventHandler):
         super().__init__(ignore_patterns=ignore_patterns)
     def on_modified(self, event: FileModifiedEvent) -> None:
+        """Call the registered function with the path of a modified file."""
         file = Path(str(event.src_path))
         if file.is_file():
             self.func(file)
@@ -242,9 +243,7 @@ def chdir_artifact(
     run: Run,
     artifact_path: str | None = None,
 ) -> Iterator[Path]:
-    """
-    Change the current working directory to the artifact directory of the
-    given run.
+    """Change the current working directory to the artifact directory of the given run.
     This context manager changes the current working directory to the artifact
     directory of the given run. It ensures that the directory is changed back
@@ -253,6 +252,7 @@ def chdir_artifact(
     Args:
         run (Run): The run to get the artifact directory from.
         artifact_path (str | None): The artifact path.
     """
     curdir = Path.cwd()
     path = mlflow.artifacts.download_artifacts(

hydraflow/mlflow.py CHANGED Viewed

@@ -1,20 +1,17 @@
-"""
-This module provides functionality to log parameters from Hydra configuration objects
-and set up experiments using MLflow. It includes methods for managing experiments,
-searching for runs, and logging parameters and artifacts.
+"""Provide functionality to log parameters from Hydra configuration objects.
+This module provides functions to log parameters from Hydra configuration objects
+to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
+configuration management with MLflow's experiment tracking capabilities.
 Key Features:
-- **Experiment Management**: Set and manage MLflow experiments with customizable names
-  based on Hydra configuration.
-- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
-  MLflow, ensuring that all relevant information is captured during experiments.
-- **Run Search**: Search for runs based on various criteria, allowing for flexible
-  retrieval of experiment results.
-- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
-  easy access to outputs generated during experiments.
-This module is designed to integrate seamlessly with Hydra, providing a robust
-solution for tracking machine learning experiments and their associated metadata.
+- **Experiment Management**: Set experiment names and tracking URIs using Hydra
+  configuration details.
+- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
+  supporting both synchronous and asynchronous logging.
+- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
+  multiple MLflow runs, providing methods to filter and retrieve runs based on
+  various criteria.
 """
 from __future__ import annotations
@@ -40,8 +37,7 @@ def set_experiment(
     suffix: str = "",
     uri: str | Path | None = None,
 ) -> Experiment:
-    """
-    Sets the experiment name and tracking URI optionally.
+    """Set the experiment name and tracking URI optionally.
     This function sets the experiment name by combining the given prefix,
     the job name from HydraConfig, and the given suffix. Optionally, it can
@@ -55,6 +51,7 @@ def set_experiment(
     Returns:
         Experiment: An instance of `mlflow.entities.Experiment` representing
         the new active experiment.
     """
     if uri is not None:
         mlflow.set_tracking_uri(uri)
@@ -65,8 +62,7 @@ def set_experiment(
 def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    """
-    Log the parameters from the given configuration object.
+    """Log the parameters from the given configuration object.
     This method logs the parameters from the provided configuration object
     using MLflow. It iterates over the parameters and logs them using the
@@ -76,6 +72,7 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
         config (object): The configuration object to log the parameters from.
         synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
     """
     for key, value in iter_params(config):
         mlflow.log_param(key, value, synchronous=synchronous)
@@ -91,8 +88,7 @@ def search_runs(  # noqa: PLR0913
     search_all_experiments: bool = False,
     experiment_names: list[str] | None = None,
 ) -> RunCollection:
-    """
-    Search for Runs that fit the specified criteria.
+    """Search for Runs that fit the specified criteria.
     This function wraps the `mlflow.search_runs` function and returns the
     results as a `RunCollection` object. It allows for flexible searching of
@@ -133,6 +129,7 @@ def search_runs(  # noqa: PLR0913
     Returns:
         A `RunCollection` object containing the search results.
     """
     runs = mlflow.search_runs(
         experiment_ids=experiment_ids,
@@ -151,9 +148,9 @@ def search_runs(  # noqa: PLR0913
 def list_runs(
     experiment_names: str | list[str] | None = None,
     n_jobs: int = 0,
+    status: str | list[str] | int | list[int] | None = None,
 ) -> RunCollection:
-    """
-    List all runs for the specified experiments.
+    """List all runs for the specified experiments.
     This function retrieves all runs for the given list of experiment names.
     If no experiment names are provided (None), it defaults to searching all runs
@@ -169,11 +166,27 @@ def list_runs(
             for runs. If None or an empty list is provided, the function will
             search the currently active experiment or all experiments except
             the "Default" experiment.
+        n_jobs (int): The number of jobs to run in parallel. If 0, the function
+            will search runs sequentially.
+        status (str | list[str] | int | list[int] | None): The status of the runs
+            to filter.
     Returns:
         RunCollection: A `RunCollection` instance containing the runs for the
         specified experiments.
     """
+    rc = _list_runs(experiment_names, n_jobs)
+    if status is None:
+        return rc
+    return rc.filter(status=status)
+def _list_runs(
+    experiment_names: str | list[str] | None = None,
+    n_jobs: int = 0,
+) -> RunCollection:
     if isinstance(experiment_names, str):
         experiment_names = [experiment_names]

hydraflow/param.py CHANGED Viewed

@@ -1,3 +1,13 @@
+"""Provide utility functions for parameter matching.
+The main function `match` checks if a given parameter matches a specified value.
+It supports various types of values including None, boolean, list, tuple, int,
+float, and str.
+Helper functions `_match_list` and `_match_tuple` are used internally to handle
+matching for list and tuple types respectively.
+"""
 from __future__ import annotations
 from typing import Any
@@ -13,6 +23,7 @@ def match(param: str, value: Any) -> bool:
     Returns:
         True if the parameter matches the specified value,
         False otherwise.
     """
     if value in [None, True, False]:
         return param == str(value)

hydraflow/progress.py CHANGED Viewed

@@ -1,18 +1,7 @@
-"""
-Module for managing progress tracking in parallel processing using Joblib
-and Rich's Progress bar.
+"""Context managers and functions for parallel task execution with progress.
 Provide context managers and functions to facilitate the execution
 of tasks in parallel while displaying progress updates.
-The following key components are provided:
-- JoblibProgress: A context manager for tracking progress with Rich's progress
-    bar.
-- parallel_progress: A function to execute a given function in parallel over
-    an iterable with progress tracking.
-- multi_tasks_progress: A function to render auto-updating progress bars for
-    multiple tasks concurrently.
 """
 from __future__ import annotations
@@ -37,8 +26,7 @@ def JoblibProgress(  # noqa: N802
     total: int | None = None,
     **kwargs,
 ) -> Iterator[Progress]:
-    """
-    Context manager for tracking progress using Joblib with Rich's Progress bar.
+    """Context manager for tracking progress using Joblib with Rich's Progress bar.
     Args:
         *columns (ProgressColumn | str): Columns to display in the progress bar.
@@ -56,6 +44,7 @@ def JoblibProgress(  # noqa: N802
         with JoblibProgress("task", total=100) as progress:
             # Your parallel processing code here
         ```
     """
     if not columns:
         columns = Progress.get_default_columns()
@@ -94,8 +83,7 @@ def parallel_progress(
     description: str | None = None,
     **kwargs,
 ) -> list[U]:
-    """
-    Execute a function in parallel over an iterable with progress tracking.
+    """Execute a function in parallel over an iterable with progress tracking.
     Args:
         func (Callable[[T], U]): The function to execute on each item in the
@@ -112,6 +100,7 @@ def parallel_progress(
     Returns:
         list[U]: A list of results from applying the function to each item in
         the iterable.
     """
     iterable = list(iterable)
     total = len(iterable)
@@ -130,8 +119,7 @@ def multi_tasks_progress(
     transient: bool | None = None,
     **kwargs,
 ) -> None:
-    """
-    Render auto-updating progress bars for multiple tasks concurrently.
+    """Render auto-updating progress bars for multiple tasks concurrently.
     Args:
         iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
@@ -151,6 +139,7 @@ def multi_tasks_progress(
     Returns:
         None
     """
     if not columns:
         columns = Progress.get_default_columns()

hydraflow/run_collection.py CHANGED Viewed

@@ -1,6 +1,6 @@
-"""
-Provide functionality for managing and interacting with MLflow runs.
-It includes the `RunCollection` class, which serves as a container
+"""Provide a collection of MLflow runs.
+This module includes the `RunCollection` class, which serves as a container
 for multiple MLflow `Run` instances, and various methods to filter and
 retrieve these runs.
@@ -23,9 +23,13 @@ from dataclasses import dataclass, field
 from itertools import chain
 from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
+from mlflow.entities import RunStatus
+from polars.dataframe import DataFrame
 import hydraflow.param
-from hydraflow.config import iter_params
-from hydraflow.info import RunCollectionInfo
+from hydraflow.config import collect_params, iter_params
+from hydraflow.run_data import RunCollectionData
+from hydraflow.run_info import RunCollectionInfo
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -42,8 +46,7 @@ P = ParamSpec("P")
 @dataclass
 class RunCollection:
-    """
-    Represent a collection of MLflow runs.
+    """Represent a collection of MLflow runs.
     Provide methods to interact with the runs, such as filtering,
     retrieving specific runs, and accessing run information.
@@ -60,8 +63,12 @@ class RunCollection:
     _info: RunCollectionInfo = field(init=False)
     """An instance of `RunCollectionInfo`."""
+    _data: RunCollectionData = field(init=False)
+    """An instance of `RunCollectionData`."""
     def __post_init__(self) -> None:
         self._info = RunCollectionInfo(self)
+        self._data = RunCollectionData(self)
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
@@ -93,7 +100,6 @@ class RunCollection:
     @classmethod
     def from_list(cls, runs: list[Run]) -> RunCollection:
         """Create a `RunCollection` instance from a list of MLflow `Run` instances."""
         return cls(runs)
     @property
@@ -101,6 +107,11 @@ class RunCollection:
         """An instance of `RunCollectionInfo`."""
         return self._info
+    @property
+    def data(self) -> RunCollectionData:
+        """An instance of `RunCollectionData`."""
+        return self._data
     def take(self, n: int) -> RunCollection:
         """Take the first n runs from the collection.
@@ -114,6 +125,7 @@ class RunCollection:
         Returns:
             A new `RunCollection` instance containing the first n runs if n is
             positive, or the last n runs if n is negative.
         """
         if n < 0:
             return self.__class__(self._runs[n:])
@@ -126,17 +138,28 @@ class RunCollection:
         *,
         reverse: bool = False,
     ) -> None:
+        """Sort the runs in the collection.
+        Sort the runs in the collection according to the provided key function
+        and optional reverse flag.
+        Args:
+            key (Callable[[Run], Any] | None): A function that takes a run and returns
+                a value to sort by.
+            reverse (bool): If True, sort in descending order.
+        """
         self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
     def one(self) -> Run:
-        """
-        Get the only `Run` instance in the collection.
+        """Get the only `Run` instance in the collection.
         Returns:
             The only `Run` instance in the collection.
         Raises:
             ValueError: If the collection does not contain exactly one run.
         """
         if len(self._runs) != 1:
             raise ValueError("The collection does not contain exactly one run.")
@@ -144,24 +167,24 @@ class RunCollection:
         return self._runs[0]
     def try_one(self) -> Run | None:
-        """
-        Try to get the only `Run` instance in the collection.
+        """Try to get the only `Run` instance in the collection.
         Returns:
             The only `Run` instance in the collection, or None if the collection
             does not contain exactly one run.
         """
         return self._runs[0] if len(self._runs) == 1 else None
     def first(self) -> Run:
-        """
-        Get the first `Run` instance in the collection.
+        """Get the first `Run` instance in the collection.
         Returns:
             The first `Run` instance in the collection.
         Raises:
             ValueError: If the collection is empty.
         """
         if not self._runs:
             raise ValueError("The collection is empty.")
@@ -169,24 +192,24 @@ class RunCollection:
         return self._runs[0]
     def try_first(self) -> Run | None:
-        """
-        Try to get the first `Run` instance in the collection.
+        """Try to get the first `Run` instance in the collection.
         Returns:
             The first `Run` instance in the collection, or None if the collection
             is empty.
         """
         return self._runs[0] if self._runs else None
     def last(self) -> Run:
-        """
-        Get the last `Run` instance in the collection.
+        """Get the last `Run` instance in the collection.
         Returns:
             The last `Run` instance in the collection.
         Raises:
             ValueError: If the collection is empty.
         """
         if not self._runs:
             raise ValueError("The collection is empty.")
@@ -194,18 +217,17 @@ class RunCollection:
         return self._runs[-1]
     def try_last(self) -> Run | None:
-        """
-        Try to get the last `Run` instance in the collection.
+        """Try to get the last `Run` instance in the collection.
         Returns:
             The last `Run` instance in the collection, or None if the collection
             is empty.
         """
         return self._runs[-1] if self._runs else None
     def filter(self, config: object | None = None, **kwargs) -> RunCollection:
-        """
-        Filter the `Run` instances based on the provided configuration.
+        """Filter the `Run` instances based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and additional key-value pairs. The
@@ -228,12 +250,12 @@ class RunCollection:
         Returns:
             A new `RunCollection` object containing the filtered runs.
         """
         return RunCollection(filter_runs(self._runs, config, **kwargs))
     def find(self, config: object | None = None, **kwargs) -> Run:
-        """
-        Find the first `Run` instance based on the provided configuration.
+        """Find the first `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the first run that matches
@@ -252,6 +274,7 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         try:
             return self.filter(config, **kwargs).first()
@@ -259,8 +282,7 @@ class RunCollection:
             raise ValueError("No run matches the provided configuration.")
     def try_find(self, config: object | None = None, **kwargs) -> Run | None:
-        """
-        Try to find the first `Run` instance based on the provided configuration.
+        """Try to find the first `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the first run that matches
@@ -277,12 +299,12 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         return self.filter(config, **kwargs).try_first()
     def find_last(self, config: object | None = None, **kwargs) -> Run:
-        """
-        Find the last `Run` instance based on the provided configuration.
+        """Find the last `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the last run that matches
@@ -301,6 +323,7 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         try:
             return self.filter(config, **kwargs).last()
@@ -308,8 +331,7 @@ class RunCollection:
             raise ValueError("No run matches the provided configuration.")
     def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
-        """
-        Try to find the last `Run` instance based on the provided configuration.
+        """Try to find the last `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the last run that matches
@@ -326,12 +348,12 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         return self.filter(config, **kwargs).try_last()
     def get(self, config: object | None = None, **kwargs) -> Run:
-        """
-        Retrieve a specific `Run` instance based on the provided configuration.
+        """Retrieve a specific `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the run that matches the
@@ -351,6 +373,7 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         try:
             return self.filter(config, **kwargs).one()
@@ -359,8 +382,7 @@ class RunCollection:
             raise ValueError(msg)
     def try_get(self, config: object | None = None, **kwargs) -> Run | None:
-        """
-        Try to retrieve a specific `Run` instance based on the provided configuration.
+        """Try to get a specific `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the run that matches the
@@ -380,12 +402,12 @@ class RunCollection:
         See Also:
             `filter`: Perform the actual filtering logic.
         """
         return self.filter(config, **kwargs).try_one()
     def get_param_names(self) -> list[str]:
-        """
-        Get the parameter names from the runs.
+        """Get the parameter names from the runs.
         This method extracts the unique parameter names from the provided list
         of runs. It iterates through each run and collects the parameter names
@@ -393,6 +415,7 @@ class RunCollection:
         Returns:
             A list of unique parameter names.
         """
         param_names = set()
@@ -402,24 +425,30 @@ class RunCollection:
         return list(param_names)
-    def get_param_dict(self) -> dict[str, list[str]]:
-        """
-        Get the parameter dictionary from the list of runs.
+    def get_param_dict(self, *, drop_const: bool = False) -> dict[str, list[str]]:
+        """Get the parameter dictionary from the list of runs.
         This method extracts the parameter names and their corresponding values
         from the provided list of runs. It iterates through each run and
         collects the parameter values into a dictionary where the keys are
         parameter names and the values are lists of parameter values.
+        Args:
+            drop_const (bool): If True, drop the parameter values that are constant
+                across all runs.
         Returns:
             A dictionary where the keys are parameter names and the values are
             lists of parameter values.
         """
         params = {}
         for name in self.get_param_names():
             it = (run.data.params[name] for run in self if name in run.data.params)
-            params[name] = sorted(set(it))
+            unique_values = sorted(set(it))
+            if not drop_const or len(unique_values) > 1:
+                params[name] = unique_values
         return params
@@ -429,9 +458,7 @@ class RunCollection:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> Iterator[T]:
-        """
-        Apply a function to each run in the collection and return an iterator of
-        results.
+        """Return an iterator of results by applying a function to each run.
         This method iterates over each run in the collection and applies the
         provided function to it, along with any additional arguments and
@@ -445,6 +472,7 @@ class RunCollection:
         Yields:
             Results obtained by applying the function to each run in the collection.
         """
         return (func(run, *args, **kwargs) for run in self)
@@ -454,9 +482,7 @@ class RunCollection:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> Iterator[T]:
-        """
-        Apply a function to each run id in the collection and return an iterator
-        of results.
+        """Return an iterator of results by applying a function to each run id.
         Args:
             func (Callable[[str, P], T]): A function that takes a run id and returns a
@@ -467,6 +493,7 @@ class RunCollection:
         Yields:
             Results obtained by applying the function to each run id in the
             collection.
         """
         return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
@@ -476,9 +503,7 @@ class RunCollection:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> Iterator[T]:
-        """
-        Apply a function to each run configuration in the collection and return
-        an iterator of results.
+        """Return an iterator of results by applying a function to each run config.
         Args:
             func (Callable[[DictConfig, P], T]): A function that takes a run
@@ -489,8 +514,9 @@ class RunCollection:
         Yields:
             Results obtained by applying the function to each run configuration
             in the collection.
         """
-        return (func(config, *args, **kwargs) for config in self.info.config)
+        return (func(config, *args, **kwargs) for config in self.data.config)
     def map_uri(
         self,
@@ -498,9 +524,7 @@ class RunCollection:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> Iterator[T]:
-        """
-        Apply a function to each artifact URI in the collection and return an
-        iterator of results.
+        """Return an iterator of results by applying a function to each artifact URI.
         Iterate over each run in the collection, retrieves the artifact URI, and
         apply the provided function to it. If a run does not have an artifact
@@ -515,6 +539,7 @@ class RunCollection:
         Yields:
             Results obtained by applying the function to each artifact URI in the
             collection.
         """
         return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
@@ -524,9 +549,7 @@ class RunCollection:
         *args: P.args,
         **kwargs: P.kwargs,
     ) -> Iterator[T]:
-        """
-        Apply a function to each artifact directory in the collection and return
-        an iterator of results.
+        """Return an iterator of results by applying a function to each artifact dir.
         Iterate over each run in the collection, downloads the artifact
         directory, and apply the provided function to the directory path.
@@ -540,6 +563,7 @@ class RunCollection:
         Yields:
             Results obtained by applying the function to each artifact directory
             in the collection.
         """
         return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)  # noqa: A001
@@ -547,8 +571,7 @@ class RunCollection:
         self,
         *names: str | list[str],
     ) -> dict[tuple[str | None, ...], RunCollection]:
-        """
-        Group runs by specified parameter names.
+        """Group runs by specified parameter names.
         Group the runs in the collection based on the values of the
         specified parameters. Each unique combination of parameter values will
@@ -563,6 +586,7 @@ class RunCollection:
             dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
             are tuples of parameter values and the values are RunCollection objects
             containing the runs that match those parameter values.
         """
         grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
         for run in self._runs:
@@ -571,6 +595,16 @@ class RunCollection:
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
+    @property
+    def config(self) -> DataFrame:
+        """Get the runs' configurations as a polars DataFrame.
+        Returns:
+            A polars DataFrame containing the runs' configurations.
+        """
+        return DataFrame(self.map_config(collect_params))
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     params = run.data.params
@@ -588,11 +622,10 @@ def filter_runs(
     runs: list[Run],
     config: object | None = None,
     *,
-    status: str | list[str] | None = None,
+    status: str | list[str] | int | list[int] | None = None,
     **kwargs,
 ) -> list[Run]:
-    """
-    Filter the runs based on the provided configuration.
+    """Filter the runs based on the provided configuration.
     Filter the runs in the collection according to the
     specified configuration object and additional key-value pairs.
@@ -612,33 +645,63 @@ def filter_runs(
         config (object | None): The configuration object to filter the runs.
             This can be any object that provides key-value pairs through the
             `iter_params` function.
-        status (str | list[str] | None): The status of the runs to filter.
+        status (str | list[str] | int | list[int] | None): The status of
+            the runs to filter (a status name or an MLflow `RunStatus` int).
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
         A list of runs that match the specified configuration and key-value pairs.
     """
     for key, value in chain(iter_params(config), kwargs.items()):
         runs = [run for run in runs if _param_matches(run, key, value)]
-        if len(runs) == 0:
+        if not runs:
             return []
-    if isinstance(status, str) and status.startswith("!"):
-        status = status[1:].lower()
-        return [run for run in runs if run.info.status.lower() != status]
+    if status is None:
+        return runs
-    if status:
-        status = [status] if isinstance(status, str) else status
-        status = [s.lower() for s in status]
-        return [run for run in runs if run.info.status.lower() in status]
+    return filter_runs_by_status(runs, status)
-    return runs
+def filter_runs_by_status(
+    runs: list[Run],
+    status: str | list[str] | int | list[int],
+) -> list[Run]:
+    """Filter the runs based on the provided status.
+    Args:
+        runs (list[Run]): The list of runs to filter.
+        status (str | list[str] | int | list[int]): The status of the runs
+            to filter.
+    Returns:
+        A list of runs that match the specified status.
-def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
     """
-    Retrieve the values of specified parameters from the given run.
+    if isinstance(status, str):
+        if status.startswith("!"):
+            status = status[1:].lower()
+            return [run for run in runs if run.info.status.lower() != status]
+        status = [status]
+    elif isinstance(status, int):
+        status = [RunStatus.to_string(status)]
+    status = [_to_lower(s) for s in status]
+    return [run for run in runs if run.info.status.lower() in status]
+def _to_lower(status: str | int) -> str:
+    if isinstance(status, str):
+        return status.lower()
+    return RunStatus.to_string(status).lower()
+def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
+    """Retrieve the values of specified parameters from the given run.
     This function extracts the values of the parameters identified by the
     provided names from the specified run. It can accept both individual
@@ -653,6 +716,7 @@ def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
     Returns:
         tuple[str | None, ...]: A tuple containing the values of the specified
         parameters in the order they were provided.
     """
     names_ = []
     for name in names:

hydraflow/run_data.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""Provide information about MLflow runs."""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from omegaconf import DictConfig, OmegaConf
+from hydraflow.run_info import get_artifact_dir
+if TYPE_CHECKING:
+    from mlflow.entities import Run
+    from hydraflow.run_collection import RunCollection
+class RunCollectionData:
+    """Provide information about MLflow runs."""
+    def __init__(self, runs: RunCollection) -> None:
+        self._runs = runs
+    @property
+    def params(self) -> list[dict[str, str]]:
+        """Get the parameters for each run in the collection."""
+        return [run.data.params for run in self._runs]
+    @property
+    def metrics(self) -> list[dict[str, float]]:
+        """Get the metrics for each run in the collection."""
+        return [run.data.metrics for run in self._runs]
+    @property
+    def config(self) -> list[DictConfig]:
+        """Get the configuration for each run in the collection."""
+        return [load_config(run) for run in self._runs]
+def load_config(run: Run) -> DictConfig:
+    """Load the configuration for a given run.
+    This function loads the configuration for the provided Run instance
+    by downloading the configuration file from the MLflow artifacts and
+    loading it using OmegaConf. A missing `.hydra/config.yaml` is not
+    handled here, so the error raised by `OmegaConf.load` propagates.
+    Args:
+        run (Run): The Run instance for which to load the configuration.
+    Returns:
+        The loaded configuration as a DictConfig object, read from
+        `.hydra/config.yaml` in the run's artifact directory.
+    """
+    path = get_artifact_dir(run) / ".hydra/config.yaml"
+    return OmegaConf.load(path)  # type: ignore

hydraflow/{info.py → run_info.py} RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Provide information about MLflow runs."""
 from __future__ import annotations
 from pathlib import Path
@@ -6,7 +8,7 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
 from mlflow.tracking import artifact_utils
-from omegaconf import DictConfig, OmegaConf
+from omegaconf import OmegaConf
 if TYPE_CHECKING:
     from mlflow.entities import Run
@@ -15,37 +17,29 @@ if TYPE_CHECKING:
 class RunCollectionInfo:
+    """Provide information about MLflow runs."""
     def __init__(self, runs: RunCollection) -> None:
         self._runs = runs
     @property
     def run_id(self) -> list[str]:
+        """Get the run ID for each run in the collection."""
         return [run.info.run_id for run in self._runs]
-    @property
-    def params(self) -> list[dict[str, str]]:
-        return [run.data.params for run in self._runs]
-    @property
-    def metrics(self) -> list[dict[str, float]]:
-        return [run.data.metrics for run in self._runs]
     @property
     def artifact_uri(self) -> list[str | None]:
+        """Get the artifact URI for each run in the collection."""
         return [run.info.artifact_uri for run in self._runs]
     @property
     def artifact_dir(self) -> list[Path]:
+        """Get the artifact directory for each run in the collection."""
         return [get_artifact_dir(run) for run in self._runs]
-    @property
-    def config(self) -> list[DictConfig]:
-        return [load_config(run) for run in self._runs]
 def get_artifact_dir(run: Run | None = None) -> Path:
-    """
-    Retrieve the artifact directory for the given run.
+    """Retrieve the artifact directory for the given run.
     This function uses MLflow to get the artifact directory for the given run.
@@ -54,6 +48,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
     if run is None:
         uri = mlflow.get_artifact_uri()
@@ -64,8 +59,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
 def get_hydra_output_dir(run: Run | None = None) -> Path:
-    """
-    Retrieve the Hydra output directory for the given run.
+    """Retrieve the Hydra output directory for the given run.
     This function returns the Hydra output directory. If no run is provided,
     it retrieves the output directory from the current Hydra configuration.
@@ -82,6 +76,7 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
     Raises:
         FileNotFoundError: If the Hydra configuration file is not found
             in the artifacts.
     """
     if run is None:
         hc = HydraConfig.get()
@@ -94,23 +89,3 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
         return Path(hc.hydra.runtime.output_dir)
     raise FileNotFoundError
-def load_config(run: Run) -> DictConfig:
-    """
-    Load the configuration for a given run.
-    This function loads the configuration for the provided Run instance
-    by downloading the configuration file from the MLflow artifacts and
-    loading it using OmegaConf. It returns an empty config if
-    `.hydra/config.yaml` is not found in the run's artifact directory.
-    Args:
-        run (Run): The Run instance for which to load the configuration.
-    Returns:
-        The loaded configuration as a DictConfig object. Returns an empty
-        DictConfig if the configuration file is not found.
-    """
-    path = get_artifact_dir(run) / ".hydra/config.yaml"
-    return OmegaConf.load(path)  # type: ignore

{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.17
+Version: 0.3.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
 Requires-Dist: hydra-core>=1.3
 Requires-Dist: joblib
 Requires-Dist: mlflow>=2.15
+Requires-Dist: polars
 Requires-Dist: rich
 Requires-Dist: watchdog
 Requires-Dist: watchfiles

hydraflow-0.3.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+hydraflow/__init__.py,sha256=zlLTztJPXyBFJC5Z8G7_OnlfzAHJPRrfE1c2OoDvlTg,667
+hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
+hydraflow/config.py,sha256=Wx7jymwLVr5EfpzXBpvv3Ax3VhGhvWyA7Yy6EzsPYWk,2479
+hydraflow/context.py,sha256=IaDy-ZCdCfWwv95S-gyQNp062oBdtSVaz6dxGmO6Y8w,8226
+hydraflow/mlflow.py,sha256=GkOr_pXfpfY5USYBLrCigHcP13VgrAK_e9kheR1Wke4,8579
+hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
+hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
+hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/run_collection.py,sha256=Xv6-KD5ac-vv-4Q3PZrzJy1x84H_g7UoP7ZqZ8_DQeQ,24973
+hydraflow/run_data.py,sha256=HgXGjV5oN6VxOAhrFRjubWz5ZiRqT1a2VdS5OcH2UQQ,1732
+hydraflow/run_info.py,sha256=4QrTmyPEQ_PVn7JKXJIa9NkXGAdqh8k5Sue1ggQS5aQ,2678
+hydraflow-0.3.0.dist-info/METADATA,sha256=DmC1Yjwuc3snUQiePCr5xvdtbfIevOapiA2sg8w6Aho,3840
+hydraflow-0.3.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.3.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.3.0.dist-info/RECORD,,

hydraflow-0.2.17.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
-hydraflow/asyncio.py,sha256=eFnDbNOQ5Hmjdforr8rTW6i_rr-zFIVY3xSQQ45gMPA,6511
-hydraflow/config.py,sha256=YU6xYLinxq-Iqw1R3Zy7s3_u8nfpvnvXlGIkPXJTNLc,2116
-hydraflow/context.py,sha256=4UDaWGoVmeF36UqsKoh6dd_cS_YVRfz80gFr28ouNlo,8040
-hydraflow/info.py,sha256=7EsCMEH6LJZB3FZiQ3IpPFTD3Meaz7G3M-HvDQeo1rw,3466
-hydraflow/mlflow.py,sha256=irD1INrVaI_1RIzUCjI36voBqgZszZ4dkSLo4aT1_FM,8271
-hydraflow/param.py,sha256=W71zJH39s8cJcy3qV-PFQHJYyQnfa1GbnHOIqCMG3Jc,1573
-hydraflow/progress.py,sha256=b5LvLm3d0eW3WsaidZAZotJNTTN3OwSY3XwxXXsJV9A,6561
-hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydraflow/run_collection.py,sha256=ym3M5ApEZVwJ1rYgOs4aYluTBfJeOECD6Z9SLFhv5O8,23260
-hydraflow-0.2.17.dist-info/METADATA,sha256=uD6q000C_h2JsuFh0mkf1YmpTYxVDI1RLaAUKzZ6fDw,3819
-hydraflow-0.2.17.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.2.17.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.2.17.dist-info/RECORD,,

{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl

hydraflow 0.2.17py3-none-any.whl → 0.3.0py3-none-any.whl