PyPI - hydraflow - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

hydraflow-0.2.1-py3-none-any.whl → hydraflow-0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

hydraflow/__init__.py +6 -2
hydraflow/asyncio.py +199 -0
hydraflow/config.py +3 -0
hydraflow/runs.py +562 -135
{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/METADATA +17 -6
hydraflow-0.2.3.dist-info/RECORD +10 -0
hydraflow-0.2.1.dist-info/RECORD +0 -9
{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/WHEEL +0 -0
{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED

@@ -2,25 +2,29 @@ from .context import Info, chdir_artifact, log_run, watch
 from .mlflow import set_experiment
 from .runs import (
     Run,
-    Runs,
+    RunCollection,
     filter_runs,
     get_param_dict,
     get_param_names,
     get_run,
+    list_runs,
     load_config,
+    search_runs,
 )
 __all__ = [
     "Info",
     "Run",
-    "Runs",
+    "RunCollection",
     "chdir_artifact",
     "filter_runs",
     "get_param_dict",
     "get_param_names",
     "get_run",
+    "list_runs",
     "load_config",
     "log_run",
+    "search_runs",
     "set_experiment",
     "watch",
 ]

hydraflow/asyncio.py ADDED

@@ -0,0 +1,199 @@
+from __future__ import annotations
+import asyncio
+import logging
+from asyncio.subprocess import PIPE
+from pathlib import Path
+from typing import TYPE_CHECKING
+import watchfiles
+if TYPE_CHECKING:
+    from asyncio.streams import StreamReader
+    from collections.abc import Callable
+    from watchfiles import Change
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+async def execute_command(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    stop_event: asyncio.Event,
+) -> int:
+    """
+    Runs a command asynchronously and pass the output to callback functions.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        stop_event (asyncio.Event): Event to signal when the process is done.
+    Returns:
+        int: The return code of the process.
+    """
+    try:
+        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        await asyncio.gather(
+            process_stream(process.stdout, stdout),
+            process_stream(process.stderr, stderr),
+        )
+        returncode = await process.wait()
+    except Exception as e:
+        logger.error(f"Error running command: {e}")
+        returncode = 1
+    finally:
+        stop_event.set()
+    return returncode
+async def process_stream(
+    stream: StreamReader | None,
+    callback: Callable[[str], None] | None,
+) -> None:
+    """
+    Reads a stream asynchronously and pass each line to a callback function.
+    Args:
+        stream (StreamReader | None): The stream to read from.
+        callback (Callable[[str], None] | None): The callback function to handle
+        each line.
+    """
+    if stream is None or callback is None:
+        return
+    while True:
+        line = await stream.readline()
+        if line:
+            callback(line.decode().strip())
+        else:
+            break
+async def monitor_file_changes(
+    paths: list[str | Path],
+    callback: Callable[[set[tuple[Change, str]]], None],
+    stop_event: asyncio.Event,
+    **awatch_kwargs,
+) -> None:
+    """
+    Watches for file changes in specified paths and pass the changes to a
+    callback function.
+    Args:
+        paths (list[str | Path]): List of paths to monitor for changes.
+        callback (Callable[[set[tuple[Change, str]]], None]): The callback
+        function to handle file changes.
+        stop_event (asyncio.Event): Event to signal when to stop watching.
+        **awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
+    """
+    str_paths = [str(path) for path in paths]
+    try:
+        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+            callback(changes)
+    except Exception as e:
+        logger.error(f"Error watching files: {e}")
+async def run_and_monitor(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Runs a command and optionally watch for file changes concurrently.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
+        file changes.
+        paths (list[str | Path] | None): List of paths to monitor for changes.
+    """
+    stop_event = asyncio.Event()
+    run_task = asyncio.create_task(
+        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+    )
+    if watch and paths:
+        monitor_task = asyncio.create_task(
+            monitor_file_changes(paths, watch, stop_event, **awatch_kwargs)
+        )
+    else:
+        monitor_task = None
+    try:
+        if monitor_task:
+            await asyncio.gather(run_task, monitor_task)
+        else:
+            await run_task
+    except Exception as e:
+        logger.error(f"Error in run_and_monitor: {e}")
+    finally:
+        stop_event.set()
+        await run_task
+        if monitor_task:
+            await monitor_task
+    return run_task.result()
+def run(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Run a command synchronously and optionally watch for file changes.
+    This function is a synchronous wrapper around the asynchronous `run_and_monitor` function.
+    It runs a specified command and optionally monitors specified paths for file changes,
+    invoking the provided callbacks for standard output, standard error, and file changes.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for handling standard output lines.
+        stderr (Callable[[str], None] | None): Callback for handling standard error lines.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for handling file changes.
+        paths (list[str | Path] | None): List of paths to monitor for file changes.
+        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
+    Returns:
+        int: The return code of the process.
+    """
+    if watch and not paths:
+        paths = [Path.cwd()]
+    return asyncio.run(
+        run_and_monitor(
+            program,
+            *args,
+            stdout=stdout,
+            stderr=stderr,
+            watch=watch,
+            paths=paths,
+            **awatch_kwargs,
+        )
+    )

hydraflow/config.py CHANGED

@@ -30,6 +30,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     Yields:
         Key-value pairs representing the parameters in the configuration object.
     """
+    if config is None:
+        return
     if not isinstance(config, (DictConfig, ListConfig)):
         config = OmegaConf.create(config)  # type: ignore

hydraflow/runs.py CHANGED

@@ -1,7 +1,7 @@
 """
-This module provides functionality for managing and interacting with MLflow runs.
-It includes the `Runs` class and various methods to filter runs, retrieve run information,
-log artifacts, and load configurations.
+This module provides functionality for managing and interacting with MLflow
+runs. It includes the `RunCollection` class and various methods to filter
+runs, retrieve run information, log artifacts, and load configurations.
 """
 from __future__ import annotations
@@ -9,9 +9,10 @@ from __future__ import annotations
 from dataclasses import dataclass
 from functools import cache
 from itertools import chain
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, TypeVar
 import mlflow
+from mlflow.artifacts import download_artifacts
 from mlflow.entities import ViewType
 from mlflow.entities.run import Run
 from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
@@ -20,6 +21,7 @@ from omegaconf import DictConfig, OmegaConf
 from hydraflow.config import iter_params
 if TYPE_CHECKING:
+    from collections.abc import Callable, Iterator
     from typing import Any
@@ -31,41 +33,46 @@ def search_runs(
     order_by: list[str] | None = None,
     search_all_experiments: bool = False,
     experiment_names: list[str] | None = None,
-) -> Runs:
+) -> RunCollection:
     """
     Search for Runs that fit the specified criteria.
-    This function wraps the `mlflow.search_runs` function and returns the results
-    as a `Runs` object. It allows for flexible searching of MLflow runs based on
-    various criteria.
+    This function wraps the `mlflow.search_runs` function and returns the
+    results as a `RunCollection` object. It allows for flexible searching of
+    MLflow runs based on various criteria.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
     Args:
-        experiment_ids: List of experiment IDs. Search can work with experiment IDs or
-            experiment names, but not both in the same call. Values other than
-            ``None`` or ``[]`` will result in error if ``experiment_names`` is
-            also not ``None`` or ``[]``. ``None`` will default to the active
-            experiment if ``experiment_names`` is ``None`` or ``[]``.
+        experiment_ids: List of experiment IDs. Search can work with experiment
+            IDs or experiment names, but not both in the same call. Values
+            other than ``None`` or ``[]`` will result in error if
+            ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
+            default to the active experiment if ``experiment_names`` is ``None``
+            or ``[]``.
         filter_string: Filter query string, defaults to searching all runs.
-        run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or ``ALL`` runs
-            defined in :py:class:`mlflow.entities.ViewType`.
-        max_results: The maximum number of runs to put in the dataframe. Default is 100,000
-            to avoid causing out-of-memory issues on the user's machine.
-        order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
-            can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
-            The default ordering is to sort by ``start_time DESC``, then ``run_id``.
-        output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
-            is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
-            is returned.
-        search_all_experiments: Boolean specifying whether all experiments should be searched.
-            Only honored if ``experiment_ids`` is ``[]`` or ``None``.
-        experiment_names: List of experiment names. Search can work with experiment IDs or
-            experiment names, but not both in the same call. Values other
-            than ``None`` or ``[]`` will result in error if ``experiment_ids``
-            is also not ``None`` or ``[]``. ``None`` will default to the active
-            experiment if ``experiment_ids`` is ``None`` or ``[]``.
+        run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or
+            ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
+        max_results: The maximum number of runs to put in the dataframe. Default
+            is 100,000 to avoid causing out-of-memory issues on the user's
+            machine.
+        order_by: List of columns to order by (e.g., "metrics.rmse"). The
+            ``order_by`` column can contain an optional ``DESC`` or ``ASC``
+            value. The default is ``ASC``. The default ordering is to sort by
+            ``start_time DESC``, then ``run_id``.
+        search_all_experiments: Boolean specifying whether all experiments
+            should be searched. Only honored if ``experiment_ids`` is ``[]`` or
+            ``None``.
+        experiment_names: List of experiment names. Search can work with
+            experiment IDs or experiment names, but not both in the same call.
+            Values other than ``None`` or ``[]`` will result in error if
+            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
+            default to the active experiment if ``experiment_ids`` is ``None``
+            or ``[]``.
     Returns:
-        A `Runs` object containing the search results.
+        A `RunCollection` object containing the search results.
     """
     runs = mlflow.search_runs(
         experiment_ids=experiment_ids,
@@ -77,11 +84,44 @@ def search_runs(
         search_all_experiments=search_all_experiments,
         experiment_names=experiment_names,
     )
-    return Runs(runs)  # type: ignore
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore
+def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
+    """
+    List all runs for the specified experiments.
+    This function retrieves all runs for the given list of experiment names.
+    If no experiment names are provided (None), it defaults to searching all runs
+    for the currently active experiment. If an empty list is provided, the function
+    will search all runs for all experiments except the "Default" experiment.
+    The function returns the results as a `RunCollection` object.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
+    Args:
+        experiment_names: List of experiment names to search for runs.
+        If None or an empty list is provided, the function will search
+        the currently active experiment or all experiments except the
+        "Default" experiment.
+    Returns:
+        A `RunCollection` object containing the runs for the specified experiments.
+    """
+    if experiment_names == []:
+        experiments = mlflow.search_experiments()
+        experiment_names = [e.name for e in experiments if e.name != "Default"]
+    return search_runs(experiment_names=experiment_names)
+T = TypeVar("T")
 @dataclass
-class Runs:
+class RunCollection:
     """
     A class to represent a collection of MLflow runs.
@@ -89,133 +129,414 @@ class Runs:
     retrieving specific runs, and accessing run information.
     """
-    runs: list[Run]
+    _runs: list[Run]
+    """A list of MLflow Run objects."""
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
     def __len__(self) -> int:
-        return len(self.runs)
+        return len(self._runs)
+    def first(self) -> Run:
+        """
+        Get the first run in the collection.
+        Returns:
+            The first run object in the collection.
+        Raises:
+            ValueError: If the collection is empty.
+        """
+        if not self._runs:
+            raise ValueError("The collection is empty.")
+        return self._runs[0]
+    def try_first(self) -> Run | None:
+        """
+        Try to get the first run in the collection.
+        Returns:
+            The first run object in the collection, or None if the collection
+            is empty.
+        """
+        return self._runs[0] if self._runs else None
+    def last(self) -> Run:
+        """
+        Get the last run in the collection.
-    def filter(self, config: object) -> Runs:
+        Returns:
+            The last run object in the collection.
+        Raises:
+            ValueError: If the collection is empty.
+        """
+        if not self._runs:
+            raise ValueError("The collection is empty.")
+        return self._runs[-1]
+    def try_last(self) -> Run | None:
+        """
+        Try to get the last run in the collection.
+        Returns:
+            The last run object in the collection, or None if the collection is
+            empty.
+        """
+        return self._runs[-1] if self._runs else None
+    def filter(self, config: object | None = None, **kwargs) -> RunCollection:
         """
         Filter the runs based on the provided configuration.
         This method filters the runs in the collection according to the
-        specified configuration object. The configuration object should
-        contain key-value pairs that correspond to the parameters of the
-        runs. Only the runs that match all the specified parameters will
-        be included in the returned `Runs` object.
+        specified configuration object and additional key-value pairs. The
+        configuration object and key-value pairs should contain key-value pairs
+        that correspond to the parameters of the runs. Only the runs that match
+        all the specified parameters will be included in the returned
+        `RunCollection` object.
+        The filtering supports:
+        - Exact matches for single values.
+        - Membership checks for lists of values.
+        - Range checks for tuples of two values (inclusive of the lower bound
+          and exclusive of the upper bound).
         Args:
-            config: The configuration object to filter the runs.
+            config: The configuration object to filter the runs. This can be
+                any object that provides key-value pairs through the
+                `iter_params` function.
+            **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            A new `Runs` object containing the filtered runs.
+            A new `RunCollection` object containing the filtered runs.
         """
-        return Runs(filter_runs(self.runs, config))
+        return RunCollection(filter_runs(self._runs, config, **kwargs))
-    def get(self, config: object) -> Run | None:
+    def find(self, config: object | None = None, **kwargs) -> Run:
         """
-        Retrieve a specific run based on the provided configuration.
+        Find the first run based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the first run that matches
+        the provided parameters. If no run matches the criteria, a `ValueError`
+        is raised.
+        Args:
+            config: The configuration object to identify the run.
+            **kwargs: Additional key-value pairs to filter the runs.
+        Returns:
+            The first run object that matches the provided configuration.
+        Raises:
+            ValueError: If no run matches the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
+        """
+        return find_run(self._runs, config, **kwargs)
+    def try_find(self, config: object | None = None, **kwargs) -> Run | None:
+        """
+        Find the first run based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the first run that matches
+        the provided parameters. If no run matches the criteria, None is
+        returned.
+        Args:
+            config: The configuration object to identify the run.
+            **kwargs: Additional key-value pairs to filter the runs.
+        Returns:
+            The first run object that matches the provided configuration, or
+            None if no runs match the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
+        """
+        return try_find_run(self._runs, config, **kwargs)
+    def find_last(self, config: object | None = None, **kwargs) -> Run:
+        """
+        Find the last run based on the provided configuration.
         This method filters the runs in the collection according to the
-        specified configuration object and returns the run that matches
-        the provided parameters. If more than one run matches the criteria,
-        a `ValueError` is raised.
+        specified configuration object and returns the last run that matches
+        the provided parameters. If no run matches the criteria, a `ValueError`
+        is raised.
         Args:
             config: The configuration object to identify the run.
+            **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            Run: The run object that matches the provided configuration.
-            None, if the runs are not in a DataFrame format.
+            The last run object that matches the provided configuration.
         Raises:
-            ValueError: If the number of filtered runs is not exactly one.
+            ValueError: If no run matches the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
         """
-        return get_run(self.runs, config)
+        return find_last_run(self._runs, config, **kwargs)
-    def get_earliest_run(self, config: object | None = None, **kwargs) -> Run | None:
+    def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Get the earliest run from the list of runs based on the start time.
+        Find the last run based on the provided configuration.
-        This method filters the runs based on the configuration if provided
-        and returns the run with the earliest start time.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the last run that matches
+        the provided parameters. If no run matches the criteria, None is
+        returned.
         Args:
-            config: The configuration object to filter the runs.
-                If None, no filtering is applied.
+            config: The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The run with the earliest start time, or None if no runs match the criteria.
+            The last run object that matches the provided configuration, or
+            None if no runs match the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
+        """
+        return try_find_last_run(self._runs, config, **kwargs)
+    def get(self, config: object | None = None, **kwargs) -> Run:
+        """
+        Retrieve a specific run based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the run that matches the
+        provided parameters. If no run matches the criteria, or if more than
+        one run matches the criteria, a `ValueError` is raised.
+        Args:
+            config: The configuration object to identify the run.
+            **kwargs: Additional key-value pairs to filter the runs.
+        Returns:
+            The run object that matches the provided configuration.
+        Raises:
+            ValueError: If no run matches the criteria or if more than one run
+            matches the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
         """
-        return get_earliest_run(self.runs, config, **kwargs)
+        return get_run(self._runs, config, **kwargs)
-    def get_latest_run(self, config: object | None = None, **kwargs) -> Run | None:
+    def try_get(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Get the latest run from the list of runs based on the start time.
+        Retrieve a specific run based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the run that matches the
+        provided parameters. If no run matches the criteria, None is returned.
+        If more than one run matches the criteria, a `ValueError` is raised.
         Args:
-            config: The configuration object to filter the runs.
-                If None, no filtering is applied.
+            config: The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The run with the latest start time, or None if no runs match the criteria.
+            The run object that matches the provided configuration, or None if
+            no runs match the criteria.
+        Raises:
+            ValueError: If more than one run matches the criteria.
+        See Also:
+            RunCollection.filter: The method that performs the actual filtering
+            logic.
         """
-        return get_latest_run(self.runs, config, **kwargs)
+        return try_get_run(self._runs, config, **kwargs)
     def get_param_names(self) -> list[str]:
         """
         Get the parameter names from the runs.
-        This method extracts the unique parameter names from the provided list of runs.
-        It iterates through each run and collects the parameter names into a set to
-        ensure uniqueness.
+        This method extracts the unique parameter names from the provided list
+        of runs. It iterates through each run and collects the parameter names
+        into a set to ensure uniqueness.
         Returns:
             A list of unique parameter names.
         """
-        return get_param_names(self.runs)
+        return get_param_names(self._runs)
     def get_param_dict(self) -> dict[str, list[str]]:
         """
         Get the parameter dictionary from the list of runs.
         This method extracts the parameter names and their corresponding values
-        from the provided list of runs. It iterates through each run and collects
-        the parameter values into a dictionary where the keys are parameter names
-        and the values are lists of parameter values.
+        from the provided list of runs. It iterates through each run and
+        collects the parameter values into a dictionary where the keys are
+        parameter names and the values are lists of parameter values.
         Returns:
-            A dictionary where the keys are parameter names and the values are lists
-            of parameter values.
+            A dictionary where the keys are parameter names and the values are
+            lists of parameter values.
+        """
+        return get_param_dict(self._runs)
+    def map(self, func: Callable[[Run], T]) -> Iterator[T]:
+        """
+        Apply a function to each run in the collection and return an iterator of
+        results.
+        Args:
+            func: A function that takes a run and returns a result.
+        Yields:
+            Results obtained by applying the function to each run in the
+            collection.
+        """
+        return (func(run) for run in self._runs)
+    def map_run_id(self, func: Callable[[str], T]) -> Iterator[T]:
+        """
+        Apply a function to each run id in the collection and return an iterator
+        of results.
+        Args:
+            func: A function that takes a run id and returns a result.
+        Yields:
+            Results obtained by applying the function to each run id in the
+            collection.
+        """
+        return (func(run.info.run_id) for run in self._runs)
+    def map_config(self, func: Callable[[DictConfig], T]) -> Iterator[T]:
+        """
+        Apply a function to each run configuration in the collection and return
+        an iterator of results.
+        Args:
+            func: A function that takes a run configuration and returns a
+            result.
+        Yields:
+            Results obtained by applying the function to each run configuration
+            in the collection.
+        """
+        return (func(load_config(run)) for run in self._runs)
+    def map_uri(self, func: Callable[[str | None], T]) -> Iterator[T]:
+        """
+        Apply a function to each artifact URI in the collection and return an
+        iterator of results.
+        This method iterates over each run in the collection, retrieves the
+        artifact URI, and applies the provided function to it. If a run does not
+        have an artifact URI, None is passed to the function.
+        Args:
+            func: A function that takes an artifact URI (string or None) and
+            returns a result.
+        Yields:
+            Results obtained by applying the function to each artifact URI in the
+            collection.
         """
-        return get_param_dict(self.runs)
+        return (func(run.info.artifact_uri) for run in self._runs)
+    def map_dir(self, func: Callable[[str], T]) -> Iterator[T]:
+        """
+        Apply a function to each artifact directory in the collection and return
+        an iterator of results.
+        This method iterates over each run in the collection, downloads the
+        artifact directory, and applies the provided function to the directory
+        path.
+        Args:
+            func: A function that takes an artifact directory path (string) and
+            returns a result.
+        Yields:
+            Results obtained by applying the function to each artifact directory
+            in the collection.
+        """
+        return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
+def _param_matches(run: Run, key: str, value: Any) -> bool:
+    """
+    Check if the run's parameter matches the specified key-value pair.
+    This function checks if the run's parameters contain the specified
+    key-value pair. It handles different types of values, including lists
+    and tuples.
+    Args:
+        run: The run object to check.
+        key: The parameter key to check.
+        value: The parameter value to check.
+    Returns:
+        True if the run's parameter matches the specified key-value pair,
+        False otherwise.
+    """
+    param = run.data.params.get(key, value)
+    if param is None:
+        return False
+    if param == "None":
+        return value is None
+    if isinstance(value, list) and value:
+        return type(value[0])(param) in value
+    if isinstance(value, tuple) and len(value) == 2:
+        return value[0] <= type(value[0])(param) < value[1]
+    return type(value)(param) == value
-def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
+def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list[Run]:
     """
     Filter the runs based on the provided configuration.
     This method filters the runs in the collection according to the
-    specified configuration object. The configuration object should
-    contain key-value pairs that correspond to the parameters of the
-    runs. Only the runs that match all the specified parameters will
+    specified configuration object and additional key-value pairs.
+    The configuration object and key-value pairs should contain
+    key-value pairs that correspond to the parameters of the runs.
+    Only the runs that match all the specified parameters will
     be included in the returned list of runs.
+    The filtering supports:
+    - Exact matches for single values.
+    - Membership checks for lists of values.
+    - Range checks for tuples of two values (inclusive of the lower bound and
+      exclusive of the upper bound).
     Args:
-        runs: The runs to filter.
-        config: The configuration object to filter the runs.
+        runs: The list of runs to filter.
+        config: The configuration object to filter the runs. This can be any
+                object that provides key-value pairs through the `iter_params`
+                function.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        A filtered list of runs.
+        A list of runs that match the specified configuration and key-value pairs.
     """
     for key, value in chain(iter_params(config), kwargs.items()):
-        runs = [run for run in runs if _is_equal(run, key, value)]
+        runs = [run for run in runs if _param_matches(run, key, value)]
         if len(runs) == 0:
             return []
@@ -223,23 +544,44 @@ def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
     return runs
-def _is_equal(run: Run, key: str, value: Any) -> bool:
-    param = run.data.params.get(key, value)
+def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
+    """
+    Find the first run based on the provided configuration.
-    if param is None:
-        return False
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the first run that matches
+    the provided parameters. If no run matches the criteria, a `ValueError` is
+    raised.
-    return type(value)(param) == value
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        The first run object that matches the provided configuration.
+    Raises:
+        ValueError: If no run matches the criteria.
-def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
+    See Also:
+        RunCollection.filter: The method that performs the actual filtering logic.
     """
-    Retrieve a specific run based on the provided configuration.
+    filtered_runs = filter_runs(runs, config, **kwargs)
+    if len(filtered_runs) == 0:
+        raise ValueError("No run matches the provided configuration.")
+    return filtered_runs[0]
+def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+    """
+    Find the first run based on the provided configuration.
     This method filters the runs in the collection according to the
-    specified configuration object and returns the run that matches
-    the provided parameters. If more than one run matches the criteria,
-    a `ValueError` is raised.
+    specified configuration object and returns the first run that matches
+    the provided parameters. If no run matches the criteria, None is returned.
     Args:
         runs: The runs to filter.
@@ -247,75 +589,161 @@ def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        The run object that matches the provided configuration, or None
+        The first run object that matches the provided configuration, or None
         if no runs match the criteria.
+    """
+    filtered_runs = filter_runs(runs, config, **kwargs)
+    if len(filtered_runs) == 0:
+        return None
+    return filtered_runs[0]
+def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
+    """
+    Find the last run based on the provided configuration.
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the last run that matches
+    the provided parameters. If no run matches the criteria, a `ValueError`
+    is raised.
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        The last run object that matches the provided configuration.
     Raises:
-        ValueError: If more than one run matches the criteria.
+        ValueError: If no run matches the criteria.
+    See Also:
+        RunCollection.filter: The method that performs the actual filtering
+        logic.
     """
-    runs = filter_runs(runs, config, **kwargs)
+    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(runs) == 0:
-        return None
+    if len(filtered_runs) == 0:
+        raise ValueError("No run matches the provided configuration.")
-    if len(runs) == 1:
-        return runs[0]
+    return filtered_runs[-1]
-    msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
-    raise ValueError(msg)
+def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+    """
+    Find the last run based on the provided configuration.
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the last run that matches
+    the provided parameters. If no run matches the criteria, None is returned.
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        The last run object that matches the provided configuration, or None
+        if no runs match the criteria.
+    """
+    filtered_runs = filter_runs(runs, config, **kwargs)
+    if len(filtered_runs) == 0:
+        return None
+    return filtered_runs[-1]
-def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
     """
-    Get the earliest run from the list of runs based on the start time.
+    Retrieve a specific run based on the provided configuration.
-    This method filters the runs based on the configuration if provided
-    and returns the run with the earliest start time.
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the run that matches
+    the provided parameters. If no run matches the criteria, or if more
+    than one run matches the criteria, a `ValueError` is raised.
     Args:
-        runs: The list of runs.
-        config: The configuration object to filter the runs.
-            If None, no filtering is applied.
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        The run with the earliest start time, or None if no runs match the criteria.
+        The run object that matches the provided configuration.
+    Raises:
+        ValueError: If no run matches the criteria or if more than one run
+        matches the criteria.
+    See Also:
+        RunCollection.filter: The method that performs the actual filtering
+        logic.
     """
-    if config is not None or kwargs:
-        runs = filter_runs(runs, config or {}, **kwargs)
+    filtered_runs = filter_runs(runs, config, **kwargs)
+    if len(filtered_runs) == 0:
+        raise ValueError("No run matches the provided configuration.")
-    return min(runs, key=lambda run: run.info.start_time, default=None)
+    if len(filtered_runs) == 1:
+        return filtered_runs[0]
+    msg = (
+        f"Multiple runs were filtered. Expected number of runs is 1, "
+        f"but found {len(filtered_runs)} runs."
+    )
+    raise ValueError(msg)
-def get_latest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
     """
-    Get the latest run from the list of runs based on the start time.
+    Retrieve a specific run based on the provided configuration.
-    This method filters the runs based on the configuration if provided
-    and returns the run with the latest start time.
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the run that matches
+    the provided parameters. If no run matches the criteria, None is returned.
+    If more than one run matches the criteria, a `ValueError` is raised.
     Args:
-        runs: The list of runs.
-        config: The configuration object to filter the runs.
-            If None, no filtering is applied.
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        The run with the latest start time, or None if no runs match the criteria.
+        The run object that matches the provided configuration, or None
+        if no runs match the criteria.
+    Raises:
+        ValueError: If more than one run matches the criteria.
+    See Also:
+        RunCollection.filter: The method that performs the actual filtering
+        logic.
     """
-    if config is not None or kwargs:
-        runs = filter_runs(runs, config or {}, **kwargs)
+    filtered_runs = filter_runs(runs, config, **kwargs)
+    if len(filtered_runs) == 0:
+        return None
-    return max(runs, key=lambda run: run.info.start_time, default=None)
+    if len(filtered_runs) == 1:
+        return filtered_runs[0]
+    msg = (
+        "Multiple runs were filtered. Expected number of runs is 1, "
+        f"but found {len(filtered_runs)} runs."
+    )
+    raise ValueError(msg)
 def get_param_names(runs: list[Run]) -> list[str]:
     """
     Get the parameter names from the runs.
-    This method extracts the unique parameter names from the provided list of runs.
-    It iterates through each run and collects the parameter names into a set to
-    ensure uniqueness.
+    This method extracts the unique parameter names from the provided list of
+    runs. It iterates through each run and collects the parameter names into a
+    set to ensure uniqueness.
     Args:
         runs: The list of runs from which to extract parameter names.
@@ -363,13 +791,15 @@ def load_config(run: Run) -> DictConfig:
     This function loads the configuration for the provided Run instance
     by downloading the configuration file from the MLflow artifacts and
-    loading it using OmegaConf.
+    loading it using OmegaConf. It returns an empty config if
+    `.hydra/config.yaml` is not found in the run's artifact directory.
     Args:
-        run: The Run instance to load the configuration for.
+        run: The Run instance for which to load the configuration.
     Returns:
-        The loaded configuration.
+        The loaded configuration as a DictConfig object. Returns an empty
+        DictConfig if the configuration file is not found.
     """
     run_id = run.info.run_id
     return _load_config(run_id)
@@ -378,10 +808,7 @@ def load_config(run: Run) -> DictConfig:
 @cache
 def _load_config(run_id: str) -> DictConfig:
     try:
-        path = mlflow.artifacts.download_artifacts(
-            run_id=run_id,
-            artifact_path=".hydra/config.yaml",
-        )
+        path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
     except OSError:
         return DictConfig({})

{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.1
+Version: 0.2.3
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -20,7 +20,9 @@ Requires-Dist: hydra-core>1.3
 Requires-Dist: mlflow>2.15
 Requires-Dist: setuptools
 Requires-Dist: watchdog
+Requires-Dist: watchfiles
 Provides-Extra: dev
+Requires-Dist: pytest-asyncio; extra == 'dev'
 Requires-Dist: pytest-clarity; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'
 Requires-Dist: pytest-randomly; extra == 'dev'
@@ -46,14 +48,23 @@ Description-Content-Type: text/markdown
 ## Overview
-Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+Hydraflow is a powerful library designed to seamlessly integrate
+[Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
+manage and track machine learning experiments. By combining the flexibility of
+Hydra's configuration management with the robust experiment tracking capabilities
+of MLflow, Hydraflow provides a comprehensive solution for managing complex
+machine learning workflows.
 ## Key Features
-- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
-- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
-- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
-- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+- **Configuration Management**: Utilize Hydra's advanced configuration management
+  to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
+  metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model
+  checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
+  projects with minimal setup.
 ## Installation

hydraflow-0.2.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+hydraflow/__init__.py,sha256=9v7p2ezUd_LMoRJQS0ay8c7fpaqPZ6Ofq7YPT0rSO5I,528
+hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
+hydraflow/config.py,sha256=FNTuCppjCMrZKVByJMrWKbgj3HeMWWwAmQNoyFe029Y,2087
+hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
+hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
+hydraflow/runs.py,sha256=0BXSBbNkELP3CzaCGBkejOkpyk5uQUxrdknJPRwR400,29022
+hydraflow-0.2.3.dist-info/METADATA,sha256=h5Pxy6EnxTlyyGL8NRr14ZHtLhA9ldmM9GP5sES6KWU,4304
+hydraflow-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.2.3.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.2.3.dist-info/RECORD,,

hydraflow-0.2.1.dist-info/RECORD DELETED Viewed

@@ -1,9 +0,0 @@
-hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
-hydraflow/config.py,sha256=wI8uNuD2D-hIf4BAhEYJaMC6EyO-erKopy_ia_b1pYA,2048
-hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
-hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
-hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
-hydraflow-0.2.1.dist-info/METADATA,sha256=4C_hnw1gMb8WUQXyqj4q8eA1IVbp0wZuLGGthIk1G7U,4224
-hydraflow-0.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.2.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.2.1.dist-info/RECORD,,

{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.2.1.dist-info → hydraflow-0.2.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

hydraflow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl