PyPI - hydraflow - Versions diffs - 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl - Mend

hydraflow 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

hydraflow/__init__.py +7 -4
hydraflow/asyncio.py +9 -3
hydraflow/context.py +24 -8
hydraflow/info.py +57 -4
hydraflow/mlflow.py +98 -42
hydraflow/progress.py +117 -46
hydraflow/run_collection.py +31 -122
{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/METADATA +1 -1
hydraflow-0.2.9.dist-info/RECORD +12 -0
hydraflow-0.2.7.dist-info/RECORD +0 -12
{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/WHEEL +0 -0
{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -1,11 +1,12 @@
 from .context import chdir_artifact, log_run, start_run, watch
-from .info import load_config
-from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
-from .run_collection import (
-    RunCollection,
+from .info import get_artifact_dir, get_hydra_output_dir, load_config
+from .mlflow import (
     list_runs,
     search_runs,
+    set_experiment,
 )
+from .progress import multi_tasks_progress, parallel_progress
+from .run_collection import RunCollection
 __all__ = [
     "RunCollection",
@@ -15,6 +16,8 @@ __all__ = [
     "list_runs",
     "load_config",
     "log_run",
+    "multi_tasks_progress",
+    "parallel_progress",
     "search_runs",
     "set_experiment",
     "start_run",

hydraflow/asyncio.py CHANGED Viewed

@@ -41,7 +41,9 @@ async def execute_command(
         int: The return code of the process.
     """
     try:
-        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        process = await asyncio.create_subprocess_exec(
+            program, *args, stdout=PIPE, stderr=PIPE
+        )
         await asyncio.gather(
             process_stream(process.stdout, stdout),
             process_stream(process.stderr, stderr),
@@ -100,7 +102,9 @@ async def monitor_file_changes(
     """
     str_paths = [str(path) for path in paths]
     try:
-        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+        async for changes in watchfiles.awatch(
+            *str_paths, stop_event=stop_event, **awatch_kwargs
+        ):
             callback(changes)
     except Exception as e:
         logger.error(f"Error watching files: {e}")
@@ -129,7 +133,9 @@ async def run_and_monitor(
     """
     stop_event = asyncio.Event()
     run_task = asyncio.create_task(
-        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+        execute_command(
+            program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr
+        )
     )
     if watch and paths:
         monitor_task = asyncio.create_task(

hydraflow/context.py CHANGED Viewed

@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
-from watchdog.events import FileModifiedEvent, FileSystemEventHandler
+from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
 from watchdog.observers import Observer
-from hydraflow.mlflow import get_artifact_dir, log_params
+from hydraflow.info import get_artifact_dir
+from hydraflow.mlflow import log_params
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -68,7 +69,7 @@ def log_run(
         mlflow.log_artifact(local_path)
     try:
-        with watch(log_artifact, output_dir):
+        with watch(log_artifact, output_dir, ignore_log=False):
             yield
     except Exception as e:
@@ -140,9 +141,11 @@ def start_run(
 @contextmanager
 def watch(
-    func: Callable[[Path], None],
+    callback: Callable[[Path], None],
     dir: Path | str = "",
     timeout: int = 60,
+    ignore_patterns: list[str] | None = None,
+    ignore_log: bool = True,
 ) -> Iterator[None]:
     """
     Watch the given directory for changes and call the provided function
@@ -154,7 +157,7 @@ def watch(
     period or until the context is exited.
     Args:
-        func (Callable[[Path], None]): The function to call when a change is
+        callback (Callable[[Path], None]): The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
         dir (Path | str): The directory to watch. If not specified,
@@ -174,7 +177,7 @@ def watch(
     if isinstance(dir, Path):
         dir = dir.as_posix()
-    handler = Handler(func)
+    handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
     observer = Observer()
     observer.schedule(handler, dir, recursive=True)
     observer.start()
@@ -198,10 +201,23 @@ def watch(
         observer.join()
-class Handler(FileSystemEventHandler):
-    def __init__(self, func: Callable[[Path], None]) -> None:
+class Handler(PatternMatchingEventHandler):
+    def __init__(
+        self,
+        func: Callable[[Path], None],
+        ignore_patterns: list[str] | None = None,
+        ignore_log: bool = True,
+    ) -> None:
         self.func = func
+        if ignore_log:
+            if ignore_patterns:
+                ignore_patterns.append("*.log")
+            else:
+                ignore_patterns = ["*.log"]
+        super().__init__(ignore_patterns=ignore_patterns)
     def on_modified(self, event: FileModifiedEvent) -> None:
         file = Path(str(event.src_path))
         if file.is_file():

hydraflow/info.py CHANGED Viewed

@@ -1,14 +1,14 @@
 from __future__ import annotations
+from pathlib import Path
 from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
 from omegaconf import DictConfig, OmegaConf
-from hydraflow.mlflow import get_artifact_dir
 if TYPE_CHECKING:
-    from pathlib import Path
     from mlflow.entities import Run
     from hydraflow.run_collection import RunCollection
@@ -43,6 +43,59 @@ class RunCollectionInfo:
         return [load_config(run) for run in self._runs]
+def get_artifact_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the artifact directory for the given run.
+    This function uses MLflow to get the artifact directory for the given run.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    if run is None:
+        uri = mlflow.get_artifact_uri()
+    else:
+        uri = artifact_utils.get_artifact_uri(run.info.run_id)
+    return Path(mlflow.artifacts.download_artifacts(uri))
+def get_hydra_output_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the Hydra output directory for the given run.
+    This function returns the Hydra output directory. If no run is provided,
+    it retrieves the output directory from the current Hydra configuration.
+    If a run is provided, it retrieves the artifact path for the run, loads
+    the Hydra configuration from the downloaded artifacts, and returns the
+    output directory specified in that configuration.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        Path: The path to the Hydra output directory.
+    Raises:
+        FileNotFoundError: If the Hydra configuration file is not found
+            in the artifacts.
+    """
+    if run is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
 def load_config(run: Run) -> DictConfig:
     """
     Load the configuration for a given run.

hydraflow/mlflow.py CHANGED Viewed

@@ -1,6 +1,20 @@
 """
-This module provides functionality to log parameters from Hydra
-configuration objects and set up experiments using MLflow.
+This module provides functionality to log parameters from Hydra configuration objects
+and set up experiments using MLflow. It includes methods for managing experiments,
+searching for runs, and logging parameters and artifacts.
+Key Features:
+- **Experiment Management**: Set and manage MLflow experiments with customizable names
+  based on Hydra configuration.
+- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
+  MLflow, ensuring that all relevant information is captured during experiments.
+- **Run Search**: Search for runs based on various criteria, allowing for flexible
+  retrieval of experiment results.
+- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
+  easy access to outputs generated during experiments.
+This module is designed to integrate seamlessly with Hydra, providing a robust
+solution for tracking machine learning experiments and their associated metadata.
 """
 from __future__ import annotations
@@ -10,14 +24,14 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
-from mlflow.tracking import artifact_utils
-from omegaconf import OmegaConf
+from mlflow.entities import ViewType
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from hydraflow.config import iter_params
+from hydraflow.run_collection import RunCollection
 if TYPE_CHECKING:
     from mlflow.entities.experiment import Experiment
-    from mlflow.entities.run import Run
 def set_experiment(
@@ -26,7 +40,7 @@ def set_experiment(
     uri: str | Path | None = None,
 ) -> Experiment:
     """
-    Set the experiment name and tracking URI optionally.
+    Sets the experiment name and tracking URI optionally.
     This function sets the experiment name by combining the given prefix,
     the job name from HydraConfig, and the given suffix. Optionally, it can
@@ -66,54 +80,96 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
         mlflow.log_param(key, value, synchronous=synchronous)
-def get_artifact_dir(run: Run | None = None) -> Path:
+def search_runs(
+    experiment_ids: list[str] | None = None,
+    filter_string: str = "",
+    run_view_type: int = ViewType.ACTIVE_ONLY,
+    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
+    order_by: list[str] | None = None,
+    search_all_experiments: bool = False,
+    experiment_names: list[str] | None = None,
+) -> RunCollection:
     """
-    Retrieve the artifact directory for the given run.
+    Search for Runs that fit the specified criteria.
-    This function uses MLflow to get the artifact directory for the given run.
+    This function wraps the `mlflow.search_runs` function and returns the
+    results as a `RunCollection` object. It allows for flexible searching of
+    MLflow runs based on various criteria.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
     Args:
-        run (Run | None): The run object. Defaults to None.
+        experiment_ids (list[str] | None): List of experiment IDs. Search can
+            work with experiment IDs or experiment names, but not both in the
+            same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_names`` is also not ``None`` or ``[]``.
+            ``None`` will default to the active experiment if ``experiment_names``
+            is ``None`` or ``[]``.
+        filter_string (str): Filter query string, defaults to searching all
+            runs.
+        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
+            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
+        max_results (int): The maximum number of runs to put in the dataframe.
+            Default is 100,000 to avoid causing out-of-memory issues on the user's
+            machine.
+        order_by (list[str] | None): List of columns to order by (e.g.,
+            "metrics.rmse"). The ``order_by`` column can contain an optional
+            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
+            ordering is to sort by ``start_time DESC``, then ``run_id``.
+            ``start_time DESC``, then ``run_id``.
+        search_all_experiments (bool): Boolean specifying whether all
+            experiments should be searched. Only honored if ``experiment_ids``
+            is ``[]`` or ``None``.
+        experiment_names (list[str] | None): List of experiment names. Search
+            can work with experiment IDs or experiment names, but not both in
+            the same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_ids`` is also not ``None`` or ``[]``.
+            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
+            default to the active experiment if ``experiment_ids`` is ``None``
+            or ``[]``.
     Returns:
-        The local path to the directory where the artifacts are downloaded.
+        A `RunCollection` object containing the search results.
     """
-    if run is None:
-        uri = mlflow.get_artifact_uri()
-    else:
-        uri = artifact_utils.get_artifact_uri(run.info.run_id)
-    return Path(mlflow.artifacts.download_artifacts(uri))
-def get_hydra_output_dir(*, run: Run | None = None) -> Path:
+    runs = mlflow.search_runs(
+        experiment_ids=experiment_ids,
+        filter_string=filter_string,
+        run_view_type=run_view_type,
+        max_results=max_results,
+        order_by=order_by,
+        output_format="list",
+        search_all_experiments=search_all_experiments,
+        experiment_names=experiment_names,
+    )
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore
+def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
     """
-    Retrieve the Hydra output directory for the given run.
+    List all runs for the specified experiments.
-    This function returns the Hydra output directory. If no run is provided,
-    it retrieves the output directory from the current Hydra configuration.
-    If a run is provided, it retrieves the artifact path for the run, loads
-    the Hydra configuration from the downloaded artifacts, and returns the
-    output directory specified in that configuration.
+    This function retrieves all runs for the given list of experiment names.
+    If no experiment names are provided (None), it defaults to searching all runs
+    for the currently active experiment. If an empty list is provided, the function
+    will search all runs for all experiments except the "Default" experiment.
+    The function returns the results as a `RunCollection` object.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
     Args:
-        run (Run | None): The run object. Defaults to None.
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None or an empty list is provided, the function will
+            search the currently active experiment or all experiments except
+            the "Default" experiment.
     Returns:
-        Path: The path to the Hydra output directory.
-    Raises:
-        FileNotFoundError: If the Hydra configuration file is not found
-            in the artifacts.
+        A `RunCollection` object containing the runs for the specified experiments.
     """
-    if run is None:
-        hc = HydraConfig.get()
-        return Path(hc.runtime.output_dir)
-    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
+    if experiment_names == []:
+        experiments = mlflow.search_experiments()
+        experiment_names = [e.name for e in experiments if e.name != "Default"]
-    raise FileNotFoundError
+    return search_runs(experiment_names=experiment_names)

hydraflow/progress.py CHANGED Viewed

@@ -1,17 +1,129 @@
+"""
+Module for managing progress tracking in parallel processing using Joblib
+and Rich's Progress bar.
+Provide context managers and functions to facilitate the execution
+of tasks in parallel while displaying progress updates.
+The following key components are provided:
+- JoblibProgress: A context manager for tracking progress with Rich's Progress
+    bar.
+- parallel_progress: A function to execute a given function in parallel over
+    an iterable with progress tracking.
+- multi_tasks_progress: A function to render auto-updating progress bars for
+    multiple tasks concurrently.
+Usage:
+    Import the necessary functions and use them to manage progress in your
+    parallel processing tasks.
+"""
 from __future__ import annotations
-from typing import TYPE_CHECKING
+from contextlib import contextmanager
+from typing import TYPE_CHECKING, TypeVar
 import joblib
 from rich.progress import Progress
 if TYPE_CHECKING:
-    from collections.abc import Iterable
+    from collections.abc import Callable, Iterable, Iterator
     from rich.progress import ProgressColumn
-def multi_task_progress(
+# https://github.com/jonghwanhyeon/joblib-progress/blob/main/joblib_progress/__init__.py
+@contextmanager
+def JoblibProgress(
+    *columns: ProgressColumn | str,
+    description: str | None = None,
+    total: int | None = None,
+    **kwargs,
+) -> Iterator[Progress]:
+    """
+    Context manager for tracking progress using Joblib with Rich's Progress bar.
+    Args:
+        *columns (ProgressColumn | str): Columns to display in the progress bar.
+        description (str | None, optional): A description for the progress task.
+            Defaults to None.
+        total (int | None, optional): The total number of tasks. If None, it will
+            be determined automatically.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+    Yields:
+        Progress: A Progress instance for managing the progress bar.
+    Example:
+        with JoblibProgress("task", total=100) as progress:
+            # Your parallel processing code here
+    """
+    if not columns:
+        columns = Progress.get_default_columns()
+    progress = Progress(*columns, **kwargs)
+    if description is None:
+        description = "Processing..."
+    task_id = progress.add_task(description, total=total)
+    print_progress = joblib.parallel.Parallel.print_progress
+    def update_progress(self: joblib.parallel.Parallel):
+        progress.update(task_id, completed=self.n_completed_tasks, refresh=True)
+        return print_progress(self)
+    try:
+        joblib.parallel.Parallel.print_progress = update_progress
+        progress.start()
+        yield progress
+    finally:
+        progress.stop()
+        joblib.parallel.Parallel.print_progress = print_progress
+T = TypeVar("T")
+U = TypeVar("U")
+def parallel_progress(
+    func: Callable[[T], U],
+    iterable: Iterable[T],
+    *columns: ProgressColumn | str,
+    n_jobs: int = -1,
+    description: str | None = None,
+    **kwargs,
+) -> list[U]:
+    """
+    Execute a function in parallel over an iterable with progress tracking.
+    Args:
+        func (Callable[[T], U]): The function to execute on each item in the
+            iterable.
+        iterable (Iterable[T]): An iterable of items to process.
+        *columns (ProgressColumn | str): Additional columns to display in the
+            progress bar.
+        n_jobs (int, optional): The number of jobs to run in parallel.
+            Defaults to -1 (all processors).
+        description (str | None, optional): A description for the progress bar.
+            Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+    Returns:
+        list[U]: A list of results from applying the function to each item in
+        the iterable.
+    """
+    iterable = list(iterable)
+    total = len(iterable)
+    with JoblibProgress(*columns, description=description, total=total, **kwargs):
+        it = (joblib.delayed(func)(x) for x in iterable)
+        return joblib.Parallel(n_jobs=n_jobs)(it)  # type: ignore
+def multi_tasks_progress(
     iterables: Iterable[Iterable[int | tuple[int, int]]],
     *columns: ProgressColumn | str,
     n_jobs: int = -1,
@@ -52,7 +164,8 @@ def multi_task_progress(
         task_main = progress.add_task(main_description, total=None) if n > 1 else None
         tasks = [
-            progress.add_task(description.format(i), start=False, total=None) for i in range(n)
+            progress.add_task(description.format(i), start=False, total=None)
+            for i in range(n)
         ]
         total = {}
@@ -87,45 +200,3 @@ def multi_task_progress(
         else:
             func(0)
-if __name__ == "__main__":
-    import random
-    import time
-    from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
-    from hydraflow.progress import multi_task_progress
-    def task(total):
-        for i in range(total or 90):
-            if total is None:
-                yield i
-            else:
-                yield i, total
-            time.sleep(random.random() / 30)
-    def multi_task_progress_test(unknown_total: bool):
-        tasks = [task(random.randint(80, 100)) for _ in range(4)]
-        if unknown_total:
-            tasks = [task(None), *tasks, task(None)]
-        columns = [
-            SpinnerColumn(),
-            *Progress.get_default_columns(),
-            MofNCompleteColumn(),
-            TimeElapsedColumn(),
-        ]
-        kwargs = {}
-        if unknown_total:
-            kwargs["main_description"] = "unknown"
-        multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
-    multi_task_progress_test(False)
-    multi_task_progress_test(True)
-    multi_task_progress([task(100)])
-    multi_task_progress([task(None)], description="unknown")
-    multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
-    multi_task_progress([task(100)], description="transient", transient=True)

hydraflow/run_collection.py CHANGED Viewed

@@ -1,7 +1,24 @@
 """
-This module provides functionality for managing and interacting with MLflow
-runs. It includes the `RunCollection` class and various methods to filter
-runs, retrieve run information, log artifacts, and load configurations.
+This module provides functionality for managing and interacting with MLflow runs.
+It includes the `RunCollection` class, which serves as a container for multiple MLflow
+run objects, and various methods to filter, retrieve, and manipulate these runs.
+Key Features:
+- **Run Management**: The `RunCollection` class allows for easy management of multiple
+  MLflow runs, providing methods to access, filter, and sort runs based on various
+  criteria.
+- **Filtering**: The module supports filtering runs based on specific configurations
+  and parameters, enabling users to easily find runs that match certain conditions.
+- **Retrieval**: Users can retrieve specific runs, including the first, last, or any
+  run that matches a given configuration.
+- **Artifact Handling**: The module provides methods to access and manipulate the
+  artifacts associated with each run, including retrieving artifact URIs and directories.
+The `RunCollection` class is designed to work seamlessly with the MLflow tracking
+API, providing a robust solution for managing machine learning experiment runs and
+their associated metadata. This module is particularly useful for data scientists and
+machine learning engineers who need to track and analyze the results of their experiments
+efficiently.
 """
 from __future__ import annotations
@@ -10,10 +27,7 @@ from dataclasses import dataclass, field
 from itertools import chain
 from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
-import mlflow
-from mlflow.entities import ViewType
 from mlflow.entities.run import Run
-from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from hydraflow.config import iter_params
 from hydraflow.info import RunCollectionInfo
@@ -26,101 +40,6 @@ if TYPE_CHECKING:
     from omegaconf import DictConfig
-def search_runs(
-    experiment_ids: list[str] | None = None,
-    filter_string: str = "",
-    run_view_type: int = ViewType.ACTIVE_ONLY,
-    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
-    order_by: list[str] | None = None,
-    search_all_experiments: bool = False,
-    experiment_names: list[str] | None = None,
-) -> RunCollection:
-    """
-    Search for Runs that fit the specified criteria.
-    This function wraps the `mlflow.search_runs` function and returns the
-    results as a `RunCollection` object. It allows for flexible searching of
-    MLflow runs based on various criteria.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``None`` will default to the active experiment if ``experiment_names``
-            is ``None`` or ``[]``.
-        filter_string (str): Filter query string, defaults to searching all
-            runs.
-        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
-            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
-        max_results (int): The maximum number of runs to put in the dataframe.
-            Default is 100,000 to avoid causing out-of-memory issues on the user's
-            machine.
-        order_by (list[str] | None): List of columns to order by (e.g.,
-            "metrics.rmse"). The ``order_by`` column can contain an optional
-            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
-            ordering is to sort by ``start_time DESC``, then ``run_id``.
-            ``start_time DESC``, then ``run_id``.
-        search_all_experiments (bool): Boolean specifying whether all
-            experiments should be searched. Only honored if ``experiment_ids``
-            is ``[]`` or ``None``.
-        experiment_names (list[str] | None): List of experiment names. Search
-            can work with experiment IDs or experiment names, but not both in
-            the same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_ids`` is also not ``None`` or ``[]``.
-            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_ids`` is ``None``
-            or ``[]``.
-    Returns:
-        A `RunCollection` object containing the search results.
-    """
-    runs = mlflow.search_runs(
-        experiment_ids=experiment_ids,
-        filter_string=filter_string,
-        run_view_type=run_view_type,
-        max_results=max_results,
-        order_by=order_by,
-        output_format="list",
-        search_all_experiments=search_all_experiments,
-        experiment_names=experiment_names,
-    )
-    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
-    return RunCollection(runs)  # type: ignore
-def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
-    """
-    List all runs for the specified experiments.
-    This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), it defaults to searching all runs
-    for the currently active experiment. If an empty list is provided, the function
-    will search all runs for all experiments except the "Default" experiment.
-    The function returns the results as a `RunCollection` object.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None or an empty list is provided, the function will
-            search the currently active experiment or all experiments except
-            the "Default" experiment.
-    Returns:
-        A `RunCollection` object containing the runs for the specified experiments.
-    """
-    if experiment_names == []:
-        experiments = mlflow.search_experiments()
-        experiment_names = [e.name for e in experiments if e.name != "Default"]
-    return search_runs(experiment_names=experiment_names)
 T = TypeVar("T")
 P = ParamSpec("P")
@@ -132,6 +51,11 @@ class RunCollection:
     This class provides methods to interact with the runs, such as filtering,
     retrieving specific runs, and accessing run information.
+    Key Features:
+    - Filtering: Easily filter runs based on various criteria.
+    - Retrieval: Access specific runs by index or through methods.
+    - Metadata: Access run metadata and associated information.
     """
     _runs: list[Run]
@@ -544,7 +468,9 @@ class RunCollection:
         """
         return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
-    def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
+    def group_by(
+        self, *names: str | list[str]
+    ) -> dict[tuple[str | None, ...], RunCollection]:
         """
         Group runs by specified parameter names.
@@ -569,25 +495,6 @@ class RunCollection:
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
-    def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
-        """
-        Group runs by specified parameter names.
-        This method groups the runs in the collection based on the values of the
-        specified parameters. Each unique combination of parameter values will
-        form a separate RunCollection in the returned list.
-        Args:
-            *names (str | list[str]): The names of the parameters to group by.
-                This can be a single parameter name or multiple names provided
-                as separate arguments or as a list.
-        Returns:
-            list[RunCollection]: A list of RunCollection objects, where each
-            object contains runs that match the specified parameter values.
-        """
-        return list(self.group_by(*names).values())
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
@@ -747,7 +654,9 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
     return filtered_runs[-1]
-def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+def try_find_last_run(
+    runs: list[Run], config: object | None = None, **kwargs
+) -> Run | None:
     """
     Find the last run based on the provided configuration.

{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.7
+Version: 0.2.9
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

hydraflow-0.2.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
+hydraflow/asyncio.py,sha256=jdXuEFC6f7L_Dq6beASFZPQSvCnGimVxU-PRFsNc5U0,6241
+hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
+hydraflow/context.py,sha256=G7JMrG70sgBH2qILXl5nkGWNUoRggj518JWUq0ZiJ9E,7776
+hydraflow/info.py,sha256=Vj2sT66Ric63mmaq7Yu8nDFhsGQYO3MCHrxFpapDufc,3458
+hydraflow/mlflow.py,sha256=Q8RGijSURTjRkEDxzi_2Tk9KOx3QK__al5aArGQriHA,7249
+hydraflow/progress.py,sha256=UIIKlweji3L0uRi4hZ_DrtRcnayHPlsMoug7hVEKq8k,6753
+hydraflow/run_collection.py,sha256=V5lGdGHYgsSpBOYGaVEL1mpKJvdiEshBL0KmmZ8qeZo,29161
+hydraflow-0.2.9.dist-info/METADATA,sha256=ZjJQz_4MogGkcs16dOwnsp_J0icg9ypgQdXOYxVdxJg,4181
+hydraflow-0.2.9.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.2.9.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.2.9.dist-info/RECORD,,

hydraflow-0.2.7.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-hydraflow/__init__.py,sha256=ObIv7fGbNsqUhZf3sst-9pbgyFsJr6jVsNV10NmMQas,483
-hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
-hydraflow/config.py,sha256=6TCKNQZ3sSrIEvl245T2udwFuknejyN1dMcIVmOHdrQ,2102
-hydraflow/context.py,sha256=8Qn99yCSkCarDDthQ6hjgW80CBBIg0H7fnLvtw4ZXo8,7248
-hydraflow/info.py,sha256=LziP71wQ-tDQPMUPFV_6JExBU8r-Ja-O05F07b_RUcc,1812
-hydraflow/mlflow.py,sha256=USd51C5YFlk4Bjhs4F1PMakxDxjD6Nn2t4GhL6aZ6QQ,3647
-hydraflow/progress.py,sha256=0GJfKnnY_SAHVWpGvLdgOBsogGs8vVofjLuphuUEy2g,4296
-hydraflow/run_collection.py,sha256=NO_QEwIwxU0EouKCJ4HAhXd35uJrxqolI7vM5QfsNxw,33152
-hydraflow-0.2.7.dist-info/METADATA,sha256=_kqK5pFLntvmiFIc1UBWOzDSRMeerXDZ0ZozhlTMkSw,4181
-hydraflow-0.2.7.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.2.7.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.2.7.dist-info/RECORD,,

{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.2.7.dist-info → hydraflow-0.2.9.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

hydraflow 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl