PyPI - hydraflow - Versions diffs - 0.2.11__tar.gz → 0.2.14__tar.gz - Mend

hydraflow 0.2.11.tar.gz → 0.2.14.tar.gz

Files changed (33) hide show

{hydraflow-0.2.11 → hydraflow-0.2.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.11
+Version: 0.2.14
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -13,8 +13,6 @@ Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Documentation
-Classifier: Topic :: Software Development :: Documentation
 Requires-Python: >=3.10
 Requires-Dist: hydra-core>1.3
 Requires-Dist: joblib

hydraflow-0.2.14/mkdocs.yml ADDED Viewed

@@ -0,0 +1,53 @@
+site_name: hydraflow
+site_url: https://daizutabi.github.io/hydraflow/
+site_description: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
+site_author: daizutabi
+repo_url: https://github.com/daizutabi/hydraflow/
+repo_name: daizutabi/hydraflow
+edit_uri: edit/main/docs/
+theme:
+  name: material
+  font:
+    text: Fira Sans
+    code: Fira Code
+  icon:
+    repo: fontawesome/brands/github
+  palette:
+    - scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/weather-sunny
+        name: Switch to dark mode
+    - scheme: slate
+      primary: black
+      accent: black
+      toggle:
+        icon: material/weather-night
+        name: Switch to light mode
+  features:
+    - content.code.annotate
+    - content.tooltips
+    - navigation.expand
+    - navigation.footer
+    - navigation.indexes
+    - navigation.sections
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - navigation.top
+    - navigation.tracking
+plugins:
+  - search
+  - mkapi
+markdown_extensions:
+  - pymdownx.magiclink
+  - pymdownx.highlight:
+      use_pygments: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - pymdownx.tabbed:
+      alternate_style: true
+nav:
+  - Home: index.md
+  - Reference: $api/hydraflow.**

{hydraflow-0.2.11 → hydraflow-0.2.14}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.11"
+version = "0.2.14"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -15,8 +15,6 @@ classifiers = [
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
-  "Topic :: Documentation",
-  "Topic :: Software Development :: Documentation",
 ]
 requires-python = ">=3.10"
 dependencies = [

{hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/context.py RENAMED Viewed

@@ -51,9 +51,11 @@ def log_run(
         None
     Example:
+        ```python
         with log_run(config):
             # Perform operations within the MLflow run context
             pass
+        ```
     """
     log_params(config, synchronous=synchronous)
@@ -122,8 +124,9 @@ def start_run(
             pass
     See Also:
-        `mlflow.start_run`: The MLflow function to start a run directly.
-        `log_run`: A context manager to log parameters and manage the MLflow run context.
+        - `mlflow.start_run`: The MLflow function to start a run directly.
+        - `log_run`: A context manager to log parameters and manage the MLflow
+           run context.
     """
     with mlflow.start_run(
         run_id=run_id,
@@ -169,9 +172,11 @@ def watch(
         None
     Example:
+        ```python
         with watch(log_artifact, "/path/to/dir"):
             # Perform operations while watching the directory for changes
             pass
+        ```
     """
     dir = dir or get_artifact_dir()
     if isinstance(dir, Path):

{hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/mlflow.py RENAMED Viewed

@@ -22,10 +22,11 @@ from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING
+import joblib
 import mlflow
 from hydra.core.hydra_config import HydraConfig
 from mlflow.entities import ViewType
-from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
 from hydraflow.config import iter_params
 from hydraflow.run_collection import RunCollection
@@ -146,7 +147,9 @@ def search_runs(
     return RunCollection(runs)  # type: ignore
-def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
+def list_runs(
+    experiment_names: str | list[str] | None = None, n_jobs: int = 0
+) -> RunCollection:
     """
     List all runs for the specified experiments.
@@ -166,10 +169,34 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
             the "Default" experiment.
     Returns:
-        A `RunCollection` object containing the runs for the specified experiments.
+        RunCollection: A `RunCollection` instance containing the runs for the
+        specified experiments.
     """
-    if experiment_names == []:
+    if isinstance(experiment_names, str):
+        experiment_names = [experiment_names]
+    elif experiment_names == []:
         experiments = mlflow.search_experiments()
         experiment_names = [e.name for e in experiments if e.name != "Default"]
-    return search_runs(experiment_names=experiment_names)
+    if n_jobs == 0:
+        return search_runs(experiment_names=experiment_names)
+    if experiment_names is None:
+        experiment_id = _get_experiment_id()
+        experiment_names = [mlflow.get_experiment(experiment_id).name]
+    run_ids = []
+    for name in experiment_names:
+        if experiment := mlflow.get_experiment_by_name(name):
+            loc = experiment.artifact_location
+            if isinstance(loc, str) and loc.startswith("file://"):
+                path = Path(mlflow.artifacts.download_artifacts(loc))
+                run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
+    it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
+    runs = joblib.Parallel(n_jobs, prefer="threads")(it)
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore

{hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/progress.py RENAMED Viewed

@@ -7,16 +7,12 @@ of tasks in parallel while displaying progress updates.
 The following key components are provided:
-- JoblibProgress: A context manager for tracking progress with Rich's Progress
+- JoblibProgress: A context manager for tracking progress with Rich's progress
     bar.
 - parallel_progress: A function to execute a given function in parallel over
     an iterable with progress tracking.
 - multi_tasks_progress: A function to render auto-updating progress bars for
     multiple tasks concurrently.
-Usage:
-    Import the necessary functions and use them to manage progress in your
-    parallel processing tasks.
 """
 from __future__ import annotations
@@ -56,8 +52,10 @@ def JoblibProgress(
         Progress: A Progress instance for managing the progress bar.
     Example:
+        ```python
         with JoblibProgress("task", total=100) as progress:
             # Your parallel processing code here
+        ```
     """
     if not columns:
         columns = Progress.get_default_columns()

{hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/run_collection.py RENAMED Viewed

@@ -1,31 +1,27 @@
 """
-This module provides functionality for managing and interacting with MLflow runs.
-It includes the `RunCollection` class, which serves as a container for multiple MLflow
-run objects, and various methods to filter, retrieve, and manipulate these runs.
+Provide functionality for managing and interacting with MLflow runs.
+It includes the `RunCollection` class, which serves as a container
+for multiple MLflow `Run` instances, and various methods to filter and
+retrieve these runs.
 Key Features:
-- **Run Management**: The `RunCollection` class allows for easy management of multiple
-  MLflow runs, providing methods to access, filter, and sort runs based on various
-  criteria.
-- **Filtering**: The module supports filtering runs based on specific configurations
+- **Run Management**: The `RunCollection` class allows for easy management of
+  multiple MLflow runs, providing methods to filter and retrieve runs based
+  on various criteria.
+- **Filtering**: Support filtering runs based on specific configurations
   and parameters, enabling users to easily find runs that match certain conditions.
 - **Retrieval**: Users can retrieve specific runs, including the first, last, or any
   run that matches a given configuration.
-- **Artifact Handling**: The module provides methods to access and manipulate the
-  artifacts associated with each run, including retrieving artifact URIs and directories.
-The `RunCollection` class is designed to work seamlessly with the MLflow tracking
-API, providing a robust solution for managing machine learning experiment runs and
-their associated metadata. This module is particularly useful for data scientists and
-machine learning engineers who need to track and analyze the results of their experiments
-efficiently.
+- **Artifact Handling**: Provide methods to access and manipulate the
+  artifacts associated with each run, including retrieving artifact URIs and
+  directories.
 """
 from __future__ import annotations
 from dataclasses import dataclass, field
 from itertools import chain
-from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
+from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
 from mlflow.entities.run import Run
@@ -47,9 +43,9 @@ P = ParamSpec("P")
 @dataclass
 class RunCollection:
     """
-    A class to represent a collection of MLflow runs.
+    Represent a collection of MLflow runs.
-    This class provides methods to interact with the runs, such as filtering,
+    Provide methods to interact with the runs, such as filtering,
     retrieving specific runs, and accessing run information.
     Key Features:
@@ -59,10 +55,10 @@ class RunCollection:
     """
     _runs: list[Run]
-    """A list of MLflow Run objects."""
+    """A list of MLflow `Run` instances."""
     _info: RunCollectionInfo = field(init=False)
-    """A list of MLflow Run objects."""
+    """An instance of `RunCollectionInfo`."""
     def __post_init__(self):
         self._info = RunCollectionInfo(self)
@@ -76,16 +72,51 @@ class RunCollection:
     def __iter__(self) -> Iterator[Run]:
         return iter(self._runs)
-    def __getitem__(self, index: int) -> Run:
+    @overload
+    def __getitem__(self, index: int) -> Run: ...
+    @overload
+    def __getitem__(self, index: slice) -> RunCollection: ...
+    def __getitem__(self, index: int | slice) -> Run | RunCollection:
+        if isinstance(index, slice):
+            return self.__class__(self._runs[index])
         return self._runs[index]
     def __contains__(self, run: Run) -> bool:
         return run in self._runs
+    @classmethod
+    def from_list(cls, runs: list[Run]) -> RunCollection:
+        """Create a new `RunCollection` instance from a list of MLflow `Run` instances."""
+        return cls(runs)
     @property
     def info(self) -> RunCollectionInfo:
+        """An instance of `RunCollectionInfo`."""
         return self._info
+    def take(self, n: int) -> RunCollection:
+        """Take the first n runs from the collection.
+        If n is negative, the method returns the last n runs
+        from the collection.
+        Args:
+            n (int): The number of runs to take. If n is negative, the method
+            returns the last n runs from the collection.
+        Returns:
+            A new `RunCollection` instance containing the first n runs if n is
+            positive, or the last n runs if n is negative.
+        """
+        if n < 0:
+            return self.__class__(self._runs[n:])
+        return self.__class__(self._runs[:n])
     def sort(
         self,
         key: Callable[[Run], Any] | None = None,
@@ -93,12 +124,37 @@ class RunCollection:
     ) -> None:
         self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
+    def one(self) -> Run:
+        """
+        Get the only `Run` instance in the collection.
+        Returns:
+            The only `Run` instance in the collection.
+        Raises:
+            ValueError: If the collection does not contain exactly one run.
+        """
+        if len(self._runs) != 1:
+            raise ValueError("The collection does not contain exactly one run.")
+        return self._runs[0]
+    def try_one(self) -> Run | None:
+        """
+        Try to get the only `Run` instance in the collection.
+        Returns:
+            The only `Run` instance in the collection, or None if the collection
+            does not contain exactly one run.
+        """
+        return self._runs[0] if len(self._runs) == 1 else None
     def first(self) -> Run:
         """
-        Get the first run in the collection.
+        Get the first `Run` instance in the collection.
         Returns:
-            The first run object in the collection.
+            The first `Run` instance in the collection.
         Raises:
             ValueError: If the collection is empty.
@@ -110,20 +166,20 @@ class RunCollection:
     def try_first(self) -> Run | None:
         """
-        Try to get the first run in the collection.
+        Try to get the first `Run` instance in the collection.
         Returns:
-            The first run object in the collection, or None if the collection
+            The first `Run` instance in the collection, or None if the collection
             is empty.
         """
         return self._runs[0] if self._runs else None
     def last(self) -> Run:
         """
-        Get the last run in the collection.
+        Get the last `Run` instance in the collection.
         Returns:
-            The last run object in the collection.
+            The last `Run` instance in the collection.
         Raises:
             ValueError: If the collection is empty.
@@ -135,17 +191,17 @@ class RunCollection:
     def try_last(self) -> Run | None:
         """
-        Try to get the last run in the collection.
+        Try to get the last `Run` instance in the collection.
         Returns:
-            The last run object in the collection, or None if the collection is
-            empty.
+            The last `Run` instance in the collection, or None if the collection
+            is empty.
         """
         return self._runs[-1] if self._runs else None
     def filter(self, config: object | None = None, **kwargs) -> RunCollection:
         """
-        Filter the runs based on the provided configuration.
+        Filter the `Run` instances based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and additional key-value pairs. The
@@ -173,7 +229,7 @@ class RunCollection:
     def find(self, config: object | None = None, **kwargs) -> Run:
         """
-        Find the first run based on the provided configuration.
+        Find the first `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the first run that matches
@@ -185,20 +241,22 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The first run object that matches the provided configuration.
+            The first `Run` instance that matches the provided configuration.
         Raises:
             ValueError: If no run matches the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return find_run(self._runs, config, **kwargs)
+        try:
+            return self.filter(config, **kwargs).first()
+        except ValueError:
+            raise ValueError("No run matches the provided configuration.")
     def try_find(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Find the first run based on the provided configuration.
+        Try to find the first `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the first run that matches
@@ -210,18 +268,17 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The first run object that matches the provided configuration, or
+            The first `Run` instance that matches the provided configuration, or
             None if no runs match the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return try_find_run(self._runs, config, **kwargs)
+        return self.filter(config, **kwargs).try_first()
     def find_last(self, config: object | None = None, **kwargs) -> Run:
         """
-        Find the last run based on the provided configuration.
+        Find the last `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the last run that matches
@@ -233,20 +290,22 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The last run object that matches the provided configuration.
+            The last `Run` instance that matches the provided configuration.
         Raises:
             ValueError: If no run matches the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return find_last_run(self._runs, config, **kwargs)
+        try:
+            return self.filter(config, **kwargs).last()
+        except ValueError:
+            raise ValueError("No run matches the provided configuration.")
     def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Find the last run based on the provided configuration.
+        Try to find the last `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the last run that matches
@@ -258,18 +317,17 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The last run object that matches the provided configuration, or
+            The last `Run` instance that matches the provided configuration, or
             None if no runs match the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return try_find_last_run(self._runs, config, **kwargs)
+        return self.filter(config, **kwargs).try_last()
     def get(self, config: object | None = None, **kwargs) -> Run:
         """
-        Retrieve a specific run based on the provided configuration.
+        Retrieve a specific `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the run that matches the
@@ -281,21 +339,24 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The run object that matches the provided configuration.
+            The `Run` instance that matches the provided configuration.
         Raises:
             ValueError: If no run matches the criteria or if more than one run
             matches the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return get_run(self._runs, config, **kwargs)
+        try:
+            return self.filter(config, **kwargs).one()
+        except ValueError:
+            msg = "The filtered collection does not contain exactly one run."
+            raise ValueError(msg)
     def try_get(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Retrieve a specific run based on the provided configuration.
+        Try to retrieve a specific `Run` instance based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the run that matches the
@@ -307,17 +368,16 @@ class RunCollection:
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            The run object that matches the provided configuration, or None if
-            no runs match the criteria.
+            The `Run` instance that matches the provided configuration, or None
+            if no runs match the criteria.
         Raises:
             ValueError: If more than one run matches the criteria.
         See Also:
-            RunCollection.filter: The method that performs the actual filtering
-            logic.
+            `filter`: Perform the actual filtering logic.
         """
-        return try_get_run(self._runs, config, **kwargs)
+        return self.filter(config, **kwargs).try_one()
     def get_param_names(self) -> list[str]:
         """
@@ -330,7 +390,13 @@ class RunCollection:
         Returns:
             A list of unique parameter names.
         """
-        return get_param_names(self._runs)
+        param_names = set()
+        for run in self:
+            for param in run.data.params.keys():
+                param_names.add(param)
+        return list(param_names)
     def get_param_dict(self) -> dict[str, list[str]]:
         """
@@ -345,7 +411,13 @@ class RunCollection:
             A dictionary where the keys are parameter names and the values are
             lists of parameter values.
         """
-        return get_param_dict(self._runs)
+        params = {}
+        for name in self.get_param_names():
+            it = (run.data.params[name] for run in self if name in run.data.params)
+            params[name] = sorted(set(it))
+        return params
     def map(
         self,
@@ -426,9 +498,9 @@ class RunCollection:
         Apply a function to each artifact URI in the collection and return an
         iterator of results.
-        This method iterates over each run in the collection, retrieves the
-        artifact URI, and applies the provided function to it. If a run does not
-        have an artifact URI, None is passed to the function.
+        Iterate over each run in the collection, retrieve the artifact URI, and
+        apply the provided function to it. If a run does not have an artifact
+        URI, None is passed to the function.
         Args:
             func (Callable[[str | None, P], T]): A function that takes an
@@ -452,9 +524,8 @@ class RunCollection:
         Apply a function to each artifact directory in the collection and return
         an iterator of results.
-        This method iterates over each run in the collection, downloads the
-        artifact directory, and applies the provided function to the directory
-        path.
+        Iterate over each run in the collection, download the artifact
+        directory, and apply the provided function to the directory path.
         Args:
             func (Callable[[Path, P], T]): A function that takes an artifact directory
@@ -474,7 +545,7 @@ class RunCollection:
         """
         Group runs by specified parameter names.
-        This method groups the runs in the collection based on the values of the
+        Group the runs in the collection based on the values of the
         specified parameters. Each unique combination of parameter values will
         form a key in the returned dictionary.
@@ -500,7 +571,7 @@ def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
     Check if the run's parameter matches the specified key-value pair.
-    This function checks if the run's parameters contain the specified
+    Check if the run's parameters contain the specified
     key-value pair. It handles different types of values, including lists
     and tuples.
@@ -534,7 +605,7 @@ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list
     """
     Filter the runs based on the provided configuration.
-    This method filters the runs in the collection according to the
+    Filter the runs in the collection according to the
     specified configuration object and additional key-value pairs.
     The configuration object and key-value pairs should contain
     key-value pairs that correspond to the parameters of the runs.
@@ -566,201 +637,6 @@ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list
     return runs
-def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
-    """
-    Find the first run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the first run that matches
-    the provided parameters. If no run matches the criteria, a `ValueError` is
-    raised.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The first run object that matches the provided configuration.
-    Raises:
-        ValueError: If no run matches the criteria.
-    See Also:
-        RunCollection.filter: The method that performs the actual filtering logic.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        raise ValueError("No run matches the provided configuration.")
-    return filtered_runs[0]
-def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
-    """
-    Find the first run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the first run that matches
-    the provided parameters. If no run matches the criteria, None is returned.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The first run object that matches the provided configuration, or None
-        if no runs match the criteria.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        return None
-    return filtered_runs[0]
-def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
-    """
-    Find the last run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the last run that matches
-    the provided parameters. If no run matches the criteria, a `ValueError`
-    is raised.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The last run object that matches the provided configuration.
-    Raises:
-        ValueError: If no run matches the criteria.
-    See Also:
-        RunCollection.filter: The method that performs the actual filtering
-        logic.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        raise ValueError("No run matches the provided configuration.")
-    return filtered_runs[-1]
-def try_find_last_run(
-    runs: list[Run], config: object | None = None, **kwargs
-) -> Run | None:
-    """
-    Find the last run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the last run that matches
-    the provided parameters. If no run matches the criteria, None is returned.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The last run object that matches the provided configuration, or None
-        if no runs match the criteria.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        return None
-    return filtered_runs[-1]
-def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
-    """
-    Retrieve a specific run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the run that matches
-    the provided parameters. If no run matches the criteria, or if more
-    than one run matches the criteria, a `ValueError` is raised.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The run object that matches the provided configuration.
-    Raises:
-        ValueError: If no run matches the criteria or if more than one run
-        matches the criteria.
-    See Also:
-        RunCollection.filter: The method that performs the actual filtering
-        logic.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        raise ValueError("No run matches the provided configuration.")
-    if len(filtered_runs) == 1:
-        return filtered_runs[0]
-    msg = (
-        f"Multiple runs were filtered. Expected number of runs is 1, "
-        f"but found {len(filtered_runs)} runs."
-    )
-    raise ValueError(msg)
-def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
-    """
-    Retrieve a specific run based on the provided configuration.
-    This method filters the runs in the collection according to the
-    specified configuration object and returns the run that matches
-    the provided parameters. If no run matches the criteria, None is returned.
-    If more than one run matches the criteria, a `ValueError` is raised.
-    Args:
-        runs (list[Run]): The runs to filter.
-        config (object | None): The configuration object to identify the run.
-        **kwargs: Additional key-value pairs to filter the runs.
-    Returns:
-        The run object that matches the provided configuration, or None
-        if no runs match the criteria.
-    Raises:
-        ValueError: If more than one run matches the criteria.
-    See Also:
-        RunCollection.filter: The method that performs the actual filtering
-        logic.
-    """
-    filtered_runs = filter_runs(runs, config, **kwargs)
-    if len(filtered_runs) == 0:
-        return None
-    if len(filtered_runs) == 1:
-        return filtered_runs[0]
-    msg = (
-        "Multiple runs were filtered. Expected number of runs is 1, "
-        f"but found {len(filtered_runs)} runs."
-    )
-    raise ValueError(msg)
 def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
     """
     Retrieve the values of specified parameters from the given run.
@@ -787,52 +663,3 @@ def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
             names_.append(name)
     return tuple(run.data.params.get(name) for name in names_)
-def get_param_names(runs: list[Run]) -> list[str]:
-    """
-    Get the parameter names from the runs.
-    This method extracts the unique parameter names from the provided list of
-    runs. It iterates through each run and collects the parameter names into a
-    set to ensure uniqueness.
-    Args:
-        runs (list[Run]): The list of runs from which to extract parameter names.
-    Returns:
-        A list of unique parameter names.
-    """
-    param_names = set()
-    for run in runs:
-        for param in run.data.params.keys():
-            param_names.add(param)
-    return list(param_names)
-def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
-    """
-    Get the parameter dictionary from the list of runs.
-    This method extracts the parameter names and their corresponding values
-    from the provided list of runs. It iterates through each run and collects
-    the parameter values into a dictionary where the keys are parameter names
-    and the values are lists of parameter values.
-    Args:
-        runs (list[Run]): The list of runs from which to extract parameter names
-        and values.
-    Returns:
-        A dictionary where the keys are parameter names and the values are lists
-        of parameter values.
-    """
-    params = {}
-    for name in get_param_names(runs):
-        it = (run.data.params[name] for run in runs if name in run.data.params)
-        params[name] = sorted(set(it))
-    return params

{hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_app.py RENAMED Viewed

@@ -26,6 +26,48 @@ def rc(monkeypatch, tmp_path):
     yield hydraflow.list_runs()
+def test_list_runs_all(rc: RunCollection):
+    from hydraflow.mlflow import list_runs
+    rc_ = list_runs([])
+    assert len(rc) == len(rc_)
+    for a, b in zip(rc, rc_):
+        assert a.info.run_id == b.info.run_id
+        assert a.info.start_time == b.info.start_time
+        assert a.info.status == b.info.status
+        assert a.info.artifact_uri == b.info.artifact_uri
+@pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
+def test_list_runs_parallel(rc: RunCollection, n_jobs: int):
+    from hydraflow.mlflow import list_runs
+    rc_ = list_runs("_info_", n_jobs=n_jobs)
+    assert len(rc) == len(rc_)
+    for a, b in zip(rc, rc_):
+        assert a.info.run_id == b.info.run_id
+        assert a.info.start_time == b.info.start_time
+        assert a.info.status == b.info.status
+        assert a.info.artifact_uri == b.info.artifact_uri
+@pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
+def test_list_runs_parallel_active(rc: RunCollection, n_jobs: int):
+    from hydraflow.mlflow import list_runs
+    mlflow.set_experiment("_info_")
+    rc_ = list_runs(n_jobs=n_jobs)
+    assert len(rc) == len(rc_)
+    for a, b in zip(rc, rc_):
+        assert a.info.run_id == b.info.run_id
+        assert a.info.start_time == b.info.start_time
+        assert a.info.status == b.info.status
+        assert a.info.artifact_uri == b.info.artifact_uri
 def test_app_info_run_id(rc: RunCollection):
     assert len(rc.info.run_id) == 4

{hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_run_collection.py RENAMED Viewed

@@ -33,6 +33,12 @@ def run_list(runs: RunCollection):
     return runs._runs
+def test_from_list(run_list: list[Run]):
+    rc = RunCollection.from_list(run_list)
+    assert len(rc) == len(run_list)
+    assert all(run in rc for run in run_list)
 def test_search_runs_sorted(run_list: list[Run]):
     assert [run.data.params["p"] for run in run_list] == ["0", "1", "2", "3", "4", "5"]
@@ -84,91 +90,6 @@ def test_filter_invalid_param(run_list: list[Run]):
     assert len(x) == 6
-def test_find_run(run_list: list[Run]):
-    from hydraflow.run_collection import find_run, try_find_run
-    x = find_run(run_list, {"r": 1})
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "1"
-    x = find_run(run_list, r=2)
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "2"
-    x = try_find_run(run_list, r=2)
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "2"
-def test_find_run_none(run_list: list[Run]):
-    from hydraflow.run_collection import find_run
-    with pytest.raises(ValueError):
-        find_run(run_list, {"r": 10})
-def test_try_find_run_none_empty(run_list: list[Run]):
-    from hydraflow.run_collection import try_find_run
-    assert try_find_run([]) is None
-def test_find_last_run(run_list: list[Run]):
-    from hydraflow.run_collection import find_last_run, try_find_last_run
-    x = find_last_run(run_list, {"r": 1})
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "4"
-    x = find_last_run(run_list, r=2)
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "5"
-    x = try_find_last_run(run_list, r=2)
-    assert isinstance(x, Run)
-    assert x.data.params["p"] == "5"
-def test_find_last_run_none(run_list: list[Run]):
-    from hydraflow.run_collection import find_last_run
-    with pytest.raises(ValueError):
-        find_last_run(run_list, {"r": 10})
-def test_try_find_last_run_none(run_list: list[Run]):
-    from hydraflow.run_collection import try_find_last_run
-    assert try_find_last_run([]) is None
-def test_get_run(run_list: list[Run]):
-    from hydraflow.run_collection import get_run
-    run = get_run(run_list, {"p": 4})
-    assert isinstance(run, Run)
-    assert run.data.params["p"] == "4"
-def test_get_run_error(run_list: list[Run]):
-    from hydraflow.run_collection import get_run
-    with pytest.raises(ValueError):
-        get_run(run_list, {"q": 0})
-    with pytest.raises(ValueError):
-        get_run(run_list, {"q": -1})
-def test_try_get_run_none(run_list: list[Run]):
-    from hydraflow.run_collection import try_get_run
-    assert try_get_run(run_list, {"q": -1}) is None
-def test_try_get_run_error(run_list: list[Run]):
-    from hydraflow.run_collection import try_get_run
-    with pytest.raises(ValueError):
-        try_get_run(run_list, {"q": 0})
 def test_get_params(run_list: list[Run]):
     from hydraflow.run_collection import get_params
@@ -179,24 +100,6 @@ def test_get_params(run_list: list[Run]):
     assert get_params(run_list[5], ["a", "q"], "r") == (None, "None", "2")
-def test_get_param_names(run_list: list[Run]):
-    from hydraflow.run_collection import get_param_names
-    params = get_param_names(run_list)
-    assert len(params) == 3
-    assert "p" in params
-    assert "q" in params
-    assert "r" in params
-def test_get_param_dict(run_list: list[Run]):
-    from hydraflow.run_collection import get_param_dict
-    params = get_param_dict(run_list)
-    assert len(params["p"]) == 6
-    assert len(params["q"]) == 2
 @pytest.mark.parametrize("i", range(6))
 def test_chdir_artifact_list(i: int, run_list: list[Run]):
     from hydraflow.context import chdir_artifact
@@ -364,14 +267,14 @@ def test_list_runs_empty_list(runs, runs2):
 def test_list_runs_list(runs, runs2, name, n):
     from hydraflow.mlflow import list_runs
-    filtered_runs = list_runs(experiment_names=[name])
+    filtered_runs = list_runs(name)
     assert len(filtered_runs) == n
 def test_list_runs_none(runs, runs2):
     from hydraflow.mlflow import list_runs
-    no_runs = list_runs(experiment_names=["non_existent_experiment"])
+    no_runs = list_runs(["non_existent_experiment"])
     assert len(no_runs) == 0
@@ -427,6 +330,33 @@ def test_run_collection_getitem(runs: RunCollection, i: int):
     assert runs[i] == runs._runs[i]
+@pytest.mark.parametrize("i", range(6))
+def test_run_collection_getitem_slice(runs: RunCollection, i: int):
+    assert runs[i : i + 2]._runs == runs._runs[i : i + 2]
+@pytest.mark.parametrize("i", range(6))
+def test_run_collection_getitem_slice_step(runs: RunCollection, i: int):
+    assert runs[i::2]._runs == runs._runs[i::2]
+@pytest.mark.parametrize("i", range(6))
+def test_run_collection_getitem_slice_step_neg(runs: RunCollection, i: int):
+    assert runs[i::-2]._runs == runs._runs[i::-2]
+def test_run_collection_take(runs: RunCollection):
+    assert runs.take(3)._runs == runs._runs[:3]
+    assert len(runs.take(4)) == 4
+    assert runs.take(10)._runs == runs._runs
+def test_run_collection_take_neg(runs: RunCollection):
+    assert runs.take(-3)._runs == runs._runs[-3:]
+    assert len(runs.take(-4)) == 4
+    assert runs.take(-10)._runs == runs._runs
 @pytest.mark.parametrize("i", range(6))
 def test_run_collection_contains(runs: RunCollection, i: int):
     assert runs[i] in runs
@@ -462,32 +392,24 @@ def test_filter_runs_no_match(run_list: list[Run]):
     assert x == []
-def test_get_run_no_match(run_list: list[Run]):
-    from hydraflow.run_collection import get_run
+def test_get_run_no_match(runs: RunCollection):
     with pytest.raises(ValueError):
-        get_run(run_list, {"p": 10})
+        runs.get({"p": 10})
-def test_get_run_multiple_params(run_list: list[Run]):
-    from hydraflow.run_collection import get_run
-    run = get_run(run_list, {"p": 4, "q": 0})
+def test_get_run_multiple_params(runs: RunCollection):
+    run = runs.get({"p": 4, "q": 0})
     assert isinstance(run, Run)
     assert run.data.params["p"] == "4"
     assert run.data.params["q"] == "0"
-def test_try_get_run_no_match(run_list: list[Run]):
-    from hydraflow.run_collection import try_get_run
-    assert try_get_run(run_list, {"p": 10}) is None
+def test_try_get_run_no_match(runs: RunCollection):
+    assert runs.try_get({"p": 10}) is None
-def test_try_get_run_multiple_params(run_list: list[Run]):
-    from hydraflow.run_collection import try_get_run
-    run = try_get_run(run_list, {"p": 4, "q": 0})
+def test_try_get_run_multiple_params(runs: RunCollection):
+    run = runs.try_get({"p": 4, "q": 0})
     assert isinstance(run, Run)
     assert run.data.params["p"] == "4"
     assert run.data.params["q"] == "0"