PyPI - hydraflow - Versions diffs - 0.2.3__tar.gz → 0.2.5__tar.gz - Mend

hydraflow-0.2.3.tar.gz → hydraflow-0.2.5.tar.gz

Files changed (29) hide show

{hydraflow-0.2.3 → hydraflow-0.2.5}/.devcontainer/devcontainer.json RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "hydraflow",
-  "image": "mcr.microsoft.com/vscode/devcontainers/base:ubuntu-22.04",
+  "image": "mcr.microsoft.com/vscode/devcontainers/python:3.12",
   "features": {
     "ghcr.io/devcontainers-contrib/features/starship:1": {},
     "ghcr.io/va-h/devcontainers-features/uv:1": {}
@@ -9,7 +9,6 @@
     "vscode": {
       "extensions": [
         "charliermarsh.ruff",
-        "henriiik.vscode-sort",
         "ms-python.python",
         "ms-python.vscode-pylance"
       ]

{hydraflow-0.2.3 → hydraflow-0.2.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.3
+Version: 0.2.5
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -48,7 +48,7 @@ Description-Content-Type: text/markdown
 ## Overview
-Hydraflow is a powerful library designed to seamlessly integrate
+Hydraflow is a library designed to seamlessly integrate
 [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
 manage and track machine learning experiments. By combining the flexibility of
 Hydra's configuration management with the robust experiment tracking capabilities
@@ -99,13 +99,10 @@ def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()
-    # Automatically log params using Hydra config.
-    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+    # Automatically log Hydra config as params.
+    with hydraflow.start_run(cfg):
         # Your app code below.
-        # `info.output_dir` is the Hydra output directory.
-        # `info.artifact_dir` is the MLflow artifact directory.
         with hydraflow.watch(callback):
             # Watch files in the MLflow artifact directory.
             # You can update metrics or log other artifacts

{hydraflow-0.2.3 → hydraflow-0.2.5}/README.md RENAMED Viewed

@@ -17,7 +17,7 @@
 ## Overview
-Hydraflow is a powerful library designed to seamlessly integrate
+Hydraflow is a library designed to seamlessly integrate
 [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
 manage and track machine learning experiments. By combining the flexibility of
 Hydra's configuration management with the robust experiment tracking capabilities
@@ -68,13 +68,10 @@ def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()
-    # Automatically log params using Hydra config.
-    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+    # Automatically log Hydra config as params.
+    with hydraflow.start_run(cfg):
         # Your app code below.
-        # `info.output_dir` is the Hydra output directory.
-        # `info.artifact_dir` is the MLflow artifact directory.
         with hydraflow.watch(callback):
             # Watch files in the MLflow artifact directory.
             # You can update metrics or log other artifacts

{hydraflow-0.2.3 → hydraflow-0.2.5}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.3"
+version = "0.2.5"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"

hydraflow-0.2.5/src/hydraflow/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+from .context import chdir_artifact, log_run, start_run, watch
+from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
+from .runs import (
+    RunCollection,
+    list_runs,
+    load_config,
+    search_runs,
+)
+__all__ = [
+    "RunCollection",
+    "chdir_artifact",
+    "get_artifact_dir",
+    "get_hydra_output_dir",
+    "list_runs",
+    "load_config",
+    "log_run",
+    "search_runs",
+    "set_experiment",
+    "start_run",
+    "watch",
+]

{hydraflow-0.2.3 → hydraflow-0.2.5}/src/hydraflow/config.py RENAMED Viewed

@@ -22,9 +22,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config: The configuration object to iterate over. This can be a dictionary,
-            list, DictConfig, or ListConfig.
-        prefix: The prefix to prepend to the parameter keys.
+        config (object): The configuration object to iterate over. This can be a
+            dictionary, list, DictConfig, or ListConfig.
+        prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
     Yields:

{hydraflow-0.2.3 → hydraflow-0.2.5}/src/hydraflow/context.py RENAMED Viewed

@@ -9,7 +9,6 @@ import logging
 import os
 import time
 from contextlib import contextmanager
-from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -28,18 +27,12 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)
-@dataclass
-class Info:
-    output_dir: Path
-    artifact_dir: Path
 @contextmanager
 def log_run(
     config: object,
     *,
     synchronous: bool | None = None,
-) -> Iterator[Info]:
+) -> Iterator[None]:
     """
     Log the parameters from the given configuration object and manage the MLflow
     run context.
@@ -49,16 +42,15 @@ def log_run(
     are logged and the run is properly closed.
     Args:
-        config: The configuration object to log the parameters from.
-        synchronous: Whether to log the parameters synchronously.
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
     Yields:
-        Info: An `Info` object containing the output directory and artifact directory
-        paths.
+        None
     Example:
-        with log_run(config) as info:
+        with log_run(config):
             # Perform operations within the MLflow run context
             pass
     """
@@ -66,7 +58,6 @@ def log_run(
     hc = HydraConfig.get()
     output_dir = Path(hc.runtime.output_dir)
-    info = Info(output_dir, get_artifact_dir())
     # Save '.hydra' config directory first.
     output_subdir = output_dir / (hc.output_subdir or "")
@@ -78,7 +69,7 @@ def log_run(
     try:
         with watch(log_artifact, output_dir):
-            yield info
+            yield
     except Exception as e:
         log.error(f"Error during log_run: {e}")
@@ -89,6 +80,64 @@ def log_run(
         mlflow.log_artifacts(output_dir.as_posix())
+@contextmanager
+def start_run(
+    config: object,
+    *,
+    run_id: str | None = None,
+    experiment_id: str | None = None,
+    run_name: str | None = None,
+    nested: bool = False,
+    parent_run_id: str | None = None,
+    tags: dict[str, str] | None = None,
+    description: str | None = None,
+    log_system_metrics: bool | None = None,
+    synchronous: bool | None = None,
+) -> Iterator[Run]:
+    """
+    Start an MLflow run and log parameters using the provided configuration object.
+    This context manager starts an MLflow run and logs parameters using the specified
+    configuration object. It ensures that the run is properly closed after completion.
+    Args:
+        config (object): The configuration object to log parameters from.
+        run_id (str | None): The existing run ID. Defaults to None.
+        experiment_id (str | None): The experiment ID. Defaults to None.
+        run_name (str | None): The name of the run. Defaults to None.
+        nested (bool): Whether to allow nested runs. Defaults to False.
+        parent_run_id (str | None): The parent run ID. Defaults to None.
+        tags (dict[str, str] | None): Tags to associate with the run. Defaults to None.
+        description (str | None): A description of the run. Defaults to None.
+        log_system_metrics (bool | None): Whether to log system metrics. Defaults to None.
+        synchronous (bool | None): Whether to log parameters synchronously. Defaults to None.
+    Yields:
+        Run: An MLflow Run object representing the started run.
+    Example:
+        with start_run(config) as run:
+            # Perform operations within the MLflow run context
+            pass
+    See Also:
+        `mlflow.start_run`: The MLflow function to start a run directly.
+        `log_run`: A context manager to log parameters and manage the MLflow run context.
+    """
+    with mlflow.start_run(
+        run_id=run_id,
+        experiment_id=experiment_id,
+        run_name=run_name,
+        nested=nested,
+        parent_run_id=parent_run_id,
+        tags=tags,
+        description=description,
+        log_system_metrics=log_system_metrics,
+    ) as run:
+        with log_run(config, synchronous=synchronous):
+            yield run
 @contextmanager
 def watch(
     func: Callable[[Path], None],
@@ -105,12 +154,12 @@ def watch(
     period or until the context is exited.
     Args:
-        func: The function to call when a change is
+        func (Callable[[Path], None]): The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
-        dir: The directory to watch. If not specified,
+        dir (Path | str): The directory to watch. If not specified,
             the current MLflow artifact URI is used. Defaults to "".
-        timeout: The timeout period in seconds for the watcher
+        timeout (int): The timeout period in seconds for the watcher
             to run after the context is exited. Defaults to 60.
     Yields:
@@ -122,6 +171,8 @@ def watch(
             pass
     """
     dir = dir or get_artifact_dir()
+    if isinstance(dir, Path):
+        dir = dir.as_posix()
     handler = Handler(func)
     observer = Observer()
@@ -152,7 +203,7 @@ class Handler(FileSystemEventHandler):
         self.func = func
     def on_modified(self, event: FileModifiedEvent) -> None:
-        file = Path(event.src_path)
+        file = Path(str(event.src_path))
         if file.is_file():
             self.func(file)
@@ -171,8 +222,8 @@ def chdir_artifact(
     to the original directory after the context is exited.
     Args:
-        run: The run to get the artifact directory from.
-        artifact_path: The artifact path.
+        run (Run): The run to get the artifact directory from.
+        artifact_path (str | None): The artifact path.
     """
     curdir = Path.cwd()
     path = mlflow.artifacts.download_artifacts(

hydraflow-0.2.5/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+This module provides functionality to log parameters from Hydra
+configuration objects and set up experiments using MLflow.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
+from omegaconf import OmegaConf
+from hydraflow.config import iter_params
+if TYPE_CHECKING:
+    from mlflow.entities.experiment import Experiment
+def set_experiment(
+    prefix: str = "",
+    suffix: str = "",
+    uri: str | Path | None = None,
+) -> Experiment:
+    """
+    Set the experiment name and tracking URI optionally.
+    This function sets the experiment name by combining the given prefix,
+    the job name from HydraConfig, and the given suffix. Optionally, it can
+    also set the tracking URI.
+    Args:
+        prefix (str): The prefix to prepend to the experiment name.
+        suffix (str): The suffix to append to the experiment name.
+        uri (str | Path | None): The tracking URI to use. Defaults to None.
+    Returns:
+        Experiment: An instance of `mlflow.entities.Experiment` representing
+        the new active experiment.
+    """
+    if uri is not None:
+        mlflow.set_tracking_uri(uri)
+    hc = HydraConfig.get()
+    name = f"{prefix}{hc.job.name}{suffix}"
+    return mlflow.set_experiment(name)
+def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log the parameters from the given configuration object.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
+    Args:
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
+            Defaults to None.
+    """
+    for key, value in iter_params(config):
+        mlflow.log_param(key, value, synchronous=synchronous)
+def get_artifact_dir(
+    artifact_path: str | None = None,
+    *,
+    run_id: str | None = None,
+) -> Path:
+    """
+    Get the artifact directory for the given artifact path.
+    This function retrieves the artifact URI for the specified artifact path
+    using MLflow, downloads the artifacts to a local directory, and returns
+    the path to that directory.
+    Args:
+        artifact_path (str | None): The artifact path for which to get the
+            directory. Defaults to None.
+        run_id (str | None): The run ID for which to get the artifact directory.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    if run_id is None:
+        uri = mlflow.get_artifact_uri(artifact_path)
+    else:
+        uri = artifact_utils.get_artifact_uri(run_id, artifact_path)
+    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
+    return Path(dir)
+def get_hydra_output_dir(*, run_id: str | None = None) -> Path:
+    if run_id is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+    path = get_artifact_dir(run_id=run_id) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
+# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+#     """
+#     Log the Hydra output directory.
+#     Args:
+#         run: The run object.
+#     Returns:
+#         None
+#     """
+#     output_dir = get_hydra_output_dir(run)
+#     run_id = run if isinstance(run, str) else run.info.run_id
+#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

{hydraflow-0.2.3 → hydraflow-0.2.5}/src/hydraflow/runs.py RENAMED Viewed

@@ -45,28 +45,38 @@ def search_runs(
         The returned runs are sorted by their start time in ascending order.
     Args:
-        experiment_ids: List of experiment IDs. Search can work with experiment
-            IDs or experiment names, but not both in the same call. Values
-            other than ``None`` or ``[]`` will result in error if
+        experiment_ids (list[str] | None): List of experiment IDs. Search can
+            work with experiment IDs or experiment names, but not both in the
+            same call. Values other than ``None`` or ``[]`` will result in
+            error if
             ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
             default to the active experiment if ``experiment_names`` is ``None``
             or ``[]``.
-        filter_string: Filter query string, defaults to searching all runs.
-        run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or
-            ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
-        max_results: The maximum number of runs to put in the dataframe. Default
-            is 100,000 to avoid causing out-of-memory issues on the user's
+        filter_string (str): Filter query string, defaults to searching all
+            runs.
+        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
+            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
+        max_results (int): The maximum number of runs to put in the dataframe.
+            Default is 100,000 to avoid causing out-of-memory issues on the user's
             machine.
-        order_by: List of columns to order by (e.g., "metrics.rmse"). The
-            ``order_by`` column can contain an optional ``DESC`` or ``ASC``
-            value. The default is ``ASC``. The default ordering is to sort by
+        order_by (list[str] | None): List of columns to order by (e.g.,
+            "metrics.rmse"). The ``order_by`` column can contain an optional
+            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
+            ordering is to sort by
             ``start_time DESC``, then ``run_id``.
-        search_all_experiments: Boolean specifying whether all experiments
-            should be searched. Only honored if ``experiment_ids`` is ``[]`` or
-            ``None``.
-        experiment_names: List of experiment names. Search can work with
-            experiment IDs or experiment names, but not both in the same call.
-            Values other than ``None`` or ``[]`` will result in error if
+        search_all_experiments (bool): Boolean specifying whether all
+            experiments should be searched. Only honored if ``experiment_ids``
+            is ``[]`` or ``None``.
+        experiment_names (list[str] | None): List of experiment names. Search
+            can work with experiment IDs or experiment names, but not both in
+            the same call. Values other than ``None`` or ``[]`` will result in
+            error if
             ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
             default to the active experiment if ``experiment_ids`` is ``None``
             or ``[]``.
@@ -102,10 +112,10 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
         The returned runs are sorted by their start time in ascending order.
     Args:
-        experiment_names: List of experiment names to search for runs.
-        If None or an empty list is provided, the function will search
-        the currently active experiment or all experiments except the
-        "Default" experiment.
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None or an empty list is provided, the function will
+            search the currently active experiment or all experiments except
+            the "Default" experiment.
     Returns:
         A `RunCollection` object containing the runs for the specified experiments.
@@ -138,6 +148,22 @@ class RunCollection:
     def __len__(self) -> int:
         return len(self._runs)
+    def __iter__(self) -> Iterator[Run]:
+        return iter(self._runs)
+    def __getitem__(self, index: int) -> Run:
+        return self._runs[index]
+    def __contains__(self, run: Run) -> bool:
+        return run in self._runs
+    def sort(
+        self,
+        key: Callable[[Run], Any] | None = None,
+        reverse: bool = False,
+    ) -> None:
+        self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
     def first(self) -> Run:
         """
         Get the first run in the collection.
@@ -206,9 +232,9 @@ class RunCollection:
           and exclusive of the upper bound).
         Args:
-            config: The configuration object to filter the runs. This can be
-                any object that provides key-value pairs through the
-                `iter_params` function.
+            config (object | None): The configuration object to filter the runs.
+                This can be any object that provides key-value pairs through
+                the `iter_params` function.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -226,7 +252,7 @@ class RunCollection:
         is raised.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -251,7 +277,7 @@ class RunCollection:
         returned.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -274,7 +300,7 @@ class RunCollection:
         is raised.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -299,7 +325,7 @@ class RunCollection:
         returned.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -322,7 +348,7 @@ class RunCollection:
         one run matches the criteria, a `ValueError` is raised.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -348,7 +374,7 @@ class RunCollection:
         If more than one run matches the criteria, a `ValueError` is raised.
         Args:
-            config: The configuration object to identify the run.
+            config (object | None): The configuration object to identify the run.
             **kwargs: Additional key-value pairs to filter the runs.
         Returns:
@@ -398,7 +424,8 @@ class RunCollection:
         results.
         Args:
-            func: A function that takes a run and returns a result.
+            func (Callable[[Run], T]): A function that takes a run and returns a
+                result.
         Yields:
             Results obtained by applying the function to each run in the
@@ -412,7 +439,8 @@ class RunCollection:
         of results.
         Args:
-            func: A function that takes a run id and returns a result.
+            func (Callable[[str], T]): A function that takes a run id and returns a
+                result.
         Yields:
             Results obtained by applying the function to each run id in the
@@ -426,8 +454,8 @@ class RunCollection:
         an iterator of results.
         Args:
-            func: A function that takes a run configuration and returns a
-            result.
+            func (Callable[[DictConfig], T]): A function that takes a run
+                configuration and returns a result.
         Yields:
             Results obtained by applying the function to each run configuration
@@ -445,8 +473,8 @@ class RunCollection:
         have an artifact URI, None is passed to the function.
         Args:
-            func: A function that takes an artifact URI (string or None) and
-            returns a result.
+            func (Callable[[str | None], T]): A function that takes an
+                artifact URI (string or None) and returns a result.
         Yields:
             Results obtained by applying the function to each artifact URI in the
@@ -464,8 +492,8 @@ class RunCollection:
         path.
         Args:
-            func: A function that takes an artifact directory path (string) and
-            returns a result.
+            func (Callable[[str], T]): A function that takes an artifact directory
+                path (string) and returns a result.
         Yields:
             Results obtained by applying the function to each artifact directory
@@ -473,6 +501,33 @@ class RunCollection:
         """
         return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
+    def group_by(
+        self, names: list[str] | None = None, *args
+    ) -> dict[tuple[str, ...], RunCollection]:
+        """
+        Group the runs by the specified parameter names and return a dictionary
+        where the keys are the parameter values and the values are the runs.
+        Args:
+            names (list[str] | None): The parameter names to group by.
+            *args: Additional positional arguments to specify parameter names.
+        Returns:
+            A dictionary where the keys are the parameter values and the values
+            are the runs.
+        """
+        names = names[:] if names else []
+        names.extend(args)
+        grouped_runs = {}
+        for run in self._runs:
+            key = get_params(run, names)
+            if key not in grouped_runs:
+                grouped_runs[key] = []
+            grouped_runs[key].append(run)
+        return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
@@ -483,9 +538,9 @@ def _param_matches(run: Run, key: str, value: Any) -> bool:
     and tuples.
     Args:
-        run: The run object to check.
-        key: The parameter key to check.
-        value: The parameter value to check.
+        run (Run): The run object to check.
+        key (str): The parameter key to check.
+        value (Any): The parameter value to check.
     Returns:
         True if the run's parameter matches the specified key-value pair,
@@ -526,10 +581,10 @@ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list
       exclusive of the upper bound).
     Args:
-        runs: The list of runs to filter.
-        config: The configuration object to filter the runs. This can be any
-                object that provides key-value pairs through the `iter_params`
-                function.
+        runs (list[Run]): The list of runs to filter.
+        config (object | None): The configuration object to filter the runs.
+            This can be any object that provides key-value pairs through the
+            `iter_params` function.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -554,8 +609,8 @@ def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
     raised.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -584,8 +639,8 @@ def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run
     the provided parameters. If no run matches the criteria, None is returned.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -610,8 +665,8 @@ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Ru
     is raised.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -641,8 +696,8 @@ def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -
     the provided parameters. If no run matches the criteria, None is returned.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -667,8 +722,8 @@ def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
     than one run matches the criteria, a `ValueError` is raised.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -707,8 +762,8 @@ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run
     If more than one run matches the criteria, a `ValueError` is raised.
     Args:
-        runs: The runs to filter.
-        config: The configuration object to identify the run.
+        runs (list[Run]): The runs to filter.
+        config (object | None): The configuration object to identify the run.
         **kwargs: Additional key-value pairs to filter the runs.
     Returns:
@@ -737,6 +792,13 @@ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run
     raise ValueError(msg)
+def get_params(run: Run, names: list[str] | None = None, *args) -> tuple[str, ...]:
+    names = names[:] if names else []
+    names.extend(args)
+    return tuple(run.data.params[name] for name in names)
 def get_param_names(runs: list[Run]) -> list[str]:
     """
     Get the parameter names from the runs.
@@ -746,7 +808,7 @@ def get_param_names(runs: list[Run]) -> list[str]:
     set to ensure uniqueness.
     Args:
-        runs: The list of runs from which to extract parameter names.
+        runs (list[Run]): The list of runs from which to extract parameter names.
     Returns:
         A list of unique parameter names.
@@ -770,7 +832,8 @@ def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
     and the values are lists of parameter values.
     Args:
-        runs: The list of runs from which to extract parameter names and values.
+        runs (list[Run]): The list of runs from which to extract parameter names
+            and values.
     Returns:
         A dictionary where the keys are parameter names and the values are lists
@@ -795,7 +858,7 @@ def load_config(run: Run) -> DictConfig:
     `.hydra/config.yaml` is not found in the run's artifact directory.
     Args:
-        run: The Run instance for which to load the configuration.
+        run (Run): The Run instance for which to load the configuration.
     Returns:
         The loaded configuration as a DictConfig object. Returns an empty
@@ -813,37 +876,3 @@ def _load_config(run_id: str) -> DictConfig:
         return DictConfig({})
     return OmegaConf.load(path)  # type: ignore
-# def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
-#     """
-#     Get the Hydra output directory.
-#     Args:
-#         run: The run object.
-#     Returns:
-#         Path: The Hydra output directory.
-#     """
-#     path = get_artifact_dir(run) / ".hydra/hydra.yaml"
-#     if path.exists():
-#         hc = OmegaConf.load(path)
-#         return Path(hc.hydra.runtime.output_dir)
-#     raise FileNotFoundError
-# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-#     """
-#     Log the Hydra output directory.
-#     Args:
-#         run: The run object.
-#     Returns:
-#         None
-#     """
-#     output_dir = get_hydra_output_dir(run)
-#     run_id = run if isinstance(run, str) else run.info.run_id
-#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

{hydraflow-0.2.3 → hydraflow-0.2.5}/tests/scripts/log_run.py RENAMED Viewed

@@ -7,7 +7,7 @@ import hydra
 import mlflow
 from hydra.core.config_store import ConfigStore
-from hydraflow.context import log_run
+import hydraflow
 log = logging.getLogger(__name__)
@@ -25,11 +25,13 @@ cs.store(name="config", node=MySQLConfig)
 @hydra.main(version_base=None, config_name="config")
 def app(cfg: MySQLConfig):
     mlflow.set_experiment("log_run")
-    with mlflow.start_run(), log_run(cfg) as info:
+    with hydraflow.start_run(cfg):
+        artifact_dir = hydraflow.get_artifact_dir()
+        output_dir = hydraflow.get_hydra_output_dir()
         log.info(f"START, {cfg.host}, {cfg.port} ")
-        mlflow.log_text("A " + info.artifact_dir.as_posix(), "artifact_dir.txt")
-        mlflow.log_text("B " + info.output_dir.as_posix(), "output_dir.txt")
-        (info.artifact_dir / "a.txt").write_text("abc")
+        mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
+        mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
+        (artifact_dir / "a.txt").write_text("abc")
         log.info("END")

hydraflow-0.2.5/tests/test_context.py ADDED Viewed

@@ -0,0 +1,80 @@
+from unittest.mock import MagicMock, patch
+import mlflow
+import pytest
+from hydraflow.context import log_run, start_run, watch
+from hydraflow.runs import RunCollection
+@pytest.fixture
+def runs(monkeypatch, tmp_path):
+    from hydraflow.runs import list_runs
+    monkeypatch.chdir(tmp_path)
+    with (
+        patch("hydraflow.context.HydraConfig.get") as mock_hydra_config,
+        patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
+    ):
+        mock_hydra_config.return_value.runtime.output_dir = "/tmp"
+        mock_log_artifacts.return_value = None
+        mlflow.set_experiment("test_run")
+        for x in range(3):
+            cfg = {"x": x, "l": [x, x, x], "d": {"i": x}}
+            with start_run(cfg):
+                mlflow.log_param("y", x)
+        return list_runs(["test_run"])
+def test_runs_len(runs: RunCollection):
+    assert len(runs) == 3
+@pytest.mark.parametrize("i", [0, 1, 2])
+@pytest.mark.parametrize("n", ["x", "y"])
+def test_runs_params(runs: RunCollection, i: int, n: str):
+    assert runs[i].data.params[n] == str(i)
+@pytest.mark.parametrize("i", [0, 1, 2])
+def test_runs_params_list(runs: RunCollection, i: int):
+    assert runs[i].data.params["l"] == f"[{i}, {i}, {i}]"
+@pytest.mark.parametrize("i", [0, 1, 2])
+def test_runs_params_dict(runs: RunCollection, i: int):
+    assert runs[i].data.params["d.i"] == str(i)
+def test_log_run_error_handling():
+    config = MagicMock()
+    config.some_param = "value"
+    with (
+        patch("hydraflow.context.log_params") as mock_log_params,
+        patch("hydraflow.context.HydraConfig.get") as mock_hydra_config,
+        patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
+    ):
+        mock_log_params.side_effect = Exception("Test exception")
+        mock_hydra_config.return_value.runtime.output_dir = "/tmp"
+        mock_log_artifacts.return_value = None
+        with pytest.raises(Exception, match="Test exception"):
+            with log_run(config):
+                pass
+def test_watch_error_handling():
+    func = MagicMock()
+    dir = "/tmp"
+    with patch("hydraflow.context.Observer") as mock_observer:
+        mock_observer_instance = mock_observer.return_value
+        mock_observer_instance.start.side_effect = Exception("Test exception")
+        with pytest.raises(Exception, match="Test exception"):
+            with watch(func, dir):
+                pass

{hydraflow-0.2.3 → hydraflow-0.2.5}/tests/test_log_run.py RENAMED Viewed

@@ -26,7 +26,7 @@ def runs(monkeypatch, tmp_path):
 @pytest.fixture(params=range(4))
 def run(runs, request):
-    run = runs[request.param]
+    run = runs[request.param]  # type: ignore
     assert isinstance(run, Run)
     return run

{hydraflow-0.2.3 → hydraflow-0.2.5}/tests/test_runs.py RENAMED Viewed

@@ -396,6 +396,44 @@ def test_run_collection_map_dir(runs: RunCollection):
     assert all(isinstance(dir_path, str) for dir_path in results)
+def test_run_collection_sort(runs: RunCollection):
+    runs.sort(key=lambda x: x.data.params["p"])
+    assert [run.data.params["p"] for run in runs] == ["0", "1", "2", "3", "4", "5"]
+    runs.sort(reverse=True)
+    assert [run.data.params["p"] for run in runs] == ["5", "4", "3", "2", "1", "0"]
+def test_run_collection_iter(runs: RunCollection):
+    assert list(runs) == runs._runs
+@pytest.mark.parametrize("i", range(6))
+def test_run_collection_getitem(runs: RunCollection, i: int):
+    assert runs[i] == runs._runs[i]
+@pytest.mark.parametrize("i", range(6))
+def test_run_collection_contains(runs: RunCollection, i: int):
+    assert runs[i] in runs
+    assert runs._runs[i] in runs
+def test_run_collection_group_by(runs: RunCollection):
+    grouped = runs.group_by(["p"])
+    assert len(grouped) == 6
+    assert all(isinstance(group, RunCollection) for group in grouped.values())
+    assert all(len(group) == 1 for group in grouped.values())
+    assert grouped[("0",)][0] == runs[0]
+    assert grouped[("1",)][0] == runs[1]
+    grouped = runs.group_by(["q"])
+    assert len(grouped) == 2
+    grouped = runs.group_by(["r"])
+    assert len(grouped) == 3
 # def test_hydra_output_dir_error(runs_list: list[Run]):
 #     from hydraflow.runs import get_hydra_output_dir

hydraflow-0.2.3/src/hydraflow/__init__.py DELETED Viewed

@@ -1,30 +0,0 @@
-from .context import Info, chdir_artifact, log_run, watch
-from .mlflow import set_experiment
-from .runs import (
-    Run,
-    RunCollection,
-    filter_runs,
-    get_param_dict,
-    get_param_names,
-    get_run,
-    list_runs,
-    load_config,
-    search_runs,
-)
-__all__ = [
-    "Info",
-    "Run",
-    "RunCollection",
-    "chdir_artifact",
-    "filter_runs",
-    "get_param_dict",
-    "get_param_names",
-    "get_run",
-    "list_runs",
-    "load_config",
-    "log_run",
-    "search_runs",
-    "set_experiment",
-    "watch",
-]

hydraflow-0.2.3/src/hydraflow/mlflow.py DELETED Viewed

@@ -1,72 +0,0 @@
-"""
-This module provides functionality to log parameters from Hydra
-configuration objects and set up experiments using MLflow.
-"""
-from __future__ import annotations
-from pathlib import Path
-import mlflow
-from hydra.core.hydra_config import HydraConfig
-from hydraflow.config import iter_params
-def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
-    """
-    Set the experiment name and tracking URI optionally.
-    This function sets the experiment name by combining the given prefix,
-    the job name from HydraConfig, and the given suffix. Optionally, it can
-    also set the tracking URI.
-    Args:
-        prefix: The prefix to prepend to the experiment name.
-        suffix: The suffix to append to the experiment name.
-        uri: The tracking URI to use.
-    """
-    if uri:
-        mlflow.set_tracking_uri(uri)
-    hc = HydraConfig.get()
-    name = f"{prefix}{hc.job.name}{suffix}"
-    mlflow.set_experiment(name)
-def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    """
-    Log the parameters from the given configuration object.
-    This method logs the parameters from the provided configuration object
-    using MLflow. It iterates over the parameters and logs them using the
-    `mlflow.log_param` method.
-    Args:
-        config: The configuration object to log the parameters from.
-        synchronous: Whether to log the parameters synchronously.
-            Defaults to None.
-    """
-    for key, value in iter_params(config):
-        mlflow.log_param(key, value, synchronous=synchronous)
-def get_artifact_dir(artifact_path: str | None = None) -> Path:
-    """
-    Get the artifact directory for the given artifact path.
-    This function retrieves the artifact URI for the specified artifact path
-    using MLflow, downloads the artifacts to a local directory, and returns
-    the path to that directory.
-    Args:
-        artifact_path: The artifact path for which to get the directory.
-            Defaults to None.
-    Returns:
-        The local path to the directory where the artifacts are downloaded.
-    """
-    uri = mlflow.get_artifact_uri(artifact_path)
-    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
-    return Path(dir)

hydraflow-0.2.3/tests/test_context.py DELETED Viewed

@@ -1,36 +0,0 @@
-from unittest.mock import MagicMock, patch
-import pytest
-from hydraflow.context import log_run, watch
-def test_log_run_error_handling():
-    config = MagicMock()
-    config.some_param = "value"
-    with (
-        patch("hydraflow.context.log_params") as mock_log_params,
-        patch("hydraflow.context.HydraConfig.get") as mock_hydra_config,
-        patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
-    ):
-        mock_log_params.side_effect = Exception("Test exception")
-        mock_hydra_config.return_value.runtime.output_dir = "/tmp"
-        mock_log_artifacts.return_value = None
-        with pytest.raises(Exception, match="Test exception"):
-            with log_run(config):
-                pass
-def test_watch_error_handling():
-    func = MagicMock()
-    dir = "/tmp"
-    with patch("hydraflow.context.Observer") as mock_observer:
-        mock_observer_instance = mock_observer.return_value
-        mock_observer_instance.start.side_effect = Exception("Test exception")
-        with pytest.raises(Exception, match="Test exception"):
-            with watch(func, dir):
-                pass