hydraflow 0.2.5.tar.gz → 0.2.7.tar.gz
- {hydraflow-0.2.5 → hydraflow-0.2.7}/PKG-INFO +3 -1
- hydraflow-0.2.7/mlruns/0/meta.yaml +6 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/pyproject.toml +3 -1
- {hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/__init__.py +2 -2
- hydraflow-0.2.7/src/hydraflow/info.py +63 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/mlflow.py +30 -35
- hydraflow-0.2.7/src/hydraflow/progress.py +131 -0
- hydraflow-0.2.5/src/hydraflow/runs.py → hydraflow-0.2.7/src/hydraflow/run_collection.py +133 -82
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_asyncio.py +1 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_context.py +22 -14
- hydraflow-0.2.7/tests/test_info.py +51 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_log_run.py +1 -1
- hydraflow-0.2.7/tests/test_progress.py +12 -0
- hydraflow-0.2.5/tests/test_runs.py → hydraflow-0.2.7/tests/test_run_collection.py +92 -40
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_watch.py +4 -2
- {hydraflow-0.2.5 → hydraflow-0.2.7}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/.gitattributes +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/.gitignore +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/LICENSE +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/README.md +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/asyncio.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/config.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/context.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/scripts/log_run.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/scripts/watch.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_config.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_mlflow.py +0 -0
- {hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_version.py +0 -0

{hydraflow-0.2.5 → hydraflow-0.2.7}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.5
+Version: 0.2.7
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -17,7 +17,9 @@ Classifier: Topic :: Documentation
 Classifier: Topic :: Software Development :: Documentation
 Requires-Python: >=3.10
 Requires-Dist: hydra-core>1.3
+Requires-Dist: joblib
 Requires-Dist: mlflow>2.15
+Requires-Dist: rich
 Requires-Dist: setuptools
 Requires-Dist: watchdog
 Requires-Dist: watchfiles

{hydraflow-0.2.5 → hydraflow-0.2.7}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "hydraflow"
-version = "0.2.5"
+version = "0.2.7"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -21,7 +21,9 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
     "hydra-core>1.3",
+    "joblib",
     "mlflow>2.15",
+    "rich",
     "setuptools",
     "watchdog",
     "watchfiles",

{hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/__init__.py
@@ -1,9 +1,9 @@
 from .context import chdir_artifact, log_run, start_run, watch
+from .info import load_config
 from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
-from .runs import (
+from .run_collection import (
     RunCollection,
     list_runs,
-    load_config,
     search_runs,
 )
 
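
The re-exports above change the package's import surface: load_config now comes from hydraflow.info, and the run-collection helpers move from hydraflow.runs to hydraflow.run_collection. A minimal sketch of the 0.2.7 top-level imports (the experiment name is a placeholder):

    import hydraflow

    runs = hydraflow.list_runs(["my_experiment"])  # hypothetical experiment name
    cfg = hydraflow.load_config(runs[0])           # re-exported from hydraflow.info
    print(type(runs))                              # hydraflow.run_collection.RunCollection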

hydraflow-0.2.7/src/hydraflow/info.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from omegaconf import DictConfig, OmegaConf
+
+from hydraflow.mlflow import get_artifact_dir
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from mlflow.entities import Run
+
+    from hydraflow.run_collection import RunCollection
+
+
+class RunCollectionInfo:
+    def __init__(self, runs: RunCollection):
+        self._runs = runs
+
+    @property
+    def run_id(self) -> list[str]:
+        return [run.info.run_id for run in self._runs]
+
+    @property
+    def params(self) -> list[dict[str, str]]:
+        return [run.data.params for run in self._runs]
+
+    @property
+    def metrics(self) -> list[dict[str, float]]:
+        return [run.data.metrics for run in self._runs]
+
+    @property
+    def artifact_uri(self) -> list[str | None]:
+        return [run.info.artifact_uri for run in self._runs]
+
+    @property
+    def artifact_dir(self) -> list[Path]:
+        return [get_artifact_dir(run) for run in self._runs]
+
+    @property
+    def config(self) -> list[DictConfig]:
+        return [load_config(run) for run in self._runs]
+
+
+def load_config(run: Run) -> DictConfig:
+    """
+    Load the configuration for a given run.
+
+    This function loads the configuration for the provided Run instance
+    by downloading the configuration file from the MLflow artifacts and
+    loading it using OmegaConf. It returns an empty config if
+    `.hydra/config.yaml` is not found in the run's artifact directory.
+
+    Args:
+        run (Run): The Run instance for which to load the configuration.
+
+    Returns:
+        The loaded configuration as a DictConfig object. Returns an empty
+        DictConfig if the configuration file is not found.
+    """
+    path = get_artifact_dir(run) / ".hydra/config.yaml"
+    return OmegaConf.load(path)  # type: ignore
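
The new info module gives every RunCollection a lightweight accessor object exposing per-run IDs, params, metrics, artifact locations, and loaded configs. A minimal usage sketch, assuming a tracking directory already populated by Hydra runs (the experiment name is a placeholder):

    from hydraflow import list_runs

    runs = list_runs(["my_experiment"])  # hypothetical experiment name
    print(runs.info.run_id)              # one run ID per run
    print(runs.info.params)              # list of {param: value} dicts (values are strings)
    cfg = runs.info.config[0]            # DictConfig loaded from .hydra/config.yaml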

{hydraflow-0.2.5 → hydraflow-0.2.7}/src/hydraflow/mlflow.py
@@ -17,6 +17,7 @@ from hydraflow.config import iter_params
 
 if TYPE_CHECKING:
     from mlflow.entities.experiment import Experiment
+    from mlflow.entities.run import Run
 
 
 def set_experiment(
@@ -65,60 +66,54 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
         mlflow.log_param(key, value, synchronous=synchronous)
 
 
-def get_artifact_dir(
-    artifact_path: str | None = None,
-    *,
-    run_id: str | None = None,
-) -> Path:
+def get_artifact_dir(run: Run | None = None) -> Path:
     """
-
+    Retrieve the artifact directory for the given run.
 
-    This function
-    using MLflow, downloads the artifacts to a local directory, and returns
-    the path to that directory.
+    This function uses MLflow to get the artifact directory for the given run.
 
     Args:
-
-            directory. Defaults to None.
-        run_id (str | None): The run ID for which to get the artifact directory.
+        run (Run | None): The run object. Defaults to None.
 
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
-    if
-        uri = mlflow.get_artifact_uri(
+    if run is None:
+        uri = mlflow.get_artifact_uri()
     else:
-        uri = artifact_utils.get_artifact_uri(run_id
+        uri = artifact_utils.get_artifact_uri(run.info.run_id)
 
-
+    return Path(mlflow.artifacts.download_artifacts(uri))
 
-    return Path(dir)
 
+def get_hydra_output_dir(*, run: Run | None = None) -> Path:
+    """
+    Retrieve the Hydra output directory for the given run.
+
+    This function returns the Hydra output directory. If no run is provided,
+    it retrieves the output directory from the current Hydra configuration.
+    If a run is provided, it retrieves the artifact path for the run, loads
+    the Hydra configuration from the downloaded artifacts, and returns the
+    output directory specified in that configuration.
+
+    Args:
+        run (Run | None): The run object. Defaults to None.
+
+    Returns:
+        Path: The path to the Hydra output directory.
 
-
-
+    Raises:
+        FileNotFoundError: If the Hydra configuration file is not found
+            in the artifacts.
+    """
+    if run is None:
         hc = HydraConfig.get()
         return Path(hc.runtime.output_dir)
 
-    path = get_artifact_dir(
+    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
 
     if path.exists():
         hc = OmegaConf.load(path)
         return Path(hc.hydra.runtime.output_dir)
 
     raise FileNotFoundError
-
-
-# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-#     """
-#     Log the Hydra output directory.
-
-#     Args:
-#         run: The run object.
-
-#     Returns:
-#         None
-#     """
-#     output_dir = get_hydra_output_dir(run)
-#     run_id = run if isinstance(run, str) else run.info.run_id
-#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
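
In the hunk above, get_artifact_dir drops the artifact_path/run_id keywords in favor of a single optional Run argument, and get_hydra_output_dir becomes a documented public helper. A sketch of the 0.2.7 call pattern (the run lookup and experiment name are illustrative):

    from hydraflow import list_runs
    from hydraflow.mlflow import get_artifact_dir, get_hydra_output_dir

    run = list_runs(["my_experiment"])[0]        # hypothetical experiment name
    artifact_dir = get_artifact_dir(run)         # downloads the run's artifacts, returns a local Path
    output_dir = get_hydra_output_dir(run=run)   # reads .hydra/hydra.yaml from the downloaded artifacts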

hydraflow-0.2.7/src/hydraflow/progress.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import joblib
+from rich.progress import Progress
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+
+    from rich.progress import ProgressColumn
+
+
+def multi_task_progress(
+    iterables: Iterable[Iterable[int | tuple[int, int]]],
+    *columns: ProgressColumn | str,
+    n_jobs: int = -1,
+    description: str = "#{:0>3}",
+    main_description: str = "main",
+    transient: bool | None = None,
+    **kwargs,
+) -> None:
+    """
+    Render auto-updating progress bars for multiple tasks concurrently.
+
+    Args:
+        iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
+            iterables, each representing a task. Each iterable can yield
+            integers (completed) or tuples of integers (completed, total).
+        *columns (ProgressColumn | str): Additional columns to display in the
+            progress bars.
+        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to
+            -1, which means using all processors.
+        description (str, optional): Format string for describing tasks. Defaults to
+            "#{:0>3}".
+        main_description (str, optional): Description for the main task.
+            Defaults to "main".
+        transient (bool | None, optional): Whether to remove the progress bar
+            after completion. Defaults to None.
+        **kwargs: Additional keyword arguments passed to the Progress instance.
+
+    Returns:
+        None
+    """
+    if not columns:
+        columns = Progress.get_default_columns()
+
+    iterables = list(iterables)
+
+    with Progress(*columns, transient=transient or False, **kwargs) as progress:
+        n = len(iterables)
+
+        task_main = progress.add_task(main_description, total=None) if n > 1 else None
+        tasks = [
+            progress.add_task(description.format(i), start=False, total=None) for i in range(n)
+        ]
+
+        total = {}
+        completed = {}
+
+        def func(i: int) -> None:
+            completed[i] = 0
+            total[i] = None
+            progress.start_task(tasks[i])
+
+            for index in iterables[i]:
+                if isinstance(index, tuple):
+                    completed[i], total[i] = index[0] + 1, index[1]
+                else:
+                    completed[i] = index + 1
+
+                progress.update(tasks[i], total=total[i], completed=completed[i])
+                if task_main is not None:
+                    if all(t is not None for t in total.values()):
+                        t = sum(total.values())
+                    else:
+                        t = None
+                    c = sum(completed.values())
+                    progress.update(task_main, total=t, completed=c)
+
+            if transient or n > 1:
+                progress.remove_task(tasks[i])
+
+        if n > 1:
+            it = (joblib.delayed(func)(i) for i in range(n))
+            joblib.Parallel(n_jobs, prefer="threads")(it)
+
+        else:
+            func(0)
+
+
+if __name__ == "__main__":
+    import random
+    import time
+
+    from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
+
+    from hydraflow.progress import multi_task_progress
+
+    def task(total):
+        for i in range(total or 90):
+            if total is None:
+                yield i
+            else:
+                yield i, total
+            time.sleep(random.random() / 30)
+
+    def multi_task_progress_test(unknown_total: bool):
+        tasks = [task(random.randint(80, 100)) for _ in range(4)]
+        if unknown_total:
+            tasks = [task(None), *tasks, task(None)]
+
+        columns = [
+            SpinnerColumn(),
+            *Progress.get_default_columns(),
+            MofNCompleteColumn(),
+            TimeElapsedColumn(),
+        ]
+
+        kwargs = {}
+        if unknown_total:
+            kwargs["main_description"] = "unknown"
+
+        multi_task_progress(tasks, *columns, n_jobs=4, **kwargs)
+
+    multi_task_progress_test(False)
+    multi_task_progress_test(True)
+    multi_task_progress([task(100)])
+    multi_task_progress([task(None)], description="unknown")
+    multi_task_progress([task(100), task(None)], main_description="transient", transient=True)
+    multi_task_progress([task(100)], description="transient", transient=True)
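
multi_task_progress drives one rich progress bar per iterable plus an aggregate "main" bar when there is more than one task; each iterable yields either a completed count or a (completed, total) pair. A small sketch in the same spirit as the __main__ demo above (the work function is a placeholder):

    import time
    from hydraflow.progress import multi_task_progress

    def work(total: int):
        # Yield (index, total) so each bar knows its length up front.
        for i in range(total):
            time.sleep(0.01)
            yield i, total

    # Three task bars plus the aggregate bar, run on a joblib thread pool.
    multi_task_progress([work(50), work(80), work(120)], n_jobs=3)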

hydraflow-0.2.5/src/hydraflow/runs.py → hydraflow-0.2.7/src/hydraflow/run_collection.py
@@ -6,24 +6,25 @@ runs, retrieve run information, log artifacts, and load configurations.
 
 from __future__ import annotations
 
-from dataclasses import dataclass
-from functools import cache
+from dataclasses import dataclass, field
 from itertools import chain
-from typing import TYPE_CHECKING, Any, TypeVar
+from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
 
 import mlflow
-from mlflow.artifacts import download_artifacts
 from mlflow.entities import ViewType
 from mlflow.entities.run import Run
 from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
-from omegaconf import DictConfig, OmegaConf
 
 from hydraflow.config import iter_params
+from hydraflow.info import RunCollectionInfo
 
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
+    from pathlib import Path
     from typing import Any
 
+    from omegaconf import DictConfig
+
 
 def search_runs(
     experiment_ids: list[str] | None = None,
@@ -51,13 +52,6 @@ def search_runs(
             error if ``experiment_names`` is also not ``None`` or ``[]``.
            ``None`` will default to the active experiment if ``experiment_names``
            is ``None`` or ``[]``.
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_names`` is ``None``
-            or ``[]``.
         filter_string (str): Filter query string, defaults to searching all
             runs.
        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
@@ -128,6 +122,7 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
 
 
 T = TypeVar("T")
+P = ParamSpec("P")
 
 
 @dataclass
@@ -142,6 +137,12 @@ class RunCollection:
     _runs: list[Run]
     """A list of MLflow Run objects."""
 
+    _info: RunCollectionInfo = field(init=False)
+    """A list of MLflow Run objects."""
+
+    def __post_init__(self):
+        self._info = RunCollectionInfo(self)
+
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
 
@@ -157,6 +158,10 @@
     def __contains__(self, run: Run) -> bool:
         return run in self._runs
 
+    @property
+    def info(self) -> RunCollectionInfo:
+        return self._info
+
     def sort(
         self,
         key: Callable[[Run], Any] | None = None,
@@ -418,52 +423,81 @@
         """
         return get_param_dict(self._runs)
 
-    def map(
+    def map(
+        self,
+        func: Callable[Concatenate[Run, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each run in the collection and return an iterator of
         results.
 
+        This method iterates over each run in the collection and applies the
+        provided function to it, along with any additional arguments and
+        keyword arguments.
+
         Args:
-            func (Callable[[Run], T]): A function that takes a run and
-                result.
+            func (Callable[[Run, P], T]): A function that takes a run and
+                additional arguments and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
 
         Yields:
-            Results obtained by applying the function to each run in the
-            collection.
+            Results obtained by applying the function to each run in the collection.
         """
-        return (func(run) for run in self
+        return (func(run, *args, **kwargs) for run in self)
 
-    def map_run_id(
+    def map_run_id(
+        self,
+        func: Callable[Concatenate[str, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each run id in the collection and return an iterator
         of results.
 
         Args:
-            func (Callable[[str], T]): A function that takes a run id and returns a
+            func (Callable[[str, P], T]): A function that takes a run id and returns a
                result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
 
         Yields:
            Results obtained by applying the function to each run id in the
            collection.
        """
-        return (func(
+        return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
 
-    def map_config(
+    def map_config(
+        self,
+        func: Callable[Concatenate[DictConfig, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
        """
        Apply a function to each run configuration in the collection and return
        an iterator of results.
 
        Args:
-            func (Callable[[DictConfig], T]): A function that takes a run
+            func (Callable[[DictConfig, P], T]): A function that takes a run
                configuration and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
 
        Yields:
            Results obtained by applying the function to each run configuration
            in the collection.
        """
-        return (func(
+        return (func(config, *args, **kwargs) for config in self.info.config)
 
-    def map_uri(
+    def map_uri(
+        self,
+        func: Callable[Concatenate[str | None, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
        """
        Apply a function to each artifact URI in the collection and return an
        iterator of results.
@@ -473,16 +507,23 @@
             have an artifact URI, None is passed to the function.
 
         Args:
-            func (Callable[[str | None], T]): A function that takes an
-
+            func (Callable[[str | None, P], T]): A function that takes an
+                artifact URI (string or None) and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
 
         Yields:
             Results obtained by applying the function to each artifact URI in the
             collection.
         """
-        return (func(
+        return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
 
-    def map_dir(
+    def map_dir(
+        self,
+        func: Callable[Concatenate[Path, P], T],
+        *args: P.args,
+        **kwargs: P.kwargs,
+    ) -> Iterator[T]:
         """
         Apply a function to each artifact directory in the collection and return
         an iterator of results.
@@ -492,42 +533,61 @@
             path.
 
         Args:
-            func (Callable[[
+            func (Callable[[Path, P], T]): A function that takes an artifact directory
                 path (string) and returns a result.
+            *args: Additional arguments to pass to the function.
+            **kwargs: Additional keyword arguments to pass to the function.
 
         Yields:
             Results obtained by applying the function to each artifact directory
             in the collection.
         """
-        return (func(
+        return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir)
 
-    def group_by(
-        self, names: list[str] | None = None, *args
-    ) -> dict[tuple[str, ...], RunCollection]:
+    def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
         """
-        Group
-
+        Group runs by specified parameter names.
+
+        This method groups the runs in the collection based on the values of the
+        specified parameters. Each unique combination of parameter values will
+        form a key in the returned dictionary.
 
         Args:
-            names (list[str]
-
+            *names (str | list[str]): The names of the parameters to group by.
+                This can be a single parameter name or multiple names provided
+                as separate arguments or as a list.
 
         Returns:
-
-            are the
+            dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
+            are tuples of parameter values and the values are RunCollection objects
+            containing the runs that match those parameter values.
         """
-
-        names.extend(args)
-
-        grouped_runs = {}
+        grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
         for run in self._runs:
-            key = get_params(run, names)
-
-                grouped_runs[key] = []
-            grouped_runs[key].append(run)
+            key = get_params(run, *names)
+            grouped_runs.setdefault(key, []).append(run)
 
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
 
+    def group_by_values(self, *names: str | list[str]) -> list[RunCollection]:
+        """
+        Group runs by specified parameter names.
+
+        This method groups the runs in the collection based on the values of the
+        specified parameters. Each unique combination of parameter values will
+        form a separate RunCollection in the returned list.
+
+        Args:
+            *names (str | list[str]): The names of the parameters to group by.
+                This can be a single parameter name or multiple names provided
+                as separate arguments or as a list.
+
+        Returns:
+            list[RunCollection]: A list of RunCollection objects, where each
+            object contains runs that match the specified parameter values.
+        """
+        return list(self.group_by(*names).values())
+
 
 def _param_matches(run: Run, key: str, value: Any) -> bool:
     """
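
group_by now takes the parameter names directly (as separate arguments or as a list) and keys the result by tuples of parameter values, while group_by_values returns only the grouped collections. A sketch, using parameter names that appear in the tests later in this diff:

    from hydraflow import list_runs

    runs = list_runs(["my_experiment"])        # hypothetical experiment name
    by_p = runs.group_by("p")                  # {("0",): RunCollection(...), ("1",): ...}
    by_pq = runs.group_by("p", "q")            # keys are (p value, q value) tuples
    groups = runs.group_by_values(["p", "q"])  # same grouping, collections only, as a list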

@@ -792,11 +852,32 @@ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run
         raise ValueError(msg)
 
 
-def get_params(run: Run, names: list[str]
-
-
+def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
+    """
+    Retrieve the values of specified parameters from the given run.
+
+    This function extracts the values of the parameters identified by the
+    provided names from the specified run. It can accept both individual
+    parameter names and lists of parameter names.
+
+    Args:
+        run (Run): The run object from which to extract parameter values.
+        *names (str | list[str]): The names of the parameters to retrieve.
+            This can be a single parameter name or multiple names provided
+            as separate arguments or as a list.
+
+    Returns:
+        tuple[str | None, ...]: A tuple containing the values of the specified
+        parameters in the order they were provided.
+    """
+    names_ = []
+    for name in names:
+        if isinstance(name, list):
+            names_.extend(name)
+        else:
+            names_.append(name)
 
-    return tuple(run.data.params
+    return tuple(run.data.params.get(name) for name in names_)
 
 
 def get_param_names(runs: list[Run]) -> list[str]:
@@ -846,33 +927,3 @@ def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
         params[name] = sorted(set(it))
 
     return params
-
-
-def load_config(run: Run) -> DictConfig:
-    """
-    Load the configuration for a given run.
-
-    This function loads the configuration for the provided Run instance
-    by downloading the configuration file from the MLflow artifacts and
-    loading it using OmegaConf. It returns an empty config if
-    `.hydra/config.yaml` is not found in the run's artifact directory.
-
-    Args:
-        run (Run): The Run instance for which to load the configuration.
-
-    Returns:
-        The loaded configuration as a DictConfig object. Returns an empty
-        DictConfig if the configuration file is not found.
-    """
-    run_id = run.info.run_id
-    return _load_config(run_id)
-
-
-@cache
-def _load_config(run_id: str) -> DictConfig:
-    try:
-        path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
-    except OSError:
-        return DictConfig({})
-
-    return OmegaConf.load(path)  # type: ignore
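
With the Concatenate/ParamSpec signatures above, the map helpers forward extra positional and keyword arguments to the mapped function, and load_config has moved from hydraflow.runs to hydraflow.info. A sketch of the new call style (file name and experiment name are placeholders):

    from hydraflow import list_runs, load_config

    runs = list_runs(["my_experiment"])
    paths = list(runs.map_dir(lambda d, name: d / name, "metrics.csv"))  # extra arg forwarded
    ids = list(runs.map_run_id(lambda run_id, prefix: prefix + run_id, prefix="run-"))
    cfg = load_config(runs[0])  # was hydraflow.runs.load_config in 0.2.5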

{hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_context.py
@@ -1,15 +1,17 @@
+import time
+from pathlib import Path
 from unittest.mock import MagicMock, patch
 
 import mlflow
 import pytest
 
 from hydraflow.context import log_run, start_run, watch
-from hydraflow.runs import RunCollection
+from hydraflow.run_collection import RunCollection
 
 
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    from hydraflow.runs import list_runs
+    from hydraflow.run_collection import list_runs
 
     monkeypatch.chdir(tmp_path)
 
@@ -17,7 +19,7 @@ def runs(monkeypatch, tmp_path):
         patch("hydraflow.context.HydraConfig.get") as mock_hydra_config,
         patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
     ):
-        mock_hydra_config.return_value.runtime.output_dir =
+        mock_hydra_config.return_value.runtime.output_dir = tmp_path.as_posix()
         mock_log_artifacts.return_value = None
 
         mlflow.set_experiment("test_run")
@@ -49,7 +51,7 @@ def test_runs_params_dict(runs: RunCollection, i: int):
     assert runs[i].data.params["d.i"] == str(i)
 
 
-def test_log_run_error_handling():
+def test_log_run_error_handling(tmp_path: Path):
     config = MagicMock()
     config.some_param = "value"
 
@@ -59,7 +61,7 @@ def test_log_run_error_handling():
         patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
     ):
         mock_log_params.side_effect = Exception("Test exception")
-        mock_hydra_config.return_value.runtime.output_dir =
+        mock_hydra_config.return_value.runtime.output_dir = tmp_path.as_posix()
         mock_log_artifacts.return_value = None
 
         with pytest.raises(Exception, match="Test exception"):
@@ -67,14 +69,20 @@ def test_log_run_error_handling():
             pass
 
 
-def
-
-
+def test_watch_context_manager(tmp_path: Path):
+    test_dir = tmp_path / "test_watch"
+    test_dir.mkdir(parents=True, exist_ok=True)
+    test_file = test_dir / "test_file.txt"
 
-
-        mock_observer_instance = mock_observer.return_value
-        mock_observer_instance.start.side_effect = Exception("Test exception")
+    called = []
 
-
-
-
+    def mock_func(path: Path):
+        assert path == test_file
+        called.append(path)
+
+    with watch(mock_func, test_dir):
+        test_file.write_text("new content")
+        time.sleep(1)
+
+    assert len(called) == 1
+    assert called[0] == test_file

hydraflow-0.2.7/tests/test_info.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import mlflow
+import pytest
+
+from hydraflow.run_collection import RunCollection
+
+
+@pytest.fixture
+def runs(monkeypatch, tmp_path):
+    from hydraflow.run_collection import search_runs
+
+    monkeypatch.chdir(tmp_path)
+
+    mlflow.set_experiment("test_info")
+    for x in range(3):
+        with mlflow.start_run(run_name=f"{x}"):
+            mlflow.log_param("p", x)
+            mlflow.log_metric("metric1", x + 1)
+            mlflow.log_metric("metric2", x + 2)
+
+    x = search_runs()
+    assert isinstance(x, RunCollection)
+    return x
+
+
+def test_info_run_id(runs: RunCollection):
+    assert len(runs.info.run_id) == 3
+
+
+def test_info_params(runs: RunCollection):
+    assert runs.info.params == [{"p": "0"}, {"p": "1"}, {"p": "2"}]
+
+
+def test_info_metrics(runs: RunCollection):
+    m = runs.info.metrics
+    assert m[0] == {"metric1": 1, "metric2": 2}
+    assert m[1] == {"metric1": 2, "metric2": 3}
+    assert m[2] == {"metric1": 3, "metric2": 4}
+
+
+def test_info_artifact_uri(runs: RunCollection):
+    uri = runs.info.artifact_uri
+    assert all(u.startswith("file://") for u in uri)  # type: ignore
+    assert all(u.endswith("/artifacts") for u in uri)  # type: ignore
+
+
+def test_info_artifact_dir(runs: RunCollection):
+    dir = runs.info.artifact_dir
+    assert all(isinstance(d, Path) for d in dir)
+    assert all(d.stem == "artifacts" for d in dir)  # type: ignore

hydraflow-0.2.7/tests/test_progress.py
@@ -0,0 +1,12 @@
+import sys
+from subprocess import run
+
+import pytest
+
+
+@pytest.mark.skipif(
+    sys.platform == "win32", reason="'cp932' codec can't encode character '\\u2807'"
+)
+def test_progress_bar():
+    cp = run([sys.executable, "-m", "hydraflow.progress"])
+    assert cp.returncode == 0

hydraflow-0.2.5/tests/test_runs.py → hydraflow-0.2.7/tests/test_run_collection.py
@@ -5,14 +5,13 @@ from pathlib import Path
 import mlflow
 import pytest
 from mlflow.entities import Run
-from omegaconf import DictConfig
 
-from hydraflow.runs import RunCollection
+from hydraflow.run_collection import RunCollection
 
 
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    from hydraflow.runs import search_runs
+    from hydraflow.run_collection import search_runs
 
     monkeypatch.chdir(tmp_path)
 
@@ -39,13 +38,13 @@ def test_search_runs_sorted(run_list: list[Run]):
 
 
 def test_filter_none(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     assert run_list == filter_runs(run_list)
 
 
 def test_filter_one(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     assert len(run_list) == 6
     x = filter_runs(run_list, {"p": 1})
@@ -55,7 +54,7 @@ def test_filter_one(run_list: list[Run]):
 
 
 def test_filter_all(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     assert len(run_list) == 6
     x = filter_runs(run_list, {"q": 0})
@@ -65,28 +64,28 @@ def test_filter_all(run_list: list[Run]):
 
 
 def test_filter_list(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     x = filter_runs(run_list, p=[0, 4, 5])
     assert len(x) == 3
 
 
 def test_filter_tuple(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     x = filter_runs(run_list, p=(1, 3))
     assert len(x) == 2
 
 
 def test_filter_invalid_param(run_list: list[Run]):
-    from hydraflow.runs import filter_runs
+    from hydraflow.run_collection import filter_runs
 
     x = filter_runs(run_list, {"invalid": 0})
     assert len(x) == 6
 
 
 def test_find_run(run_list: list[Run]):
-    from hydraflow.runs import find_run, try_find_run
+    from hydraflow.run_collection import find_run, try_find_run
 
     x = find_run(run_list, {"r": 1})
     assert isinstance(x, Run)
@@ -100,20 +99,20 @@ def test_find_run(run_list: list[Run]):
 
 
 def test_find_run_none(run_list: list[Run]):
-    from hydraflow.runs import find_run
+    from hydraflow.run_collection import find_run
 
     with pytest.raises(ValueError):
         find_run(run_list, {"r": 10})
 
 
 def test_try_find_run_none_empty(run_list: list[Run]):
-    from hydraflow.runs import try_find_run
+    from hydraflow.run_collection import try_find_run
 
     assert try_find_run([]) is None
 
 
 def test_find_last_run(run_list: list[Run]):
-    from hydraflow.runs import find_last_run, try_find_last_run
+    from hydraflow.run_collection import find_last_run, try_find_last_run
 
     x = find_last_run(run_list, {"r": 1})
     assert isinstance(x, Run)
@@ -127,20 +126,20 @@ def test_find_last_run(run_list: list[Run]):
 
 
 def test_find_last_run_none(run_list: list[Run]):
-    from hydraflow.runs import find_last_run
+    from hydraflow.run_collection import find_last_run
 
     with pytest.raises(ValueError):
         find_last_run(run_list, {"r": 10})
 
 
 def test_try_find_last_run_none(run_list: list[Run]):
-    from hydraflow.runs import try_find_last_run
+    from hydraflow.run_collection import try_find_last_run
 
     assert try_find_last_run([]) is None
 
 
 def test_get_run(run_list: list[Run]):
-    from hydraflow.runs import get_run
+    from hydraflow.run_collection import get_run
 
     run = get_run(run_list, {"p": 4})
     assert isinstance(run, Run)
@@ -148,7 +147,7 @@ def test_get_run(run_list: list[Run]):
 
 
 def test_get_run_error(run_list: list[Run]):
-    from hydraflow.runs import get_run
+    from hydraflow.run_collection import get_run
 
     with pytest.raises(ValueError):
         get_run(run_list, {"q": 0})
@@ -158,20 +157,30 @@ def test_get_run_error(run_list: list[Run]):
 
 
 def test_try_get_run_none(run_list: list[Run]):
-    from hydraflow.runs import try_get_run
+    from hydraflow.run_collection import try_get_run
 
     assert try_get_run(run_list, {"q": -1}) is None
 
 
 def test_try_get_run_error(run_list: list[Run]):
-    from hydraflow.runs import try_get_run
+    from hydraflow.run_collection import try_get_run
 
     with pytest.raises(ValueError):
         try_get_run(run_list, {"q": 0})
 
 
+def test_get_params(run_list: list[Run]):
+    from hydraflow.run_collection import get_params
+
+    assert get_params(run_list[1], "p") == ("1",)
+    assert get_params(run_list[2], "p", "q") == ("2", "0")
+    assert get_params(run_list[3], ["p", "q"]) == ("3", "0")
+    assert get_params(run_list[4], "p", ["q", "r"]) == ("4", "0", "1")
+    assert get_params(run_list[5], ["a", "q"], "r") == (None, "None", "2")
+
+
 def test_get_param_names(run_list: list[Run]):
-    from hydraflow.runs import get_param_names
+    from hydraflow.run_collection import get_param_names
 
     params = get_param_names(run_list)
     assert len(params) == 3
@@ -181,7 +190,7 @@ def test_get_param_names(run_list: list[Run]):
 
 
 def test_get_param_dict(run_list: list[Run]):
-    from hydraflow.runs import get_param_dict
+    from hydraflow.run_collection import get_param_dict
 
     params = get_param_dict(run_list)
     assert len(params["p"]) == 6
@@ -250,7 +259,7 @@ def test_runs_filter(runs: RunCollection):
 
 
 def test_runs_get(runs: RunCollection):
-    from hydraflow.runs import Run
+    from hydraflow.run_collection import Run
 
     run = runs.get({"p": 4})
     assert isinstance(run, Run)
@@ -283,7 +292,7 @@ def test_runs_get_params_dict(runs: RunCollection):
 
 
 def test_runs_find(runs: RunCollection):
-    from hydraflow.runs import Run
+    from hydraflow.run_collection import Run
 
     run = runs.find({"r": 0})
     assert isinstance(run, Run)
@@ -304,7 +313,7 @@ def test_runs_try_find_none(runs: RunCollection):
 
 
 def test_runs_find_last(runs: RunCollection):
-    from hydraflow.runs import Run
+    from hydraflow.run_collection import Run
 
     run = runs.find_last({"r": 0})
     assert isinstance(run, Run)
@@ -333,7 +342,7 @@ def runs2(monkeypatch, tmp_path):
 
 
 def test_list_runs(runs, runs2):
-    from hydraflow.runs import list_runs
+    from hydraflow.run_collection import list_runs
 
     mlflow.set_experiment("test_run")
     all_runs = list_runs()
@@ -345,7 +354,7 @@ def test_list_runs(runs, runs2):
 
 
 def test_list_runs_empty_list(runs, runs2):
-    from hydraflow.runs import list_runs
+    from hydraflow.run_collection import list_runs
 
     all_runs = list_runs([])
     assert len(all_runs) == 9
@@ -353,14 +362,14 @@ def test_list_runs_empty_list(runs, runs2):
 
 @pytest.mark.parametrize(["name", "n"], [("test_run", 6), ("test_run2", 3)])
 def test_list_runs_list(runs, runs2, name, n):
-    from hydraflow.runs import list_runs
+    from hydraflow.run_collection import list_runs
 
     filtered_runs = list_runs(experiment_names=[name])
     assert len(filtered_runs) == n
 
 
 def test_list_runs_none(runs, runs2):
-    from hydraflow.runs import list_runs
+    from hydraflow.run_collection import list_runs
 
     no_runs = list_runs(experiment_names=["non_existent_experiment"])
     assert len(no_runs) == 0
@@ -372,16 +381,20 @@ def test_run_collection_map(runs: RunCollection):
     assert all(isinstance(run_id, str) for run_id in results)
 
 
+def test_run_collection_map_args(runs: RunCollection):
+    results = list(runs.map(lambda run, x: run.info.run_id + x, "test"))
+    assert all(x.endswith("test") for x in results)
+
+
 def test_run_collection_map_run_id(runs: RunCollection):
     results = list(runs.map_run_id(lambda run_id: run_id))
     assert len(results) == len(runs._runs)
     assert all(isinstance(run_id, str) for run_id in results)
 
 
-def
-    results = list(runs.
-    assert
-    assert all(isinstance(config, DictConfig) for config in results)
+def test_run_collection_map_run_id_kwargs(runs: RunCollection):
+    results = list(runs.map_run_id(lambda run_id, x: x + run_id, x="test"))
+    assert all(x.startswith("test") for x in results)
 
 
 def test_run_collection_map_uri(runs: RunCollection):
@@ -391,9 +404,10 @@ def test_run_collection_map_uri(runs: RunCollection):
 
 
 def test_run_collection_map_dir(runs: RunCollection):
-    results = list(runs.map_dir(lambda dir_path: dir_path))
+    results = list(runs.map_dir(lambda dir_path, x: dir_path / x, "a.csv"))
     assert len(results) == len(runs._runs)
-    assert all(isinstance(dir_path,
+    assert all(isinstance(dir_path, Path) for dir_path in results)
+    assert all(dir_path.stem == "a" for dir_path in results)
 
 
 def test_run_collection_sort(runs: RunCollection):
@@ -427,15 +441,53 @@ def test_run_collection_group_by(runs: RunCollection):
     assert grouped[("0",)][0] == runs[0]
     assert grouped[("1",)][0] == runs[1]
 
-    grouped = runs.group_by(
+    grouped = runs.group_by("q")
     assert len(grouped) == 2
 
-    grouped = runs.group_by(
+    grouped = runs.group_by("r")
     assert len(grouped) == 3
 
 
-
-
+def test_filter_runs_empty_list():
+    from hydraflow.run_collection import filter_runs
+
+    x = filter_runs([], p=[0, 1, 2])
+    assert x == []
+
+
+def test_filter_runs_no_match(run_list: list[Run]):
+    from hydraflow.run_collection import filter_runs
+
+    x = filter_runs(run_list, p=[10, 11, 12])
+    assert x == []
+
+
+def test_get_run_no_match(run_list: list[Run]):
+    from hydraflow.run_collection import get_run
+
+    with pytest.raises(ValueError):
+        get_run(run_list, {"p": 10})
+
+
+def test_get_run_multiple_params(run_list: list[Run]):
+    from hydraflow.run_collection import get_run
+
+    run = get_run(run_list, {"p": 4, "q": 0})
+    assert isinstance(run, Run)
+    assert run.data.params["p"] == "4"
+    assert run.data.params["q"] == "0"
+
 
-
-
+def test_try_get_run_no_match(run_list: list[Run]):
+    from hydraflow.run_collection import try_get_run
+
+    assert try_get_run(run_list, {"p": 10}) is None
+
+
+def test_try_get_run_multiple_params(run_list: list[Run]):
+    from hydraflow.run_collection import try_get_run
+
+    run = try_get_run(run_list, {"p": 4, "q": 0})
+    assert isinstance(run, Run)
+    assert run.data.params["p"] == "4"
+    assert run.data.params["q"] == "0"

{hydraflow-0.2.5 → hydraflow-0.2.7}/tests/test_watch.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import subprocess
+import time
 from pathlib import Path
 
 import pytest
@@ -21,6 +22,7 @@ def test_watch(dir, monkeypatch, tmp_path):
 
     with watch(func, dir if isinstance(dir, str) else dir()):
         subprocess.check_call(["python", file])
+        time.sleep(1)
 
-    assert results[0][0] == "watch.txt"
-    assert results[0][1] == "watch"
+    assert results[0][0] == "watch.txt"  # type: ignore
+    assert results[0][1] == "watch"  # type: ignore