PyPI - hydraflow - Versions diffs - 0.2.5__tar.gz → 0.2.6__tar.gz - Mend

hydraflow 0.2.5.tar.gz → 0.2.6.tar.gz

Files changed (29)

{hydraflow-0.2.5 → hydraflow-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.5
+Version: 0.2.6
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -17,7 +17,9 @@ Classifier: Topic :: Documentation
 Classifier: Topic :: Software Development :: Documentation
 Requires-Python: >=3.10
 Requires-Dist: hydra-core>1.3
+Requires-Dist: joblib
 Requires-Dist: mlflow>2.15
+Requires-Dist: rich
 Requires-Dist: setuptools
 Requires-Dist: watchdog
 Requires-Dist: watchfiles

{hydraflow-0.2.5 → hydraflow-0.2.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.5"
+version = "0.2.6"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -21,7 +21,9 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
   "hydra-core>1.3",
+  "joblib",
   "mlflow>2.15",
+  "rich",
   "setuptools",
   "watchdog",
   "watchfiles",

hydraflow-0.2.6/src/hydraflow/progress.py ADDED Viewed

@@ -0,0 +1,56 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import joblib
+from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
+if TYPE_CHECKING:
+    from collections.abc import Iterable
+def progress(
+    *iterables: Iterable[int | tuple[int, int]],
+    n_jobs: int = -1,
+    task_name: str = "#{:0>3}",
+    main_task_name: str = "main",
+) -> None:
+    with Progress(
+        SpinnerColumn(),
+        *Progress.get_default_columns(),
+        TimeElapsedColumn(),
+    ) as progress:
+        n = len(iterables)
+        task_main = progress.add_task(main_task_name, total=None) if n > 1 else None
+        tasks = [progress.add_task(task_name.format(i), start=False, total=None) for i in range(n)]
+        total = {}
+        completed = {}
+        def func(i: int) -> None:
+            completed[i] = 0
+            total[i] = None
+            progress.start_task(tasks[i])
+            for index in iterables[i]:
+                if isinstance(index, tuple):
+                    completed[i], total[i] = index[0] + 1, index[1]
+                else:
+                    completed[i] = index + 1
+                progress.update(tasks[i], total=total[i], completed=completed[i])
+                if task_main is not None:
+                    if all(t is not None for t in total.values()):
+                        t = sum(total.values())
+                    else:
+                        t = None
+                    c = sum(completed.values())
+                    progress.update(task_main, total=t, completed=c)
+        if n > 1:
+            it = (joblib.delayed(func)(i) for i in range(n))
+            joblib.Parallel(n_jobs, prefer="threads")(it)
+        else:
+            func(0)

{hydraflow-0.2.5 → hydraflow-0.2.6}/src/hydraflow/runs.py RENAMED Viewed

@@ -51,13 +51,6 @@ def search_runs(
             error if ``experiment_names`` is also not ``None`` or ``[]``.
             ``None`` will default to the active experiment if ``experiment_names``
             is ``None`` or ``[]``.
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_names`` is ``None``
-            or ``[]``.
         filter_string (str): Filter query string, defaults to searching all
             runs.
         run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
@@ -501,30 +494,28 @@ class RunCollection:
         """
         return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
-    def group_by(
-        self, names: list[str] | None = None, *args
-    ) -> dict[tuple[str, ...], RunCollection]:
+    def group_by(self, *names: str | list[str]) -> dict[tuple[str | None, ...], RunCollection]:
         """
-        Group the runs by the specified parameter names and return a dictionary
-        where the keys are the parameter values and the values are the runs.
+        Group runs by specified parameter names.
+        This method groups the runs in the collection based on the values of the
+        specified parameters. Each unique combination of parameter values will
+        form a key in the returned dictionary.
         Args:
-            names (list[str] | None): The parameter names to group by.
-            *args: Additional positional arguments to specify parameter names.
+            *names (str | list[str]): The names of the parameters to group by.
+                This can be a single parameter name or multiple names provided
+                as separate arguments or as a list.
         Returns:
-            A dictionary where the keys are the parameter values and the values
-            are the runs.
+            dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
+            are tuples of parameter values and the values are RunCollection objects
+            containing the runs that match those parameter values.
         """
-        names = names[:] if names else []
-        names.extend(args)
-        grouped_runs = {}
+        grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
         for run in self._runs:
-            key = get_params(run, names)
-            if key not in grouped_runs:
-                grouped_runs[key] = []
-            grouped_runs[key].append(run)
+            key = get_params(run, *names)
+            grouped_runs.setdefault(key, []).append(run)
         return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
@@ -792,11 +783,32 @@ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run
     raise ValueError(msg)
-def get_params(run: Run, names: list[str] | None = None, *args) -> tuple[str, ...]:
-    names = names[:] if names else []
-    names.extend(args)
+def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
+    """
+    Retrieve the values of specified parameters from the given run.
+    This function extracts the values of the parameters identified by the
+    provided names from the specified run. It can accept both individual
+    parameter names and lists of parameter names.
+    Args:
+        run (Run): The run object from which to extract parameter values.
+        *names (str | list[str]): The names of the parameters to retrieve.
+            This can be a single parameter name or multiple names provided
+            as separate arguments or as a list.
-    return tuple(run.data.params[name] for name in names)
+    Returns:
+        tuple[str | None, ...]: A tuple containing the values of the specified
+        parameters in the order they were provided.
+    """
+    names_ = []
+    for name in names:
+        if isinstance(name, list):
+            names_.extend(name)
+        else:
+            names_.append(name)
+    return tuple(run.data.params.get(name) for name in names_)
 def get_param_names(runs: list[Run]) -> list[str]:

hydraflow-0.2.6/tests/scripts/progress.py ADDED Viewed

@@ -0,0 +1,22 @@
+import random
+import time
+from hydraflow.progress import progress
+def task(total):
+    def func():
+        for i in range(total):
+            yield i, total
+            time.sleep(random.random())
+    return func()
+def main():
+    tasks = [task(random.randint(10, 20)) for _ in range(12)]
+    progress(*tasks, n_jobs=4)
+if __name__ == "__main__":
+    main()

{hydraflow-0.2.5 → hydraflow-0.2.6}/tests/test_asyncio.py RENAMED Viewed

@@ -77,6 +77,7 @@ async def test_monitor_file_changes(tmp_path: Path, write_soon: Callable[[Path],
     await asyncio.sleep(1)
     stop_event.set()
     await monitor_task
+    await asyncio.sleep(1)
     assert len(changes_detected) > 0

{hydraflow-0.2.5 → hydraflow-0.2.6}/tests/test_context.py RENAMED Viewed

@@ -1,3 +1,5 @@
+import time
+from pathlib import Path
 from unittest.mock import MagicMock, patch
 import mlflow
@@ -17,7 +19,7 @@ def runs(monkeypatch, tmp_path):
         patch("hydraflow.context.HydraConfig.get") as mock_hydra_config,
         patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
     ):
-        mock_hydra_config.return_value.runtime.output_dir = "/tmp"
+        mock_hydra_config.return_value.runtime.output_dir = tmp_path.as_posix()
         mock_log_artifacts.return_value = None
         mlflow.set_experiment("test_run")
@@ -49,7 +51,7 @@ def test_runs_params_dict(runs: RunCollection, i: int):
     assert runs[i].data.params["d.i"] == str(i)
-def test_log_run_error_handling():
+def test_log_run_error_handling(tmp_path: Path):
     config = MagicMock()
     config.some_param = "value"
@@ -59,7 +61,7 @@ def test_log_run_error_handling():
         patch("hydraflow.context.mlflow.log_artifacts") as mock_log_artifacts,
     ):
         mock_log_params.side_effect = Exception("Test exception")
-        mock_hydra_config.return_value.runtime.output_dir = "/tmp"
+        mock_hydra_config.return_value.runtime.output_dir = tmp_path.as_posix()
         mock_log_artifacts.return_value = None
         with pytest.raises(Exception, match="Test exception"):
@@ -67,14 +69,20 @@ def test_log_run_error_handling():
                 pass
-def test_watch_error_handling():
-    func = MagicMock()
-    dir = "/tmp"
+def test_watch_context_manager(tmp_path: Path):
+    test_dir = tmp_path / "test_watch"
+    test_dir.mkdir(parents=True, exist_ok=True)
+    test_file = test_dir / "test_file.txt"
-    with patch("hydraflow.context.Observer") as mock_observer:
-        mock_observer_instance = mock_observer.return_value
-        mock_observer_instance.start.side_effect = Exception("Test exception")
+    called = []
-        with pytest.raises(Exception, match="Test exception"):
-            with watch(func, dir):
-                pass
+    def mock_func(path: Path):
+        assert path == test_file
+        called.append(path)
+    with watch(mock_func, test_dir):
+        test_file.write_text("new content")
+        time.sleep(1)
+    assert len(called) == 1
+    assert called[0] == test_file

hydraflow-0.2.6/tests/test_progress.py ADDED Viewed

File without changes

{hydraflow-0.2.5 → hydraflow-0.2.6}/tests/test_runs.py RENAMED Viewed

@@ -170,6 +170,16 @@ def test_try_get_run_error(run_list: list[Run]):
         try_get_run(run_list, {"q": 0})
+def test_get_params(run_list: list[Run]):
+    from hydraflow.runs import get_params
+    assert get_params(run_list[1], "p") == ("1",)
+    assert get_params(run_list[2], "p", "q") == ("2", "0")
+    assert get_params(run_list[3], ["p", "q"]) == ("3", "0")
+    assert get_params(run_list[4], "p", ["q", "r"]) == ("4", "0", "1")
+    assert get_params(run_list[5], ["a", "q"], "r") == (None, "None", "2")
 def test_get_param_names(run_list: list[Run]):
     from hydraflow.runs import get_param_names
@@ -427,15 +437,53 @@ def test_run_collection_group_by(runs: RunCollection):
     assert grouped[("0",)][0] == runs[0]
     assert grouped[("1",)][0] == runs[1]
-    grouped = runs.group_by(["q"])
+    grouped = runs.group_by("q")
     assert len(grouped) == 2
-    grouped = runs.group_by(["r"])
+    grouped = runs.group_by("r")
     assert len(grouped) == 3
-# def test_hydra_output_dir_error(runs_list: list[Run]):
-#     from hydraflow.runs import get_hydra_output_dir
+def test_filter_runs_empty_list():
+    from hydraflow.runs import filter_runs
+    x = filter_runs([], p=[0, 1, 2])
+    assert x == []
+def test_filter_runs_no_match(run_list: list[Run]):
+    from hydraflow.runs import filter_runs
+    x = filter_runs(run_list, p=[10, 11, 12])
+    assert x == []
+def test_get_run_no_match(run_list: list[Run]):
+    from hydraflow.runs import get_run
+    with pytest.raises(ValueError):
+        get_run(run_list, {"p": 10})
-#     with pytest.raises(FileNotFoundError):
-#         get_hydra_output_dir(runs_list[0])
+def test_get_run_multiple_params(run_list: list[Run]):
+    from hydraflow.runs import get_run
+    run = get_run(run_list, {"p": 4, "q": 0})
+    assert isinstance(run, Run)
+    assert run.data.params["p"] == "4"
+    assert run.data.params["q"] == "0"
+def test_try_get_run_no_match(run_list: list[Run]):
+    from hydraflow.runs import try_get_run
+    assert try_get_run(run_list, {"p": 10}) is None
+def test_try_get_run_multiple_params(run_list: list[Run]):
+    from hydraflow.runs import try_get_run
+    run = try_get_run(run_list, {"p": 4, "q": 0})
+    assert isinstance(run, Run)
+    assert run.data.params["p"] == "4"
+    assert run.data.params["q"] == "0"

{hydraflow-0.2.5 → hydraflow-0.2.6}/tests/test_watch.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import subprocess
+import time
 from pathlib import Path
 import pytest
@@ -21,6 +22,7 @@ def test_watch(dir, monkeypatch, tmp_path):
     with watch(func, dir if isinstance(dir, str) else dir()):
         subprocess.check_call(["python", file])
+        time.sleep(1)
-    assert results[0][0] == "watch.txt"
-    assert results[0][1] == "watch"
+    assert results[0][0] == "watch.txt"  # type: ignore
+    assert results[0][1] == "watch"  # type: ignore