PyPI - hydraflow - Versions diffs - 0.2.10__tar.gz → 0.2.12__tar.gz - Mend

hydraflow 0.2.10.tar.gz → 0.2.12.tar.gz

Files changed (32) hide show

{hydraflow-0.2.10 → hydraflow-0.2.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.10
+Version: 0.2.12
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.2.10 → hydraflow-0.2.12}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.10"
+version = "0.2.12"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"

{hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/mlflow.py RENAMED Viewed

@@ -22,6 +22,7 @@ from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING
+import joblib
 import mlflow
 from hydra.core.hydra_config import HydraConfig
 from mlflow.entities import ViewType
@@ -146,7 +147,9 @@ def search_runs(
     return RunCollection(runs)  # type: ignore
-def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
+def list_runs(
+    experiment_names: str | list[str] | None = None, *, n_jobs: int = 0
+) -> RunCollection:
     """
     List all runs for the specified experiments.
@@ -168,8 +171,30 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
     Returns:
         A `RunCollection` object containing the runs for the specified experiments.
     """
-    if experiment_names == []:
+    if isinstance(experiment_names, str):
+        experiment_names = [experiment_names]
+    elif experiment_names == []:
         experiments = mlflow.search_experiments()
         experiment_names = [e.name for e in experiments if e.name != "Default"]
-    return search_runs(experiment_names=experiment_names)
+    if n_jobs == 0:
+        return search_runs(experiment_names=experiment_names)
+    if experiment_names is None:
+        raise NotImplementedError
+    run_ids = []
+    for name in experiment_names:
+        if experiment := mlflow.get_experiment_by_name(name):
+            loc = experiment.artifact_location
+            if isinstance(loc, str) and loc.startswith("file://"):
+                path = Path(mlflow.artifacts.download_artifacts(loc))
+                run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
+    it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
+    runs = joblib.Parallel(n_jobs, prefer="threads")(it)
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore

{hydraflow-0.2.10 → hydraflow-0.2.12}/src/hydraflow/progress.py RENAMED Viewed

@@ -161,21 +161,27 @@ def multi_tasks_progress(
     with Progress(*columns, transient=transient or False, **kwargs) as progress:
         task_main = progress.add_task(main_description, total=None)
+        task_ids = [
+            progress.add_task(description.format(i), start=False, total=None)
+            for i in range(len(iterables))
+        ]
         total = {}
         completed = {}
-        def func(i: int, iterable: Iterable[int | tuple[int, int]]) -> None:
-            task_id = progress.add_task(description.format(i), total=None)
+        def func(i: int) -> None:
             completed[i] = 0
             total[i] = None
+            progress.start_task(task_ids[i])
-            for index in iterable:
+            for index in iterables[i]:
                 if isinstance(index, tuple):
                     completed[i], total[i] = index[0] + 1, index[1]
                 else:
                     completed[i] = index + 1
-                progress.update(task_id, total=total[i], completed=completed[i])
+                progress.update(task_ids[i], total=total[i], completed=completed[i])
                 if all(t is not None for t in total.values()):
                     t = sum(total.values())
@@ -185,7 +191,7 @@ def multi_tasks_progress(
                 progress.update(task_main, total=t, completed=c)
             if transient is not False:
-                progress.remove_task(task_id)
+                progress.remove_task(task_ids[i])
-        it = (joblib.delayed(func)(i, it) for i, it in enumerate(iterables))
+        it = (joblib.delayed(func)(i) for i in range(len(iterables)))
         joblib.Parallel(n_jobs, prefer="threads")(it)

{hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_app.py RENAMED Viewed

@@ -26,6 +26,20 @@ def rc(monkeypatch, tmp_path):
     yield hydraflow.list_runs()
+@pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
+def test_list_runs_parallel(rc: RunCollection, n_jobs: int):
+    from hydraflow.mlflow import list_runs
+    rc_ = list_runs("_info_", n_jobs=n_jobs)
+    assert len(rc) == len(rc_)
+    for a, b in zip(rc, rc_):
+        assert a.info.run_id == b.info.run_id
+        assert a.info.start_time == b.info.start_time
+        assert a.info.status == b.info.status
+        assert a.info.artifact_uri == b.info.artifact_uri
 def test_app_info_run_id(rc: RunCollection):
     assert len(rc.info.run_id) == 4

{hydraflow-0.2.10 → hydraflow-0.2.12}/tests/test_run_collection.py RENAMED Viewed

@@ -364,14 +364,14 @@ def test_list_runs_empty_list(runs, runs2):
 def test_list_runs_list(runs, runs2, name, n):
     from hydraflow.mlflow import list_runs
-    filtered_runs = list_runs(experiment_names=[name])
+    filtered_runs = list_runs(name)
     assert len(filtered_runs) == n
 def test_list_runs_none(runs, runs2):
     from hydraflow.mlflow import list_runs
-    no_runs = list_runs(experiment_names=["non_existent_experiment"])
+    no_runs = list_runs(["non_existent_experiment"])
     assert len(no_runs) == 0