PyPI - hydraflow - Versions diffs - 0.6.0__tar.gz → 0.6.2__tar.gz - Mend

hydraflow 0.6.0.tar.gz → 0.6.2.tar.gz

Files changed (56) hide show

{hydraflow-0.6.0 → hydraflow-0.6.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.6.0
+Version: 0.6.2
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.6.0 → hydraflow-0.6.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.6.0"
+version = "0.6.2"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = { file = "LICENSE" }

{hydraflow-0.6.0 → hydraflow-0.6.2}/src/hydraflow/__init__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Integrate Hydra and MLflow to manage and track machine learning experiments."""
 from .config import select_config, select_overrides
-from .context import chdir_artifact, chdir_hydra_output, log_run, start_run
+from .context import chdir_artifact, log_run, start_run
 from .mlflow import list_runs, search_runs, set_experiment
 from .run_collection import RunCollection
 from .utils import (
@@ -17,7 +17,6 @@ from .utils import (
 __all__ = [
     "RunCollection",
     "chdir_artifact",
-    "chdir_hydra_output",
     "get_artifact_dir",
     "get_artifact_path",
     "get_hydra_output_dir",

{hydraflow-0.6.0 → hydraflow-0.6.2}/src/hydraflow/context.py RENAMED Viewed

@@ -13,6 +13,7 @@ import mlflow.artifacts
 from hydra.core.hydra_config import HydraConfig
 from hydraflow.mlflow import log_params
+from hydraflow.utils import get_artifact_dir
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -69,24 +70,26 @@ def log_run(
         raise
     finally:
-        log_hydra(output_dir)
+        log_text(output_dir)
-def log_hydra(output_dir: Path) -> None:
-    """Log hydra logs of the current run as artifacts.
+def log_text(directory: Path, pattern: str = "*.log") -> None:
+    """Log text files in the given directory as artifacts.
+    Append the text files to the existing text file in the artifact directory.
     Args:
-        output_dir (Path): The output directory of the Hydra job.
+        directory (Path): The directory to find the logs in.
+        pattern (str): The pattern to match the logs.
     """
-    uri = mlflow.get_artifact_uri()
-    artifact_dir = Path(mlflow.artifacts.download_artifacts(uri))
+    artifact_dir = get_artifact_dir()
-    for file_hydra in output_dir.glob("*.log"):
-        if not file_hydra.is_file():
+    for file in directory.glob(pattern):
+        if not file.is_file():
             continue
-        file_artifact = artifact_dir / file_hydra.name
+        file_artifact = artifact_dir / file.name
         if file_artifact.exists():
             text = file_artifact.read_text()
             if not text.endswith("\n"):
@@ -94,8 +97,8 @@ def log_hydra(output_dir: Path) -> None:
         else:
             text = ""
-        text += file_hydra.read_text()
-        mlflow.log_text(text, file_hydra.name)
+        text += file.read_text()
+        mlflow.log_text(text, file.name)
 @contextmanager
@@ -174,29 +177,7 @@ def start_run(  # noqa: PLR0913
 @contextmanager
-def chdir_hydra_output() -> Iterator[Path]:
-    """Change the current working directory to the hydra output directory.
-    This context manager changes the current working directory to the hydra output
-    directory. It ensures that the directory is changed back to the original
-    directory after the context is exited.
-    """
-    curdir = Path.cwd()
-    path = HydraConfig.get().runtime.output_dir
-    os.chdir(path)
-    try:
-        yield Path(path)
-    finally:
-        os.chdir(curdir)
-@contextmanager
-def chdir_artifact(
-    run: Run,
-    artifact_path: str | None = None,
-) -> Iterator[Path]:
+def chdir_artifact(run: Run | None = None) -> Iterator[Path]:
     """Change the current working directory to the artifact directory of the given run.
     This context manager changes the current working directory to the artifact
@@ -204,19 +185,16 @@ def chdir_artifact(
     to the original directory after the context is exited.
     Args:
-        run (Run): The run to get the artifact directory from.
-        artifact_path (str | None): The artifact path.
+        run (Run | None): The run to get the artifact directory from.
     """
     curdir = Path.cwd()
-    path = mlflow.artifacts.download_artifacts(
-        run_id=run.info.run_id,
-        artifact_path=artifact_path,
-    )
+    artifact_dir = get_artifact_dir(run)
+    os.chdir(artifact_dir)
-    os.chdir(path)
     try:
-        yield Path(path)
+        yield artifact_dir
     finally:
         os.chdir(curdir)

{hydraflow-0.6.0 → hydraflow-0.6.2}/src/hydraflow/mlflow.py RENAMED Viewed

@@ -16,7 +16,6 @@ Key Features:
 from __future__ import annotations
-from pathlib import Path
 from typing import TYPE_CHECKING
 import joblib
@@ -28,8 +27,11 @@ from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
 from hydraflow.config import iter_params
 from hydraflow.run_collection import RunCollection
+from hydraflow.utils import get_artifact_dir
 if TYPE_CHECKING:
+    from pathlib import Path
     from mlflow.entities.experiment import Experiment
@@ -211,16 +213,10 @@ def _list_runs(
     for name in experiment_names:
         if experiment := mlflow.get_experiment_by_name(name):
-            loc = experiment.artifact_location
-            if isinstance(loc, str):
-                if loc.startswith("file:"):
-                    path = Path(mlflow.artifacts.download_artifacts(loc))
-                elif Path(loc).is_dir():
-                    path = Path(loc)
-                else:
-                    continue  # no cov
+            uri = experiment.artifact_location
+            if isinstance(uri, str):
+                path = get_artifact_dir(uri=uri)
                 run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
     it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)

{hydraflow-0.6.0 → hydraflow-0.6.2}/src/hydraflow/run_collection.py RENAMED Viewed

@@ -236,7 +236,7 @@ class RunCollection:
     def filter(
         self,
-        config: object | None = None,
+        config: object | Callable[[Run], bool] | None = None,
         *,
         override: bool = False,
         select: list[str] | None = None,
@@ -257,11 +257,13 @@ class RunCollection:
         - Membership checks for lists of values.
         - Range checks for tuples of two values (inclusive of both the lower
           and upper bound).
+        - Callable that takes a `Run` object and returns a boolean value.
         Args:
-            config (object | None): The configuration object to filter the runs.
-                This can be any object that provides key-value pairs through
-                the `iter_params` function.
+            config (object | Callable[[Run], bool] | None): The configuration object
+                to filter the runs. This can be any object that provides key-value
+                pairs through the `iter_params` function, or a callable that
+                takes a `Run` object and returns a boolean value.
             override (bool): If True, override the configuration object with the
                 provided key-value pairs.
             select (list[str] | None): The list of parameters to select.
@@ -711,7 +713,7 @@ def _param_matches(run: Run, key: str, value: Any) -> bool:
 def filter_runs(
     runs: list[Run],
-    config: object | None = None,
+    config: object | Callable[[Run], bool] | None = None,
     *,
     override: bool = False,
     select: list[str] | None = None,
@@ -735,9 +737,11 @@ def filter_runs(
     Args:
         runs (list[Run]): The list of runs to filter.
-        config (object | None, optional): The configuration object to filter the
-            runs. This can be any object that provides key-value pairs through
-            the `iter_params` function. Defaults to None.
+        config (object | Callable[[Run], bool] | None, optional): The
+            configuration object to filter the runs. This can be any object
+            that provides key-value pairs through the `iter_params` function.
+            This can also be a callable that takes a `Run` object and returns
+            a boolean value. Defaults to None.
         override (bool, optional): If True, filter the runs based on
             the overrides. Defaults to False.
         select (list[str] | None, optional): The list of parameters to select.
@@ -750,15 +754,19 @@ def filter_runs(
         A list of runs that match the specified configuration and key-value pairs.
     """
-    if override:
-        config = select_overrides(config)
-    elif select:
-        config = select_config(config, select)
-    for key, value in chain(iter_params(config), kwargs.items()):
-        runs = [run for run in runs if _param_matches(run, key, value)]
-        if not runs:
-            return []
+    if callable(config):
+        runs = [run for run in runs if config(run)]
+    else:
+        if override:
+            config = select_overrides(config)
+        elif select:
+            config = select_config(config, select)
+        for key, value in chain(iter_params(config), kwargs.items()):
+            runs = [run for run in runs if _param_matches(run, key, value)]
+            if not runs:
+                return []
     if status is None:
         return runs

{hydraflow-0.6.0 → hydraflow-0.6.2}/src/hydraflow/utils.py RENAMED Viewed

@@ -3,6 +3,8 @@
 from __future__ import annotations
 import shutil
+import urllib.parse
+import urllib.request
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -16,30 +18,40 @@ if TYPE_CHECKING:
     from collections.abc import Iterable
-def get_artifact_dir(run: Run | None = None) -> Path:
+def get_artifact_dir(run: Run | None = None, uri: str | None = None) -> Path:
     """Retrieve the artifact directory for the given run.
     This function uses MLflow to get the artifact directory for the given run.
     Args:
         run (Run | None): The run object. Defaults to None.
+        uri (str | None): The URI of the artifact. Defaults to None.
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
-    uri = mlflow.get_artifact_uri() if run is None else run.info.artifact_uri
+    if run is not None and uri is not None:
+        raise ValueError("Cannot provide both run and uri")
+    if run is None and uri is None:
+        uri = mlflow.get_artifact_uri()
+    elif run:
+        uri = run.info.artifact_uri
     if not isinstance(uri, str):
         raise NotImplementedError
     if uri.startswith("file:"):
-        return Path(mlflow.artifacts.download_artifacts(uri))
+        return file_uri_to_path(uri)
+    return Path(uri)
-    if Path(uri).is_dir():
-        return Path(uri)
-    raise NotImplementedError
+def file_uri_to_path(uri: str) -> Path:
+    """Convert a file URI to a local path."""
+    path = urllib.parse.urlparse(uri).path
+    return Path(urllib.request.url2pathname(path))  # for Windows
 def get_artifact_path(run: Run | None, path: str) -> Path:

{hydraflow-0.6.0 → hydraflow-0.6.2}/tests/context/logging.py RENAMED Viewed

@@ -6,6 +6,7 @@ from pathlib import Path
 import hydra
 from hydra.core.config_store import ConfigStore
+from hydra.core.hydra_config import HydraConfig
 import hydraflow
@@ -30,9 +31,9 @@ def app(cfg: Config):
         log.info("second" if run else "first")
         log.info(cfg.count)
-        with hydraflow.chdir_hydra_output():
-            Path("text.log").write_text("text\n")
-            Path("dir.log").mkdir()
+        output_dir = Path(HydraConfig.get().runtime.output_dir)
+        (output_dir / "text.log").write_text("text\n")
+        (output_dir / "dir.log").mkdir()
 if __name__ == "__main__":

{hydraflow-0.6.0 → hydraflow-0.6.2}/tests/run/test_collection.py RENAMED Viewed

@@ -129,6 +129,12 @@ def test_filter_list_none(runs: list[Run]):
     assert not filter_runs(runs, ["invalid=0"])
+def test_filter_callable(runs: list[Run]):
+    runs = filter_runs(runs, lambda run: run.data.params["r"] == "0")
+    assert len(runs) == 2
+    assert all(run.data.params["q"] == "0" for run in runs)
 @pytest.mark.parametrize(
     ("status", "n"),
     [

{hydraflow-0.6.0 → hydraflow-0.6.2}/tests/utils/test_utils.py RENAMED Viewed

@@ -1,3 +1,4 @@
+import sys
 from typing import TYPE_CHECKING
 import pytest
@@ -27,6 +28,30 @@ def run(rc: RunCollection):
     return rc.first()
+@pytest.mark.parametrize(
+    ("uri", "path"),
+    [("/a/b/c", "/a/b/c"), ("file:///a/b/c", "/a/b/c"), ("file:C:/a/b/c", "C:/a/b/c")],
+)
+def test_file_uri_to_path(uri, path):
+    from hydraflow.utils import file_uri_to_path
+    assert file_uri_to_path(uri).as_posix() == path
+@pytest.mark.skipif(sys.platform != "win32", reason="This test is for Windows")
+def test_file_uri_to_path_win_python_310_311():
+    from hydraflow.utils import file_uri_to_path
+    assert file_uri_to_path("file:///C:/a/b/c").as_posix() == "C:/a/b/c"
+def test_artifact_dir_error(run: Run):
+    from hydraflow.utils import get_artifact_dir
+    with pytest.raises(ValueError):
+        get_artifact_dir(run, "a")
 def test_hydra_output_dir(run: Run):
     from hydraflow.utils import get_artifact_path, get_hydra_output_dir