PyPI - hydraflow - Versions diffs - 0.2.7__tar.gz → 0.2.8__tar.gz - Mend

hydraflow 0.2.7.tar.gz → 0.2.8.tar.gz

Files changed (33) hide show

{hydraflow-0.2.7 → hydraflow-0.2.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.7
+Version: 0.2.8
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.2.7 → hydraflow-0.2.8}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.7"
+version = "0.2.8"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"

{hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/__init__.py RENAMED Viewed

@@ -1,11 +1,11 @@
 from .context import chdir_artifact, log_run, start_run, watch
-from .info import load_config
-from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
-from .run_collection import (
-    RunCollection,
+from .info import get_artifact_dir, get_hydra_output_dir, load_config
+from .mlflow import (
     list_runs,
     search_runs,
+    set_experiment,
 )
+from .run_collection import RunCollection
 __all__ = [
     "RunCollection",

{hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/context.py RENAMED Viewed

@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
 import mlflow
 from hydra.core.hydra_config import HydraConfig
-from watchdog.events import FileModifiedEvent, FileSystemEventHandler
+from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
 from watchdog.observers import Observer
-from hydraflow.mlflow import get_artifact_dir, log_params
+from hydraflow.info import get_artifact_dir
+from hydraflow.mlflow import log_params
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
@@ -68,7 +69,7 @@ def log_run(
         mlflow.log_artifact(local_path)
     try:
-        with watch(log_artifact, output_dir):
+        with watch(log_artifact, output_dir, ignore_log=False):
             yield
     except Exception as e:
@@ -140,9 +141,11 @@ def start_run(
 @contextmanager
 def watch(
-    func: Callable[[Path], None],
+    callback: Callable[[Path], None],
     dir: Path | str = "",
     timeout: int = 60,
+    ignore_patterns: list[str] | None = None,
+    ignore_log: bool = True,
 ) -> Iterator[None]:
     """
     Watch the given directory for changes and call the provided function
@@ -154,7 +157,7 @@ def watch(
     period or until the context is exited.
     Args:
-        func (Callable[[Path], None]): The function to call when a change is
+        callback (Callable[[Path], None]): The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
         dir (Path | str): The directory to watch. If not specified,
@@ -174,7 +177,7 @@ def watch(
     if isinstance(dir, Path):
         dir = dir.as_posix()
-    handler = Handler(func)
+    handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
     observer = Observer()
     observer.schedule(handler, dir, recursive=True)
     observer.start()
@@ -198,10 +201,23 @@ def watch(
         observer.join()
-class Handler(FileSystemEventHandler):
-    def __init__(self, func: Callable[[Path], None]) -> None:
+class Handler(PatternMatchingEventHandler):
+    def __init__(
+        self,
+        func: Callable[[Path], None],
+        ignore_patterns: list[str] | None = None,
+        ignore_log: bool = True,
+    ) -> None:
         self.func = func
+        if ignore_log:
+            if ignore_patterns:
+                ignore_patterns.append("*.log")
+            else:
+                ignore_patterns = ["*.log"]
+        super().__init__(ignore_patterns=ignore_patterns)
     def on_modified(self, event: FileModifiedEvent) -> None:
         file = Path(str(event.src_path))
         if file.is_file():

{hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/info.py RENAMED Viewed

@@ -1,14 +1,14 @@
 from __future__ import annotations
+from pathlib import Path
 from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
 from omegaconf import DictConfig, OmegaConf
-from hydraflow.mlflow import get_artifact_dir
 if TYPE_CHECKING:
-    from pathlib import Path
     from mlflow.entities import Run
     from hydraflow.run_collection import RunCollection
@@ -43,6 +43,59 @@ class RunCollectionInfo:
         return [load_config(run) for run in self._runs]
+def get_artifact_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the artifact directory for the given run.
+    This function uses MLflow to get the artifact directory for the given run.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    if run is None:
+        uri = mlflow.get_artifact_uri()
+    else:
+        uri = artifact_utils.get_artifact_uri(run.info.run_id)
+    return Path(mlflow.artifacts.download_artifacts(uri))
+def get_hydra_output_dir(run: Run | None = None) -> Path:
+    """
+    Retrieve the Hydra output directory for the given run.
+    This function returns the Hydra output directory. If no run is provided,
+    it retrieves the output directory from the current Hydra configuration.
+    If a run is provided, it retrieves the artifact path for the run, loads
+    the Hydra configuration from the downloaded artifacts, and returns the
+    output directory specified in that configuration.
+    Args:
+        run (Run | None): The run object. Defaults to None.
+    Returns:
+        Path: The path to the Hydra output directory.
+    Raises:
+        FileNotFoundError: If the Hydra configuration file is not found
+            in the artifacts.
+    """
+    if run is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
 def load_config(run: Run) -> DictConfig:
     """
     Load the configuration for a given run.

hydraflow-0.2.8/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""
+This module provides functionality to log parameters from Hydra configuration objects
+and set up experiments using MLflow. It includes methods for managing experiments,
+searching for runs, and logging parameters and artifacts.
+Key Features:
+- **Experiment Management**: Set and manage MLflow experiments with customizable names
+  based on Hydra configuration.
+- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
+  MLflow, ensuring that all relevant information is captured during experiments.
+- **Run Search**: Search for runs based on various criteria, allowing for flexible
+  retrieval of experiment results.
+- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
+  easy access to outputs generated during experiments.
+This module is designed to integrate seamlessly with Hydra, providing a robust
+solution for tracking machine learning experiments and their associated metadata.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.entities import ViewType
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
+from hydraflow.config import iter_params
+from hydraflow.run_collection import RunCollection
+if TYPE_CHECKING:
+    from mlflow.entities.experiment import Experiment
+def set_experiment(
+    prefix: str = "",
+    suffix: str = "",
+    uri: str | Path | None = None,
+) -> Experiment:
+    """
+    Sets the experiment name and tracking URI optionally.
+    This function sets the experiment name by combining the given prefix,
+    the job name from HydraConfig, and the given suffix. Optionally, it can
+    also set the tracking URI.
+    Args:
+        prefix (str): The prefix to prepend to the experiment name.
+        suffix (str): The suffix to append to the experiment name.
+        uri (str | Path | None): The tracking URI to use. Defaults to None.
+    Returns:
+        Experiment: An instance of `mlflow.entities.Experiment` representing
+        the new active experiment.
+    """
+    if uri is not None:
+        mlflow.set_tracking_uri(uri)
+    hc = HydraConfig.get()
+    name = f"{prefix}{hc.job.name}{suffix}"
+    return mlflow.set_experiment(name)
+def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log the parameters from the given configuration object.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
+    Args:
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
+            Defaults to None.
+    """
+    for key, value in iter_params(config):
+        mlflow.log_param(key, value, synchronous=synchronous)
+def search_runs(
+    experiment_ids: list[str] | None = None,
+    filter_string: str = "",
+    run_view_type: int = ViewType.ACTIVE_ONLY,
+    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
+    order_by: list[str] | None = None,
+    search_all_experiments: bool = False,
+    experiment_names: list[str] | None = None,
+) -> RunCollection:
+    """
+    Search for Runs that fit the specified criteria.
+    This function wraps the `mlflow.search_runs` function and returns the
+    results as a `RunCollection` object. It allows for flexible searching of
+    MLflow runs based on various criteria.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
+    Args:
+        experiment_ids (list[str] | None): List of experiment IDs. Search can
+            work with experiment IDs or experiment names, but not both in the
+            same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_names`` is also not ``None`` or ``[]``.
+            ``None`` will default to the active experiment if ``experiment_names``
+            is ``None`` or ``[]``.
+        filter_string (str): Filter query string, defaults to searching all
+            runs.
+        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
+            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
+        max_results (int): The maximum number of runs to put in the dataframe.
+            Default is 100,000 to avoid causing out-of-memory issues on the user's
+            machine.
+        order_by (list[str] | None): List of columns to order by (e.g.,
+            "metrics.rmse"). The ``order_by`` column can contain an optional
+            ``DESC`` or ``ASC`` value. The default is ``ASC``. The
+            default ordering is to sort by ``start_time DESC``, then
+            ``run_id``.
+        search_all_experiments (bool): Boolean specifying whether all
+            experiments should be searched. Only honored if ``experiment_ids``
+            is ``[]`` or ``None``.
+        experiment_names (list[str] | None): List of experiment names. Search
+            can work with experiment IDs or experiment names, but not both in
+            the same call. Values other than ``None`` or ``[]`` will result in
+            error if ``experiment_ids`` is also not ``None`` or ``[]``.
+            ``None`` will default to the active experiment if
+            ``experiment_ids`` is ``None`` or ``[]``.
+    Returns:
+        A `RunCollection` object containing the search results.
+    """
+    runs = mlflow.search_runs(
+        experiment_ids=experiment_ids,
+        filter_string=filter_string,
+        run_view_type=run_view_type,
+        max_results=max_results,
+        order_by=order_by,
+        output_format="list",
+        search_all_experiments=search_all_experiments,
+        experiment_names=experiment_names,
+    )
+    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
+    return RunCollection(runs)  # type: ignore
+def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
+    """
+    List all runs for the specified experiments.
+    This function retrieves all runs for the given list of experiment names.
+    If no experiment names are provided (None), it defaults to searching all runs
+    for the currently active experiment. If an empty list is provided, the function
+    will search all runs for all experiments except the "Default" experiment.
+    The function returns the results as a `RunCollection` object.
+    Note:
+        The returned runs are sorted by their start time in ascending order.
+    Args:
+        experiment_names (list[str] | None): List of experiment names to search
+            for runs. If None, the currently active experiment is searched.
+            If an empty list, all experiments except the "Default"
+            experiment are searched.
+    Returns:
+        A `RunCollection` object containing the runs for the specified experiments.
+    """
+    if experiment_names == []:
+        experiments = mlflow.search_experiments()
+        experiment_names = [e.name for e in experiments if e.name != "Default"]
+    return search_runs(experiment_names=experiment_names)

{hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/run_collection.py RENAMED Viewed

@@ -1,7 +1,24 @@
 """
-This module provides functionality for managing and interacting with MLflow
-runs. It includes the `RunCollection` class and various methods to filter
-runs, retrieve run information, log artifacts, and load configurations.
+This module provides functionality for managing and interacting with MLflow runs.
+It includes the `RunCollection` class, which serves as a container for multiple MLflow
+run objects, and various methods to filter, retrieve, and manipulate these runs.
+Key Features:
+- **Run Management**: The `RunCollection` class allows for easy management of multiple
+  MLflow runs, providing methods to access, filter, and sort runs based on various
+  criteria.
+- **Filtering**: The module supports filtering runs based on specific configurations
+  and parameters, enabling users to easily find runs that match certain conditions.
+- **Retrieval**: Users can retrieve specific runs, including the first, last, or any
+  run that matches a given configuration.
+- **Artifact Handling**: The module provides methods to access and manipulate the
+  artifacts associated with each run, including retrieving artifact URIs and directories.
+The `RunCollection` class is designed to work seamlessly with the MLflow tracking
+API, providing a robust solution for managing machine learning experiment runs and
+their associated metadata. This module is particularly useful for data scientists and
+machine learning engineers who need to track and analyze the results of their experiments
+efficiently.
 """
 from __future__ import annotations
@@ -10,10 +27,7 @@ from dataclasses import dataclass, field
 from itertools import chain
 from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
-import mlflow
-from mlflow.entities import ViewType
 from mlflow.entities.run import Run
-from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from hydraflow.config import iter_params
 from hydraflow.info import RunCollectionInfo
@@ -26,101 +40,6 @@ if TYPE_CHECKING:
     from omegaconf import DictConfig
-def search_runs(
-    experiment_ids: list[str] | None = None,
-    filter_string: str = "",
-    run_view_type: int = ViewType.ACTIVE_ONLY,
-    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
-    order_by: list[str] | None = None,
-    search_all_experiments: bool = False,
-    experiment_names: list[str] | None = None,
-) -> RunCollection:
-    """
-    Search for Runs that fit the specified criteria.
-    This function wraps the `mlflow.search_runs` function and returns the
-    results as a `RunCollection` object. It allows for flexible searching of
-    MLflow runs based on various criteria.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_ids (list[str] | None): List of experiment IDs. Search can
-            work with experiment IDs or experiment names, but not both in the
-            same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_names`` is also not ``None`` or ``[]``.
-            ``None`` will default to the active experiment if ``experiment_names``
-            is ``None`` or ``[]``.
-        filter_string (str): Filter query string, defaults to searching all
-            runs.
-        run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
-            or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
-        max_results (int): The maximum number of runs to put in the dataframe.
-            Default is 100,000 to avoid causing out-of-memory issues on the user's
-            machine.
-        order_by (list[str] | None): List of columns to order by (e.g.,
-            "metrics.rmse"). The ``order_by`` column can contain an optional
-            ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
-            ordering is to sort by ``start_time DESC``, then ``run_id``.
-            ``start_time DESC``, then ``run_id``.
-        search_all_experiments (bool): Boolean specifying whether all
-            experiments should be searched. Only honored if ``experiment_ids``
-            is ``[]`` or ``None``.
-        experiment_names (list[str] | None): List of experiment names. Search
-            can work with experiment IDs or experiment names, but not both in
-            the same call. Values other than ``None`` or ``[]`` will result in
-            error if ``experiment_ids`` is also not ``None`` or ``[]``.
-            ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
-            default to the active experiment if ``experiment_ids`` is ``None``
-            or ``[]``.
-    Returns:
-        A `RunCollection` object containing the search results.
-    """
-    runs = mlflow.search_runs(
-        experiment_ids=experiment_ids,
-        filter_string=filter_string,
-        run_view_type=run_view_type,
-        max_results=max_results,
-        order_by=order_by,
-        output_format="list",
-        search_all_experiments=search_all_experiments,
-        experiment_names=experiment_names,
-    )
-    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
-    return RunCollection(runs)  # type: ignore
-def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
-    """
-    List all runs for the specified experiments.
-    This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), it defaults to searching all runs
-    for the currently active experiment. If an empty list is provided, the function
-    will search all runs for all experiments except the "Default" experiment.
-    The function returns the results as a `RunCollection` object.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None or an empty list is provided, the function will
-            search the currently active experiment or all experiments except
-            the "Default" experiment.
-    Returns:
-        A `RunCollection` object containing the runs for the specified experiments.
-    """
-    if experiment_names == []:
-        experiments = mlflow.search_experiments()
-        experiment_names = [e.name for e in experiments if e.name != "Default"]
-    return search_runs(experiment_names=experiment_names)
 T = TypeVar("T")
 P = ParamSpec("P")
@@ -132,6 +51,11 @@ class RunCollection:
     This class provides methods to interact with the runs, such as filtering,
     retrieving specific runs, and accessing run information.
+    Key Features:
+    - Filtering: Easily filter runs based on various criteria.
+    - Retrieval: Access specific runs by index or through methods.
+    - Metadata: Access run metadata and associated information.
     """
     _runs: list[Run]

hydraflow-0.2.7/tests/scripts/log_run.py → hydraflow-0.2.8/tests/scripts/app.py RENAMED Viewed

@@ -1,7 +1,9 @@
 from __future__ import annotations
 import logging
+import time
 from dataclasses import dataclass
+from pathlib import Path
 import hydra
 import mlflow
@@ -24,16 +26,32 @@ cs.store(name="config", node=MySQLConfig)
 @hydra.main(version_base=None, config_name="config")
 def app(cfg: MySQLConfig):
-    mlflow.set_experiment("log_run")
+    hydraflow.set_experiment(prefix="_", suffix="_")
     with hydraflow.start_run(cfg):
+        log.info(f"START, {cfg.host}, {cfg.port} ")
         artifact_dir = hydraflow.get_artifact_dir()
         output_dir = hydraflow.get_hydra_output_dir()
-        log.info(f"START, {cfg.host}, {cfg.port} ")
         mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
         mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
-        (artifact_dir / "a.txt").write_text("abc")
+        with hydraflow.watch(callback, ignore_patterns=["b.txt"]):
+            (artifact_dir / "a.txt").write_text("abc")
+            time.sleep(0.1)
+        mlflow.log_metric("m", cfg.port + 1, 1)
+        if cfg.host == "x":
+            mlflow.log_metric("m", cfg.port + 10, 2)
         log.info("END")
+def callback(path: Path):
+    log.info(f"WATCH, {path.as_posix()}")
+    m = len(path.read_text())  # len("abc") == 3
+    mlflow.log_metric("watch", m, 1, synchronous=True)
 if __name__ == "__main__":
     app()

hydraflow-0.2.8/tests/test_app.py ADDED Viewed

@@ -0,0 +1,109 @@
+from __future__ import annotations
+import subprocess
+import sys
+from pathlib import Path
+import mlflow
+import pytest
+from omegaconf import DictConfig
+from hydraflow.run_collection import RunCollection
+@pytest.fixture
+def rc(monkeypatch, tmp_path):
+    import hydraflow
+    file = Path("tests/scripts/app.py").absolute()
+    monkeypatch.chdir(tmp_path)
+    args = [sys.executable, file.as_posix(), "-m"]
+    args += ["host=x,y", "port=1,2", "hydra.job.name=info"]
+    subprocess.check_call(args)
+    mlflow.set_experiment("_info_")
+    yield hydraflow.list_runs()
+def test_app_info_run_id(rc: RunCollection):
+    assert len(rc.info.run_id) == 4
+def test_app_info_params(rc: RunCollection):
+    params = rc.info.params
+    assert params[0] == {"port": "1", "host": "x"}
+    assert params[1] == {"port": "2", "host": "x"}
+    assert params[2] == {"port": "1", "host": "y"}
+    assert params[3] == {"port": "2", "host": "y"}
+def test_app_info_metrics(rc: RunCollection):
+    metrics = rc.info.metrics
+    assert metrics[0] == {"m": 11, "watch": 3}
+    assert metrics[1] == {"m": 12, "watch": 3}
+    assert metrics[2] == {"m": 2, "watch": 3}
+    assert metrics[3] == {"m": 3, "watch": 3}
+def test_app_info_config(rc: RunCollection):
+    config = rc.info.config
+    assert config[0].port == 1
+    assert config[1].port == 2
+    assert config[2].host == "y"
+    assert config[3].host == "y"
+def test_app_info_artifact_uri(rc: RunCollection):
+    uris = rc.info.artifact_uri
+    print(uris)
+    assert all(uri.startswith("file://") for uri in uris)  # type: ignore
+    assert all(uri.endswith("/artifacts") for uri in uris)  # type: ignore
+    assert all("mlruns" in uri for uri in uris)  # type: ignore
+def test_app_info_artifact_dir(rc: RunCollection):
+    from hydraflow.info import get_artifact_dir
+    dirs = list(rc.map(get_artifact_dir))
+    assert rc.info.artifact_dir == dirs
+def test_app_hydra_output_dir(rc: RunCollection):
+    from hydraflow.info import get_hydra_output_dir
+    dirs = list(rc.map(get_hydra_output_dir))
+    assert dirs[0].stem == "0"
+    assert dirs[1].stem == "1"
+    assert dirs[2].stem == "2"
+    assert dirs[3].stem == "3"
+def test_app_map_config(rc: RunCollection):
+    ports = []
+    def func(c: DictConfig, *, a: int):
+        ports.append(c.port + 1)
+        return c.host
+    hosts = list(rc.map_config(func, a=1))
+    assert hosts == ["x", "x", "y", "y"]
+    assert ports == [2, 3, 2, 3]
+def test_app_group_by(rc: RunCollection):
+    grouped = rc.group_by("host")
+    assert len(grouped) == 2
+    assert grouped[("x",)].info.params[0] == {"port": "1", "host": "x"}
+    assert grouped[("x",)].info.params[1] == {"port": "2", "host": "x"}
+    assert grouped[("y",)].info.params[0] == {"port": "1", "host": "y"}
+    assert grouped[("y",)].info.params[1] == {"port": "2", "host": "y"}
+def test_app_group_by_values(rc: RunCollection):
+    grouped = rc.group_by_values("port")
+    assert len(grouped) == 2
+    assert grouped[0].info.params[0] == {"port": "1", "host": "x"}
+    assert grouped[0].info.params[1] == {"port": "1", "host": "y"}
+    assert grouped[1].info.params[0] == {"port": "2", "host": "x"}
+    assert grouped[1].info.params[1] == {"port": "2", "host": "y"}

{hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_context.py RENAMED Viewed

@@ -11,7 +11,7 @@ from hydraflow.run_collection import RunCollection
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    from hydraflow.run_collection import list_runs
+    from hydraflow.mlflow import list_runs
     monkeypatch.chdir(tmp_path)

{hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_info.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 from pathlib import Path
 import mlflow
@@ -8,11 +10,12 @@ from hydraflow.run_collection import RunCollection
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    from hydraflow.run_collection import search_runs
+    from hydraflow.mlflow import search_runs
     monkeypatch.chdir(tmp_path)
     mlflow.set_experiment("test_info")
     for x in range(3):
         with mlflow.start_run(run_name=f"{x}"):
             mlflow.log_param("p", x)
@@ -49,3 +52,13 @@ def test_info_artifact_dir(runs: RunCollection):
     dir = runs.info.artifact_dir
     assert all(isinstance(d, Path) for d in dir)
     assert all(d.stem == "artifacts" for d in dir)  # type: ignore
+def test_info_empty_run_collection():
+    rc = RunCollection([])
+    assert rc.info.run_id == []
+    assert rc.info.params == []
+    assert rc.info.metrics == []
+    assert rc.info.artifact_uri == []
+    assert rc.info.artifact_dir == []
+    assert rc.info.config == []

{hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_log_run.py RENAMED Viewed

@@ -12,12 +12,14 @@ from mlflow.entities.run import Run
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    file = Path("tests/scripts/log_run.py").absolute()
+    file = Path("tests/scripts/app.py").absolute()
     monkeypatch.chdir(tmp_path)
-    subprocess.check_call([sys.executable, file.as_posix(), "-m", "host=x,y", "port=1,2"])
+    args = [sys.executable, file.as_posix(), "-m"]
+    args += ["host=x,y", "port=1,2", "hydra.job.name=log_run"]
+    subprocess.check_call(args)
-    mlflow.set_experiment("log_run")
+    mlflow.set_experiment("_log_run_")
     runs = mlflow.search_runs(output_format="list")
     assert len(runs) == 4
     assert isinstance(runs, list)

{hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_run_collection.py RENAMED Viewed

@@ -11,7 +11,7 @@ from hydraflow.run_collection import RunCollection
 @pytest.fixture
 def runs(monkeypatch, tmp_path):
-    from hydraflow.run_collection import search_runs
+    from hydraflow.mlflow import search_runs
     monkeypatch.chdir(tmp_path)
@@ -342,7 +342,7 @@ def runs2(monkeypatch, tmp_path):
 def test_list_runs(runs, runs2):
-    from hydraflow.run_collection import list_runs
+    from hydraflow.mlflow import list_runs
     mlflow.set_experiment("test_run")
     all_runs = list_runs()
@@ -354,7 +354,7 @@ def test_list_runs(runs, runs2):
 def test_list_runs_empty_list(runs, runs2):
-    from hydraflow.run_collection import list_runs
+    from hydraflow.mlflow import list_runs
     all_runs = list_runs([])
     assert len(all_runs) == 9
@@ -362,14 +362,14 @@ def test_list_runs_empty_list(runs, runs2):
 @pytest.mark.parametrize(["name", "n"], [("test_run", 6), ("test_run2", 3)])
 def test_list_runs_list(runs, runs2, name, n):
-    from hydraflow.run_collection import list_runs
+    from hydraflow.mlflow import list_runs
     filtered_runs = list_runs(experiment_names=[name])
     assert len(filtered_runs) == n
 def test_list_runs_none(runs, runs2):
-    from hydraflow.run_collection import list_runs
+    from hydraflow.mlflow import list_runs
     no_runs = list_runs(experiment_names=["non_existent_experiment"])
     assert len(no_runs) == 0

hydraflow-0.2.7/mlruns/0/meta.yaml DELETED Viewed

@@ -1,6 +0,0 @@
-artifact_location: file:///workspaces/hydraflow/mlruns/0
-creation_time: 1725536713011
-experiment_id: '0'
-last_update_time: 1725536713011
-lifecycle_stage: active
-name: Default

hydraflow-0.2.7/src/hydraflow/mlflow.py DELETED Viewed

@@ -1,119 +0,0 @@
-"""
-This module provides functionality to log parameters from Hydra
-configuration objects and set up experiments using MLflow.
-"""
-from __future__ import annotations
-from pathlib import Path
-from typing import TYPE_CHECKING
-import mlflow
-from hydra.core.hydra_config import HydraConfig
-from mlflow.tracking import artifact_utils
-from omegaconf import OmegaConf
-from hydraflow.config import iter_params
-if TYPE_CHECKING:
-    from mlflow.entities.experiment import Experiment
-    from mlflow.entities.run import Run
-def set_experiment(
-    prefix: str = "",
-    suffix: str = "",
-    uri: str | Path | None = None,
-) -> Experiment:
-    """
-    Set the experiment name and tracking URI optionally.
-    This function sets the experiment name by combining the given prefix,
-    the job name from HydraConfig, and the given suffix. Optionally, it can
-    also set the tracking URI.
-    Args:
-        prefix (str): The prefix to prepend to the experiment name.
-        suffix (str): The suffix to append to the experiment name.
-        uri (str | Path | None): The tracking URI to use. Defaults to None.
-    Returns:
-        Experiment: An instance of `mlflow.entities.Experiment` representing
-        the new active experiment.
-    """
-    if uri is not None:
-        mlflow.set_tracking_uri(uri)
-    hc = HydraConfig.get()
-    name = f"{prefix}{hc.job.name}{suffix}"
-    return mlflow.set_experiment(name)
-def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    """
-    Log the parameters from the given configuration object.
-    This method logs the parameters from the provided configuration object
-    using MLflow. It iterates over the parameters and logs them using the
-    `mlflow.log_param` method.
-    Args:
-        config (object): The configuration object to log the parameters from.
-        synchronous (bool | None): Whether to log the parameters synchronously.
-            Defaults to None.
-    """
-    for key, value in iter_params(config):
-        mlflow.log_param(key, value, synchronous=synchronous)
-def get_artifact_dir(run: Run | None = None) -> Path:
-    """
-    Retrieve the artifact directory for the given run.
-    This function uses MLflow to get the artifact directory for the given run.
-    Args:
-        run (Run | None): The run object. Defaults to None.
-    Returns:
-        The local path to the directory where the artifacts are downloaded.
-    """
-    if run is None:
-        uri = mlflow.get_artifact_uri()
-    else:
-        uri = artifact_utils.get_artifact_uri(run.info.run_id)
-    return Path(mlflow.artifacts.download_artifacts(uri))
-def get_hydra_output_dir(*, run: Run | None = None) -> Path:
-    """
-    Retrieve the Hydra output directory for the given run.
-    This function returns the Hydra output directory. If no run is provided,
-    it retrieves the output directory from the current Hydra configuration.
-    If a run is provided, it retrieves the artifact path for the run, loads
-    the Hydra configuration from the downloaded artifacts, and returns the
-    output directory specified in that configuration.
-    Args:
-        run (Run | None): The run object. Defaults to None.
-    Returns:
-        Path: The path to the Hydra output directory.
-    Raises:
-        FileNotFoundError: If the Hydra configuration file is not found
-            in the artifacts.
-    """
-    if run is None:
-        hc = HydraConfig.get()
-        return Path(hc.runtime.output_dir)
-    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
-    raise FileNotFoundError