PyPI - hydraflow - Versions diffs - 0.1.4__tar.gz → 0.2.0__tar.gz - Mend

hydraflow 0.1.4tar.gz → 0.2.0tar.gz

Files changed (32) hide show

hydraflow-0.2.0/PKG-INFO +111 -0
hydraflow-0.2.0/README.md +82 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/pyproject.toml +2 -2
{hydraflow-0.1.4 → hydraflow-0.2.0}/src/hydraflow/__init__.py +0 -10
hydraflow-0.2.0/src/hydraflow/config.py +55 -0
hydraflow-0.2.0/src/hydraflow/context.py +188 -0
hydraflow-0.2.0/src/hydraflow/mlflow.py +72 -0
hydraflow-0.2.0/src/hydraflow/runs.py +422 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/log_run.py +2 -2
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_config.py +1 -2
hydraflow-0.2.0/tests/test_context.py +36 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_log_run.py +26 -8
hydraflow-0.2.0/tests/test_mlflow.py +35 -0
hydraflow-0.2.0/tests/test_runs.py +277 -0
hydraflow-0.1.4/PKG-INFO +0 -45
hydraflow-0.1.4/README.md +0 -16
hydraflow-0.1.4/src/hydraflow/config.py +0 -30
hydraflow-0.1.4/src/hydraflow/context.py +0 -111
hydraflow-0.1.4/src/hydraflow/mlflow.py +0 -20
hydraflow-0.1.4/src/hydraflow/runs.py +0 -217
hydraflow-0.1.4/src/hydraflow/util.py +0 -11
hydraflow-0.1.4/tests/test_runs.py +0 -260
{hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/devcontainer.json +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/postCreate.sh +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/starship.toml +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/.gitattributes +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/.gitignore +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/LICENSE +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/__init__.py +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/watch.py +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_version.py +0 -0
{hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_watch.py +0 -0

hydraflow-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,111 @@
+Metadata-Version: 2.3
+Name: hydraflow
+Version: 0.2.0
+Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
+Project-URL: Documentation, https://github.com/daizutabi/hydraflow
+Project-URL: Source, https://github.com/daizutabi/hydraflow
+Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
+Author-email: daizutabi <daizutabi@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Documentation
+Classifier: Topic :: Software Development :: Documentation
+Requires-Python: >=3.10
+Requires-Dist: hydra-core>1.3
+Requires-Dist: mlflow>2.15
+Requires-Dist: setuptools
+Requires-Dist: watchdog
+Provides-Extra: dev
+Requires-Dist: pytest-clarity; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest-randomly; extra == 'dev'
+Requires-Dist: pytest-xdist; extra == 'dev'
+Description-Content-Type: text/markdown
+# Hydraflow
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Python Version][python-v-image]][python-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+<!-- Badges -->
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+## Overview
+Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+## Key Features
+- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+## Installation
+You can install Hydraflow via pip:
+```bash
+pip install hydraflow
+```
+## Getting Started
+Here is a simple example to get you started with Hydraflow:
+```python
+import hydra
+import hydraflow
+import mlflow
+from dataclasses import dataclass
+from hydra.core.config_store import ConfigStore
+from pathlib import Path
+@dataclass
+class MySQLConfig:
+    host: str = "localhost"
+    port: int = 3306
+cs = ConfigStore.instance()
+cs.store(name="config", node=MySQLConfig)
+@hydra.main(version_base=None, config_name="config")
+def my_app(cfg: MySQLConfig) -> None:
+    # Set experiment by Hydra job name.
+    hydraflow.set_experiment()
+    # Automatically log params using Hydra config.
+    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+        # Your app code below.
+        # `info.output_dir` is the Hydra output directory.
+        # `info.artifact_dir` is the MLflow artifact directory.
+        with hydraflow.watch(callback):
+            # Watch files in the MLflow artifact directory.
+            # You can update metrics or log other artifacts
+            # according to the watched files in your callback
+            # function.
+            pass
+# Your callback function here.
+def callback(file: Path) -> None:
+    pass
+if __name__ == "__main__":
+    my_app()
+```

hydraflow-0.2.0/README.md ADDED Viewed

@@ -0,0 +1,82 @@
+# Hydraflow
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Python Version][python-v-image]][python-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+<!-- Badges -->
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+## Overview
+Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+## Key Features
+- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+## Installation
+You can install Hydraflow via pip:
+```bash
+pip install hydraflow
+```
+## Getting Started
+Here is a simple example to get you started with Hydraflow:
+```python
+import hydra
+import hydraflow
+import mlflow
+from dataclasses import dataclass
+from hydra.core.config_store import ConfigStore
+from pathlib import Path
+@dataclass
+class MySQLConfig:
+    host: str = "localhost"
+    port: int = 3306
+cs = ConfigStore.instance()
+cs.store(name="config", node=MySQLConfig)
+@hydra.main(version_base=None, config_name="config")
+def my_app(cfg: MySQLConfig) -> None:
+    # Set experiment by Hydra job name.
+    hydraflow.set_experiment()
+    # Automatically log params using Hydra config.
+    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+        # Your app code below.
+        # `info.output_dir` is the Hydra output directory.
+        # `info.artifact_dir` is the MLflow artifact directory.
+        with hydraflow.watch(callback):
+            # Watch files in the MLflow artifact directory.
+            # You can update metrics or log other artifacts
+            # according to the watched files in your callback
+            # function.
+            pass
+# Your callback function here.
+def callback(file: Path) -> None:
+    pass
+if __name__ == "__main__":
+    my_app()
+```

{hydraflow-0.1.4 → hydraflow-0.2.0}/pyproject.toml RENAMED Viewed

@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.1.4"
-description = "Hydra with MLflow"
+version = "0.2.0"
+description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
 authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]

{hydraflow-0.1.4 → hydraflow-0.2.0}/src/hydraflow/__init__.py RENAMED Viewed

@@ -3,15 +3,10 @@ from .mlflow import set_experiment
 from .runs import (
     Run,
     Runs,
-    drop_unique_params,
     filter_runs,
-    get_artifact_dir,
-    get_artifact_path,
-    get_artifact_uri,
     get_param_dict,
     get_param_names,
     get_run,
-    get_run_id,
     load_config,
 )
@@ -20,15 +15,10 @@ __all__ = [
     "Run",
     "Runs",
     "chdir_artifact",
-    "drop_unique_params",
     "filter_runs",
-    "get_artifact_dir",
-    "get_artifact_path",
-    "get_artifact_uri",
     "get_param_dict",
     "get_param_names",
     "get_run",
-    "get_run_id",
     "load_config",
     "log_run",
     "set_experiment",

hydraflow-0.2.0/src/hydraflow/config.py ADDED Viewed

@@ -0,0 +1,55 @@
+"""
+This module provides functionality for working with configuration
+objects using the OmegaConf library.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from omegaconf import DictConfig, ListConfig, OmegaConf
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from typing import Any
+def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+    """
+    Walk a configuration object and yield its parameters as flat key-value pairs.
+    Nested keys are joined with dots and every key starts with `prefix`.
+    Inside a mapping, a list that holds no nested container is yielded whole
+    as a single value; any other nested container is descended into. Inside a
+    list, every nested container is descended into.
+    Args:
+        config: The configuration object to iterate over. Anything that is not
+            already a DictConfig or ListConfig is converted with
+            `OmegaConf.create` first.
+        prefix: The prefix to prepend to the parameter keys.
+            Defaults to an empty string.
+    Yields:
+        `(key, value)` tuples, one per parameter.
+    """
+    containers = (DictConfig, ListConfig)
+    if not isinstance(config, containers):
+        config = OmegaConf.create(config)  # type: ignore
+    if isinstance(config, DictConfig):
+        for name, item in config.items():
+            full_key = f"{prefix}{name}"
+            # A list made only of plain values counts as one parameter.
+            flat_list = isinstance(item, ListConfig) and not any(
+                isinstance(element, containers) for element in item
+            )
+            if isinstance(item, containers) and not flat_list:
+                yield from iter_params(item, f"{full_key}.")
+            else:
+                yield full_key, item
+        return
+    if isinstance(config, ListConfig):
+        for position, item in enumerate(config):
+            full_key = f"{prefix}{position}"
+            if isinstance(item, containers):
+                yield from iter_params(item, f"{full_key}.")
+            else:
+                yield full_key, item

hydraflow-0.2.0/src/hydraflow/context.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""
+This module provides context managers to log parameters and manage the MLflow
+run context.
+"""
+from __future__ import annotations
+import logging
+import os
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from watchdog.events import FileModifiedEvent, FileSystemEventHandler
+from watchdog.observers import Observer
+from hydraflow.mlflow import get_artifact_dir, log_params
+if TYPE_CHECKING:
+    from collections.abc import Callable, Iterator
+    from mlflow.entities.run import Run
+log = logging.getLogger(__name__)
+@dataclass
+class Info:
+    """Directories associated with a run, as yielded by `log_run`."""
+    # Hydra runtime output directory (`hc.runtime.output_dir` in `log_run`).
+    output_dir: Path
+    # Local artifact directory as returned by `get_artifact_dir()`.
+    artifact_dir: Path
+@contextmanager
+def log_run(
+    config: object,
+    *,
+    synchronous: bool | None = None,
+) -> Iterator[Info]:
+    """
+    Log the parameters of the given configuration object and mirror the Hydra
+    output directory to MLflow artifacts.
+    On entry the parameters are logged with `log_params` and the Hydra output
+    subdirectory is uploaded. While the block runs, the Hydra output directory
+    is watched and every modified file is logged as an artifact. On exit, even
+    after an error, the whole output directory is uploaded once more.
+    This function never calls `mlflow.start_run` or `mlflow.end_run` itself,
+    so it is meant to be used together with a run context, e.g.
+    `with mlflow.start_run(), log_run(config) as info:`.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Forwarded to `log_params`. Defaults to None.
+    Yields:
+        Info: An `Info` object containing the output directory and artifact directory
+        paths.
+    Raises:
+        Exception: Any exception raised inside the block is logged with
+            `log.error` and re-raised unchanged.
+    Example:
+        with log_run(config) as info:
+            # Perform operations within the MLflow run context
+            pass
+    """
+    log_params(config, synchronous=synchronous)
+    hc = HydraConfig.get()
+    output_dir = Path(hc.runtime.output_dir)
+    info = Info(output_dir, get_artifact_dir())
+    # Upload the Hydra config subdirectory first; when `hc.output_subdir` is
+    # falsy this resolves to `output_dir` itself.
+    output_subdir = output_dir / (hc.output_subdir or "")
+    mlflow.log_artifacts(output_subdir.as_posix(), hc.output_subdir)
+    def log_artifact(path: Path) -> None:
+        local_path = (output_dir / path).as_posix()
+        mlflow.log_artifact(local_path)
+    try:
+        with watch(log_artifact, output_dir):
+            yield info
+    except Exception as e:
+        log.error(f"Error during log_run: {e}")
+        raise
+    finally:
+        # Runs on success and on failure: upload all of output_dir, including
+        # the Hydra config subdirectory.
+        mlflow.log_artifacts(output_dir.as_posix())
+@contextmanager
+def watch(
+    func: Callable[[Path], None],
+    dir: Path | str = "",
+    timeout: int = 60,
+) -> Iterator[None]:
+    """
+    Watch the given directory for changes and call the provided function
+    when a change is detected.
+    This context manager starts a recursive file system observer on the
+    specified directory. Whenever a file modification is reported, `func` is
+    called with the path of the modified file. When the context exits, the
+    observer is given up to `timeout` seconds to drain events that are still
+    queued, and is then stopped and joined.
+    Args:
+        func: The function to call when a change is
+            detected. It should accept a single argument of type `Path`,
+            which is the path of the modified file.
+        dir: The directory to watch. If empty, the directory returned by
+            `get_artifact_dir()` is used. Defaults to "".
+        timeout: The maximum number of seconds to wait, after the context
+            is exited, for queued events to be processed. Defaults to 60.
+    Yields:
+        None
+    Raises:
+        Exception: Any exception raised inside the block is logged with
+            `log.error` and re-raised unchanged.
+    Example:
+        with watch(log_artifact, "/path/to/dir"):
+            # Perform operations while watching the directory for changes
+            pass
+    """
+    dir = dir or get_artifact_dir()
+    handler = Handler(func)
+    observer = Observer()
+    observer.schedule(handler, dir, recursive=True)
+    observer.start()
+    try:
+        yield
+    except Exception as e:
+        log.error(f"Error during watch: {e}")
+        raise
+    finally:
+        # Measure the drain period against a monotonic clock so the timeout
+        # reflects real elapsed time instead of a sum of nominal sleeps.
+        deadline = time.monotonic() + timeout
+        while not observer.event_queue.empty() and time.monotonic() < deadline:
+            time.sleep(0.2)
+        observer.stop()
+        observer.join()
+class Handler(FileSystemEventHandler):
+    """Event handler that forwards modified files to a callback."""
+    def __init__(self, func: Callable[[Path], None]) -> None:
+        """Store the callback to invoke for each modified file."""
+        self.func = func
+    def on_modified(self, event: FileModifiedEvent) -> None:
+        """Call the stored callback when the event's source path is a file."""
+        modified = Path(event.src_path)
+        if not modified.is_file():
+            return
+        self.func(modified)
+@contextmanager
+def chdir_artifact(
+    run: Run,
+    artifact_path: str | None = None,
+) -> Iterator[Path]:
+    """
+    Temporarily make the downloaded artifacts of a run the working directory.
+    The artifacts are fetched with `mlflow.artifacts.download_artifacts`, the
+    process changes into the returned local path, and the previous working
+    directory is restored when the context exits, even if the block raises.
+    Args:
+        run: The run whose artifacts are downloaded.
+        artifact_path: The artifact path passed to the download call.
+            Defaults to None.
+    Yields:
+        The local path of the downloaded artifacts.
+    """
+    original_dir = Path.cwd()
+    local_dir = mlflow.artifacts.download_artifacts(
+        artifact_path=artifact_path,
+        run_id=run.info.run_id,
+    )
+    os.chdir(local_dir)
+    try:
+        yield Path(local_dir)
+    finally:
+        os.chdir(original_dir)

hydraflow-0.2.0/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+This module provides functionality to log parameters from Hydra
+configuration objects and set up experiments using MLflow.
+"""
+from __future__ import annotations
+from pathlib import Path
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from hydraflow.config import iter_params
+def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
+    """
+    Select the MLflow experiment named after the current Hydra job.
+    The experiment name is the Hydra job name wrapped in the given prefix
+    and suffix. If a tracking URI is given, it is set before the experiment
+    is selected.
+    Args:
+        prefix: The prefix to prepend to the experiment name.
+        suffix: The suffix to append to the experiment name.
+        uri: The tracking URI to use. Ignored when empty or None.
+    """
+    if uri:
+        mlflow.set_tracking_uri(uri)
+    job_name = HydraConfig.get().job.name
+    mlflow.set_experiment(f"{prefix}{job_name}{suffix}")
+def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log every parameter of the given configuration object to MLflow.
+    The configuration is flattened with `iter_params` and each resulting
+    key-value pair is passed to `mlflow.log_param`.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Forwarded to `mlflow.log_param`.
+            Defaults to None.
+    """
+    params = iter_params(config)
+    for name, param in params:
+        mlflow.log_param(name, param, synchronous=synchronous)
+def get_artifact_dir(artifact_path: str | None = None) -> Path:
+    """
+    Return the local directory for the given artifact path.
+    The artifact URI is looked up with `mlflow.get_artifact_uri`, passed to
+    `mlflow.artifacts.download_artifacts`, and the location returned by that
+    call is wrapped in a `Path`.
+    Args:
+        artifact_path: The artifact path for which to get the directory.
+            Defaults to None.
+    Returns:
+        The local path returned by the download call.
+    """
+    artifact_uri = mlflow.get_artifact_uri(artifact_path)
+    return Path(mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri))

hydraflow 0.1.4__tar.gz → 0.2.0__tar.gz

hydraflow 0.1.4tar.gz → 0.2.0tar.gz