PyPI - hydraflow - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

hydraflow 0.2.2__tar.gz → 0.2.4__tar.gz

Files changed (30) hide show

{hydraflow-0.2.2 → hydraflow-0.2.4}/.devcontainer/devcontainer.json RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "hydraflow",
-  "image": "mcr.microsoft.com/vscode/devcontainers/base:ubuntu-22.04",
+  "image": "mcr.microsoft.com/vscode/devcontainers/python:3.12",
   "features": {
     "ghcr.io/devcontainers-contrib/features/starship:1": {},
     "ghcr.io/va-h/devcontainers-features/uv:1": {}
@@ -9,7 +9,6 @@
     "vscode": {
       "extensions": [
         "charliermarsh.ruff",
-        "henriiik.vscode-sort",
         "ms-python.python",
         "ms-python.vscode-pylance"
       ]

{hydraflow-0.2.2 → hydraflow-0.2.4}/.gitignore RENAMED Viewed

@@ -1,5 +1,6 @@
 .coverage
+.env
 .venv/
 __pycache__/
-lcov.info
-dist/
+dist/
+lcov.info

{hydraflow-0.2.2 → hydraflow-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.2
+Version: 0.2.4
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -20,7 +20,9 @@ Requires-Dist: hydra-core>1.3
 Requires-Dist: mlflow>2.15
 Requires-Dist: setuptools
 Requires-Dist: watchdog
+Requires-Dist: watchfiles
 Provides-Extra: dev
+Requires-Dist: pytest-asyncio; extra == 'dev'
 Requires-Dist: pytest-clarity; extra == 'dev'
 Requires-Dist: pytest-cov; extra == 'dev'
 Requires-Dist: pytest-randomly; extra == 'dev'
@@ -97,13 +99,10 @@ def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()
-    # Automatically log params using Hydra config.
-    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+    # Automatically log Hydra config as params.
+    with hydraflow.start_run():
         # Your app code below.
-        # `info.output_dir` is the Hydra output directory.
-        # `info.artifact_dir` is the MLflow artifact directory.
         with hydraflow.watch(callback):
             # Watch files in the MLflow artifact directory.
             # You can update metrics or log other artifacts

{hydraflow-0.2.2 → hydraflow-0.2.4}/README.md RENAMED Viewed

@@ -68,13 +68,10 @@ def my_app(cfg: MySQLConfig) -> None:
     # Set experiment by Hydra job name.
     hydraflow.set_experiment()
-    # Automatically log params using Hydra config.
-    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+    # Automatically log Hydra config as params.
+    with hydraflow.start_run():
         # Your app code below.
-        # `info.output_dir` is the Hydra output directory.
-        # `info.artifact_dir` is the MLflow artifact directory.
         with hydraflow.watch(callback):
             # Watch files in the MLflow artifact directory.
             # You can update metrics or log other artifacts

{hydraflow-0.2.2 → hydraflow-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.2"
+version = "0.2.4"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -19,10 +19,22 @@ classifiers = [
   "Topic :: Software Development :: Documentation",
 ]
 requires-python = ">=3.10"
-dependencies = ["hydra-core>1.3", "mlflow>2.15", "setuptools", "watchdog"]
+dependencies = [
+  "hydra-core>1.3",
+  "mlflow>2.15",
+  "setuptools",
+  "watchdog",
+  "watchfiles",
+]
 [project.optional-dependencies]
-dev = ["pytest-clarity", "pytest-cov", "pytest-randomly", "pytest-xdist"]
+dev = [
+  "pytest-asyncio",
+  "pytest-clarity",
+  "pytest-cov",
+  "pytest-randomly",
+  "pytest-xdist",
+]
 [project.urls]
 Documentation = "https://github.com/daizutabi/hydraflow"
@@ -41,9 +53,9 @@ addopts = [
   "--cov=hydraflow",
   "--cov-report=lcov:lcov.info",
 ]
 doctest_optionflags = ["NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"]
 filterwarnings = ['ignore:pkg_resources is deprecated:DeprecationWarning']
+asyncio_default_fixture_loop_scope = "function"
 [tool.coverage.report]
 exclude_lines = ["no cov", "raise NotImplementedError", "if TYPE_CHECKING:"]

hydraflow-0.2.4/src/hydraflow/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+from .context import chdir_artifact, log_run, start_run, watch
+from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
+from .runs import (
+    RunCollection,
+    list_runs,
+    load_config,
+    search_runs,
+)
+__all__ = [
+    "RunCollection",
+    "chdir_artifact",
+    "get_artifact_dir",
+    "get_hydra_output_dir",
+    "list_runs",
+    "load_config",
+    "log_run",
+    "search_runs",
+    "set_experiment",
+    "start_run",
+    "watch",
+]

hydraflow-0.2.4/src/hydraflow/asyncio.py ADDED Viewed

@@ -0,0 +1,199 @@
+from __future__ import annotations
+import asyncio
+import logging
+from asyncio.subprocess import PIPE
+from pathlib import Path
+from typing import TYPE_CHECKING
+import watchfiles
+if TYPE_CHECKING:
+    from asyncio.streams import StreamReader
+    from collections.abc import Callable
+    from watchfiles import Change
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+async def execute_command(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    stop_event: asyncio.Event,
+) -> int:
+    """
+    Runs a command asynchronously and pass the output to callback functions.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        stop_event (asyncio.Event): Event to signal when the process is done.
+    Returns:
+        int: The return code of the process.
+    """
+    try:
+        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        await asyncio.gather(
+            process_stream(process.stdout, stdout),
+            process_stream(process.stderr, stderr),
+        )
+        returncode = await process.wait()
+    except Exception as e:
+        logger.error(f"Error running command: {e}")
+        returncode = 1
+    finally:
+        stop_event.set()
+    return returncode
+async def process_stream(
+    stream: StreamReader | None,
+    callback: Callable[[str], None] | None,
+) -> None:
+    """
+    Reads a stream asynchronously and pass each line to a callback function.
+    Args:
+        stream (StreamReader | None): The stream to read from.
+        callback (Callable[[str], None] | None): The callback function to handle
+        each line.
+    """
+    if stream is None or callback is None:
+        return
+    while True:
+        line = await stream.readline()
+        if line:
+            callback(line.decode().strip())
+        else:
+            break
+async def monitor_file_changes(
+    paths: list[str | Path],
+    callback: Callable[[set[tuple[Change, str]]], None],
+    stop_event: asyncio.Event,
+    **awatch_kwargs,
+) -> None:
+    """
+    Watches for file changes in specified paths and pass the changes to a
+    callback function.
+    Args:
+        paths (list[str | Path]): List of paths to monitor for changes.
+        callback (Callable[[set[tuple[Change, str]]], None]): The callback
+        function to handle file changes.
+        stop_event (asyncio.Event): Event to signal when to stop watching.
+        **awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
+    """
+    str_paths = [str(path) for path in paths]
+    try:
+        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+            callback(changes)
+    except Exception as e:
+        logger.error(f"Error watching files: {e}")
+async def run_and_monitor(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Runs a command and optionally watch for file changes concurrently.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
+        file changes.
+        paths (list[str | Path] | None): List of paths to monitor for changes.
+    """
+    stop_event = asyncio.Event()
+    run_task = asyncio.create_task(
+        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+    )
+    if watch and paths:
+        monitor_task = asyncio.create_task(
+            monitor_file_changes(paths, watch, stop_event, **awatch_kwargs)
+        )
+    else:
+        monitor_task = None
+    try:
+        if monitor_task:
+            await asyncio.gather(run_task, monitor_task)
+        else:
+            await run_task
+    except Exception as e:
+        logger.error(f"Error in run_and_monitor: {e}")
+    finally:
+        stop_event.set()
+        await run_task
+        if monitor_task:
+            await monitor_task
+    return run_task.result()
+def run(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Run a command synchronously and optionally watch for file changes.
+    This function is a synchronous wrapper around the asynchronous `run_and_monitor` function.
+    It runs a specified command and optionally monitors specified paths for file changes,
+    invoking the provided callbacks for standard output, standard error, and file changes.
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for handling standard output lines.
+        stderr (Callable[[str], None] | None): Callback for handling standard error lines.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for handling file changes.
+        paths (list[str | Path] | None): List of paths to monitor for file changes.
+        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
+    Returns:
+        int: The return code of the process.
+    """
+    if watch and not paths:
+        paths = [Path.cwd()]
+    return asyncio.run(
+        run_and_monitor(
+            program,
+            *args,
+            stdout=stdout,
+            stderr=stderr,
+            watch=watch,
+            paths=paths,
+            **awatch_kwargs,
+        )
+    )

{hydraflow-0.2.2 → hydraflow-0.2.4}/src/hydraflow/config.py RENAMED Viewed

@@ -22,9 +22,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config: The configuration object to iterate over. This can be a dictionary,
-            list, DictConfig, or ListConfig.
-        prefix: The prefix to prepend to the parameter keys.
+        config (object): The configuration object to iterate over. This can be a
+            dictionary, list, DictConfig, or ListConfig.
+        prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
     Yields:

{hydraflow-0.2.2 → hydraflow-0.2.4}/src/hydraflow/context.py RENAMED Viewed

@@ -9,7 +9,6 @@ import logging
 import os
 import time
 from contextlib import contextmanager
-from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -28,18 +27,12 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)
-@dataclass
-class Info:
-    output_dir: Path
-    artifact_dir: Path
 @contextmanager
 def log_run(
     config: object,
     *,
     synchronous: bool | None = None,
-) -> Iterator[Info]:
+) -> Iterator[None]:
     """
     Log the parameters from the given configuration object and manage the MLflow
     run context.
@@ -49,16 +42,15 @@ def log_run(
     are logged and the run is properly closed.
     Args:
-        config: The configuration object to log the parameters from.
-        synchronous: Whether to log the parameters synchronously.
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
     Yields:
-        Info: An `Info` object containing the output directory and artifact directory
-        paths.
+        None
     Example:
-        with log_run(config) as info:
+        with log_run(config):
             # Perform operations within the MLflow run context
             pass
     """
@@ -66,7 +58,6 @@ def log_run(
     hc = HydraConfig.get()
     output_dir = Path(hc.runtime.output_dir)
-    info = Info(output_dir, get_artifact_dir())
     # Save '.hydra' config directory first.
     output_subdir = output_dir / (hc.output_subdir or "")
@@ -78,7 +69,7 @@ def log_run(
     try:
         with watch(log_artifact, output_dir):
-            yield info
+            yield
     except Exception as e:
         log.error(f"Error during log_run: {e}")
@@ -89,6 +80,64 @@ def log_run(
         mlflow.log_artifacts(output_dir.as_posix())
+@contextmanager
+def start_run(
+    config: object,
+    *,
+    run_id: str | None = None,
+    experiment_id: str | None = None,
+    run_name: str | None = None,
+    nested: bool = False,
+    parent_run_id: str | None = None,
+    tags: dict[str, str] | None = None,
+    description: str | None = None,
+    log_system_metrics: bool | None = None,
+    synchronous: bool | None = None,
+) -> Iterator[Run]:
+    """
+    Start an MLflow run and log parameters using the provided configuration object.
+    This context manager starts an MLflow run and logs parameters using the specified
+    configuration object. It ensures that the run is properly closed after completion.
+    Args:
+        config (object): The configuration object to log parameters from.
+        run_id (str | None): The existing run ID. Defaults to None.
+        experiment_id (str | None): The experiment ID. Defaults to None.
+        run_name (str | None): The name of the run. Defaults to None.
+        nested (bool): Whether to allow nested runs. Defaults to False.
+        parent_run_id (str | None): The parent run ID. Defaults to None.
+        tags (dict[str, str] | None): Tags to associate with the run. Defaults to None.
+        description (str | None): A description of the run. Defaults to None.
+        log_system_metrics (bool | None): Whether to log system metrics. Defaults to None.
+        synchronous (bool | None): Whether to log parameters synchronously. Defaults to None.
+    Yields:
+        Run: An MLflow Run object representing the started run.
+    Example:
+        with start_run(config) as run:
+            # Perform operations within the MLflow run context
+            pass
+    See Also:
+        `mlflow.start_run`: The MLflow function to start a run directly.
+        `log_run`: A context manager to log parameters and manage the MLflow run context.
+    """
+    with mlflow.start_run(
+        run_id=run_id,
+        experiment_id=experiment_id,
+        run_name=run_name,
+        nested=nested,
+        parent_run_id=parent_run_id,
+        tags=tags,
+        description=description,
+        log_system_metrics=log_system_metrics,
+    ) as run:
+        with log_run(config, synchronous=synchronous):
+            yield run
 @contextmanager
 def watch(
     func: Callable[[Path], None],
@@ -105,12 +154,12 @@ def watch(
     period or until the context is exited.
     Args:
-        func: The function to call when a change is
+        func (Callable[[Path], None]): The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
-        dir: The directory to watch. If not specified,
+        dir (Path | str): The directory to watch. If not specified,
             the current MLflow artifact URI is used. Defaults to "".
-        timeout: The timeout period in seconds for the watcher
+        timeout (int): The timeout period in seconds for the watcher
             to run after the context is exited. Defaults to 60.
     Yields:
@@ -122,6 +171,8 @@ def watch(
             pass
     """
     dir = dir or get_artifact_dir()
+    if isinstance(dir, Path):
+        dir = dir.as_posix()
     handler = Handler(func)
     observer = Observer()
@@ -152,7 +203,7 @@ class Handler(FileSystemEventHandler):
         self.func = func
     def on_modified(self, event: FileModifiedEvent) -> None:
-        file = Path(event.src_path)
+        file = Path(str(event.src_path))
         if file.is_file():
             self.func(file)
@@ -171,8 +222,8 @@ def chdir_artifact(
     to the original directory after the context is exited.
     Args:
-        run: The run to get the artifact directory from.
-        artifact_path: The artifact path.
+        run (Run): The run to get the artifact directory from.
+        artifact_path (str | None): The artifact path.
     """
     curdir = Path.cwd()
     path = mlflow.artifacts.download_artifacts(

hydraflow-0.2.4/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+This module provides functionality to log parameters from Hydra
+configuration objects and set up experiments using MLflow.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import TYPE_CHECKING
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
+from omegaconf import OmegaConf
+from hydraflow.config import iter_params
+if TYPE_CHECKING:
+    from mlflow.entities.experiment import Experiment
+def set_experiment(
+    prefix: str = "",
+    suffix: str = "",
+    uri: str | Path | None = None,
+) -> Experiment:
+    """
+    Set the experiment name and tracking URI optionally.
+    This function sets the experiment name by combining the given prefix,
+    the job name from HydraConfig, and the given suffix. Optionally, it can
+    also set the tracking URI.
+    Args:
+        prefix (str): The prefix to prepend to the experiment name.
+        suffix (str): The suffix to append to the experiment name.
+        uri (str | Path | None): The tracking URI to use. Defaults to None.
+    Returns:
+        Experiment: An instance of `mlflow.entities.Experiment` representing
+        the new active experiment.
+    """
+    if uri is not None:
+        mlflow.set_tracking_uri(uri)
+    hc = HydraConfig.get()
+    name = f"{prefix}{hc.job.name}{suffix}"
+    return mlflow.set_experiment(name)
+def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log the parameters from the given configuration object.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
+    Args:
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
+            Defaults to None.
+    """
+    for key, value in iter_params(config):
+        mlflow.log_param(key, value, synchronous=synchronous)
+def get_artifact_dir(
+    artifact_path: str | None = None,
+    *,
+    run_id: str | None = None,
+) -> Path:
+    """
+    Get the artifact directory for the given artifact path.
+    This function retrieves the artifact URI for the specified artifact path
+    using MLflow, downloads the artifacts to a local directory, and returns
+    the path to that directory.
+    Args:
+        artifact_path (str | None): The artifact path for which to get the
+            directory. Defaults to None.
+        run_id (str | None): The run ID for which to get the artifact directory.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    if run_id is None:
+        uri = mlflow.get_artifact_uri(artifact_path)
+    else:
+        uri = artifact_utils.get_artifact_uri(run_id, artifact_path)
+    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
+    return Path(dir)
+def get_hydra_output_dir(*, run_id: str | None = None) -> Path:
+    if run_id is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+    path = get_artifact_dir(run_id=run_id) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
+# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+#     """
+#     Log the Hydra output directory.
+#     Args:
+#         run: The run object.
+#     Returns:
+#         None
+#     """
+#     output_dir = get_hydra_output_dir(run)
+#     run_id = run if isinstance(run, str) else run.info.run_id
+#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

hydraflow 0.2.2__tar.gz → 0.2.4__tar.gz

hydraflow 0.2.2__tar.gz → 0.2.4__tar.gz