hydraflow 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

hydraflow/__init__.py CHANGED
@@ -1,28 +1,22 @@
-from .context import Info, chdir_artifact, log_run, watch
-from .mlflow import set_experiment
+from .context import chdir_artifact, log_run, start_run, watch
+from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
 from .runs import (
-    Run,
     RunCollection,
-    filter_runs,
-    get_param_dict,
-    get_param_names,
-    get_run,
+    list_runs,
     load_config,
     search_runs,
 )
 
 __all__ = [
-    "Info",
-    "Run",
     "RunCollection",
     "chdir_artifact",
-    "filter_runs",
-    "get_param_dict",
-    "get_param_names",
-    "get_run",
+    "get_artifact_dir",
+    "get_hydra_output_dir",
+    "list_runs",
     "load_config",
     "log_run",
     "search_runs",
     "set_experiment",
+    "start_run",
     "watch",
 ]
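
Taken together, 0.2.4 drops the `Info`/`Run`-centric exports in favor of `RunCollection`, `list_runs`, `start_run`, and the two directory helpers. The snippet below only exercises the names exported above; call signatures are not part of this diff.

    from hydraflow import (
        RunCollection,
        chdir_artifact,
        get_artifact_dir,
        get_hydra_output_dir,
        list_runs,
        load_config,
        log_run,
        search_runs,
        set_experiment,
        start_run,
        watch,
    )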
hydraflow/asyncio.py ADDED
@@ -0,0 +1,199 @@
+from __future__ import annotations
+
+import asyncio
+import logging
+from asyncio.subprocess import PIPE
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+import watchfiles
+
+if TYPE_CHECKING:
+    from asyncio.streams import StreamReader
+    from collections.abc import Callable
+
+    from watchfiles import Change
+
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+async def execute_command(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    stop_event: asyncio.Event,
+) -> int:
+    """
+    Runs a command asynchronously and passes the output to callback functions.
+
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        stop_event (asyncio.Event): Event to signal when the process is done.
+
+    Returns:
+        int: The return code of the process.
+    """
+    try:
+        process = await asyncio.create_subprocess_exec(program, *args, stdout=PIPE, stderr=PIPE)
+        await asyncio.gather(
+            process_stream(process.stdout, stdout),
+            process_stream(process.stderr, stderr),
+        )
+        returncode = await process.wait()
+
+    except Exception as e:
+        logger.error(f"Error running command: {e}")
+        returncode = 1
+
+    finally:
+        stop_event.set()
+
+    return returncode
+
+
+async def process_stream(
+    stream: StreamReader | None,
+    callback: Callable[[str], None] | None,
+) -> None:
+    """
+    Reads a stream asynchronously and passes each line to a callback function.
+
+    Args:
+        stream (StreamReader | None): The stream to read from.
+        callback (Callable[[str], None] | None): The callback function to handle
+            each line.
+    """
+    if stream is None or callback is None:
+        return
+
+    while True:
+        line = await stream.readline()
+        if line:
+            callback(line.decode().strip())
+        else:
+            break
+
+
+async def monitor_file_changes(
+    paths: list[str | Path],
+    callback: Callable[[set[tuple[Change, str]]], None],
+    stop_event: asyncio.Event,
+    **awatch_kwargs,
+) -> None:
+    """
+    Watches for file changes in specified paths and passes the changes to a
+    callback function.
+
+    Args:
+        paths (list[str | Path]): List of paths to monitor for changes.
+        callback (Callable[[set[tuple[Change, str]]], None]): The callback
+            function to handle file changes.
+        stop_event (asyncio.Event): Event to signal when to stop watching.
+        **awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
+    """
+    str_paths = [str(path) for path in paths]
+    try:
+        async for changes in watchfiles.awatch(*str_paths, stop_event=stop_event, **awatch_kwargs):
+            callback(changes)
+    except Exception as e:
+        logger.error(f"Error watching files: {e}")
+
+
+async def run_and_monitor(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Runs a command and optionally watches for file changes concurrently.
+
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for standard output.
+        stderr (Callable[[str], None] | None): Callback for standard error.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
+            file changes.
+        paths (list[str | Path] | None): List of paths to monitor for changes.
+    """
+    stop_event = asyncio.Event()
+    run_task = asyncio.create_task(
+        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
+    )
+    if watch and paths:
+        monitor_task = asyncio.create_task(
+            monitor_file_changes(paths, watch, stop_event, **awatch_kwargs)
+        )
+    else:
+        monitor_task = None
+
+    try:
+        if monitor_task:
+            await asyncio.gather(run_task, monitor_task)
+        else:
+            await run_task
+
+    except Exception as e:
+        logger.error(f"Error in run_and_monitor: {e}")
+    finally:
+        stop_event.set()
+        await run_task
+        if monitor_task:
+            await monitor_task
+
+    return run_task.result()
+
+
+def run(
+    program: str,
+    *args: str,
+    stdout: Callable[[str], None] | None = None,
+    stderr: Callable[[str], None] | None = None,
+    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
+    paths: list[str | Path] | None = None,
+    **awatch_kwargs,
+) -> int:
+    """
+    Run a command synchronously and optionally watch for file changes.
+
+    This function is a synchronous wrapper around the asynchronous `run_and_monitor` function.
+    It runs a specified command and optionally monitors specified paths for file changes,
+    invoking the provided callbacks for standard output, standard error, and file changes.
+
+    Args:
+        program (str): The program to run.
+        *args (str): Arguments for the program.
+        stdout (Callable[[str], None] | None): Callback for handling standard output lines.
+        stderr (Callable[[str], None] | None): Callback for handling standard error lines.
+        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for handling file changes.
+        paths (list[str | Path] | None): List of paths to monitor for file changes.
+        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
+
+    Returns:
+        int: The return code of the process.
+    """
+    if watch and not paths:
+        paths = [Path.cwd()]
+
+    return asyncio.run(
+        run_and_monitor(
+            program,
+            *args,
+            stdout=stdout,
+            stderr=stderr,
+            watch=watch,
+            paths=paths,
+            **awatch_kwargs,
+        )
+    )
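
The new `hydraflow.asyncio.run` is the synchronous entry point to this module. A minimal usage sketch; the command, callbacks, and watch path below are illustrative, not part of the package:

    from watchfiles import Change

    from hydraflow.asyncio import run

    def on_stdout(line: str) -> None:
        print(f"[stdout] {line}")

    def on_change(changes: set[tuple[Change, str]]) -> None:
        for change, path in changes:
            print(f"{change.name}: {path}")

    # Run `python train.py` while watching ./outputs for file changes.
    returncode = run(
        "python", "train.py",
        stdout=on_stdout,
        watch=on_change,
        paths=["outputs"],
    )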
hydraflow/config.py CHANGED
@@ -22,9 +22,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     representing the parameters. The keys are prefixed with the provided prefix.
 
     Args:
-        config: The configuration object to iterate over. This can be a dictionary,
-            list, DictConfig, or ListConfig.
-        prefix: The prefix to prepend to the parameter keys.
+        config (object): The configuration object to iterate over. This can be a
+            dictionary, list, DictConfig, or ListConfig.
+        prefix (str): The prefix to prepend to the parameter keys.
             Defaults to an empty string.
 
     Yields:
@@ -9,7 +9,6 @@ import logging
9
 import os
 import time
 from contextlib import contextmanager
-from dataclasses import dataclass
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -28,18 +27,12 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)
 
 
-@dataclass
-class Info:
-    output_dir: Path
-    artifact_dir: Path
-
-
 @contextmanager
 def log_run(
     config: object,
     *,
     synchronous: bool | None = None,
-) -> Iterator[Info]:
+) -> Iterator[None]:
     """
     Log the parameters from the given configuration object and manage the MLflow
     run context.
@@ -49,16 +42,15 @@ def log_run(
     are logged and the run is properly closed.
 
     Args:
-        config: The configuration object to log the parameters from.
-        synchronous: Whether to log the parameters synchronously.
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
 
     Yields:
-        Info: An `Info` object containing the output directory and artifact directory
-            paths.
+        None
 
     Example:
-        with log_run(config) as info:
+        with log_run(config):
             # Perform operations within the MLflow run context
             pass
     """
@@ -66,7 +58,6 @@ def log_run(
 
     hc = HydraConfig.get()
     output_dir = Path(hc.runtime.output_dir)
-    info = Info(output_dir, get_artifact_dir())
 
     # Save '.hydra' config directory first.
     output_subdir = output_dir / (hc.output_subdir or "")
@@ -78,7 +69,7 @@ def log_run(
 
     try:
         with watch(log_artifact, output_dir):
-            yield info
+            yield
 
     except Exception as e:
         log.error(f"Error during log_run: {e}")
@@ -89,6 +80,64 @@ def log_run(
         mlflow.log_artifacts(output_dir.as_posix())
 
 
+@contextmanager
+def start_run(
+    config: object,
+    *,
+    run_id: str | None = None,
+    experiment_id: str | None = None,
+    run_name: str | None = None,
+    nested: bool = False,
+    parent_run_id: str | None = None,
+    tags: dict[str, str] | None = None,
+    description: str | None = None,
+    log_system_metrics: bool | None = None,
+    synchronous: bool | None = None,
+) -> Iterator[Run]:
+    """
+    Start an MLflow run and log parameters using the provided configuration object.
+
+    This context manager starts an MLflow run and logs parameters using the specified
+    configuration object. It ensures that the run is properly closed after completion.
+
+    Args:
+        config (object): The configuration object to log parameters from.
+        run_id (str | None): The existing run ID. Defaults to None.
+        experiment_id (str | None): The experiment ID. Defaults to None.
+        run_name (str | None): The name of the run. Defaults to None.
+        nested (bool): Whether to allow nested runs. Defaults to False.
+        parent_run_id (str | None): The parent run ID. Defaults to None.
+        tags (dict[str, str] | None): Tags to associate with the run. Defaults to None.
+        description (str | None): A description of the run. Defaults to None.
+        log_system_metrics (bool | None): Whether to log system metrics. Defaults to None.
+        synchronous (bool | None): Whether to log parameters synchronously. Defaults to None.
+
+    Yields:
+        Run: An MLflow Run object representing the started run.
+
+    Example:
+        with start_run(config) as run:
+            # Perform operations within the MLflow run context
+            pass
+
+    See Also:
+        `mlflow.start_run`: The MLflow function to start a run directly.
+        `log_run`: A context manager to log parameters and manage the MLflow run context.
+    """
+    with mlflow.start_run(
+        run_id=run_id,
+        experiment_id=experiment_id,
+        run_name=run_name,
+        nested=nested,
+        parent_run_id=parent_run_id,
+        tags=tags,
+        description=description,
+        log_system_metrics=log_system_metrics,
+    ) as run:
+        with log_run(config, synchronous=synchronous):
+            yield run
+
+
 @contextmanager
 def watch(
     func: Callable[[Path], None],
@@ -105,12 +154,12 @@ def watch(
     period or until the context is exited.
 
     Args:
-        func: The function to call when a change is
+        func (Callable[[Path], None]): The function to call when a change is
            detected. It should accept a single argument of type `Path`,
            which is the path of the modified file.
-        dir: The directory to watch. If not specified,
+        dir (Path | str): The directory to watch. If not specified,
            the current MLflow artifact URI is used. Defaults to "".
-        timeout: The timeout period in seconds for the watcher
+        timeout (int): The timeout period in seconds for the watcher
            to run after the context is exited. Defaults to 60.
 
     Yields:
@@ -122,6 +171,8 @@ def watch(
             pass
     """
     dir = dir or get_artifact_dir()
+    if isinstance(dir, Path):
+        dir = dir.as_posix()
 
     handler = Handler(func)
     observer = Observer()
@@ -152,7 +203,7 @@ class Handler(FileSystemEventHandler):
         self.func = func
 
     def on_modified(self, event: FileModifiedEvent) -> None:
-        file = Path(event.src_path)
+        file = Path(str(event.src_path))
         if file.is_file():
             self.func(file)
 
@@ -171,8 +222,8 @@ def chdir_artifact(
     to the original directory after the context is exited.
 
     Args:
-        run: The run to get the artifact directory from.
-        artifact_path: The artifact path.
+        run (Run): The run to get the artifact directory from.
+        artifact_path (str | None): The artifact path.
     """
     curdir = Path.cwd()
     path = mlflow.artifacts.download_artifacts(
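
`start_run` wraps `mlflow.start_run` around `log_run`, so parameters from the config are logged inside a managed run. A sketch of how it might be used from a Hydra entry point; the config class and job body are illustrative:

    from dataclasses import dataclass

    import hydra
    from hydra.core.config_store import ConfigStore

    import hydraflow

    @dataclass
    class Config:
        lr: float = 0.1

    cs = ConfigStore.instance()
    cs.store(name="config", node=Config)

    @hydra.main(config_name="config", version_base=None)
    def app(cfg: Config) -> None:
        hydraflow.set_experiment()
        with hydraflow.start_run(cfg) as run:
            # training code goes here; parameters from cfg are logged by log_run
            print(run.info.run_id)

    if __name__ == "__main__":
        app()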
hydraflow/mlflow.py CHANGED
@@ -6,14 +6,24 @@ configuration objects and set up experiments using MLflow.
 from __future__ import annotations
 
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 import mlflow
 from hydra.core.hydra_config import HydraConfig
+from mlflow.tracking import artifact_utils
+from omegaconf import OmegaConf
 
 from hydraflow.config import iter_params
 
+if TYPE_CHECKING:
+    from mlflow.entities.experiment import Experiment
 
-def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
+
+def set_experiment(
+    prefix: str = "",
+    suffix: str = "",
+    uri: str | Path | None = None,
+) -> Experiment:
     """
     Set the experiment name and tracking URI optionally.
 
@@ -22,16 +32,20 @@ def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -
     also set the tracking URI.
 
     Args:
-        prefix: The prefix to prepend to the experiment name.
-        suffix: The suffix to append to the experiment name.
-        uri: The tracking URI to use.
+        prefix (str): The prefix to prepend to the experiment name.
+        suffix (str): The suffix to append to the experiment name.
+        uri (str | Path | None): The tracking URI to use. Defaults to None.
+
+    Returns:
+        Experiment: An instance of `mlflow.entities.Experiment` representing
+            the new active experiment.
     """
-    if uri:
+    if uri is not None:
         mlflow.set_tracking_uri(uri)
 
     hc = HydraConfig.get()
     name = f"{prefix}{hc.job.name}{suffix}"
-    mlflow.set_experiment(name)
+    return mlflow.set_experiment(name)
 
 
 def log_params(config: object, *, synchronous: bool | None = None) -> None:
@@ -43,15 +57,19 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
     `mlflow.log_param` method.
 
     Args:
-        config: The configuration object to log the parameters from.
-        synchronous: Whether to log the parameters synchronously.
+        config (object): The configuration object to log the parameters from.
+        synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
     """
     for key, value in iter_params(config):
         mlflow.log_param(key, value, synchronous=synchronous)
 
 
-def get_artifact_dir(artifact_path: str | None = None) -> Path:
+def get_artifact_dir(
+    artifact_path: str | None = None,
+    *,
+    run_id: str | None = None,
+) -> Path:
     """
     Get the artifact directory for the given artifact path.
 
@@ -60,13 +78,47 @@ def get_artifact_dir(artifact_path: str | None = None) -> Path:
     the path to that directory.
 
     Args:
-        artifact_path: The artifact path for which to get the directory.
-            Defaults to None.
+        artifact_path (str | None): The artifact path for which to get the
+            directory. Defaults to None.
+        run_id (str | None): The run ID for which to get the artifact directory.
 
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
-    uri = mlflow.get_artifact_uri(artifact_path)
+    if run_id is None:
+        uri = mlflow.get_artifact_uri(artifact_path)
+    else:
+        uri = artifact_utils.get_artifact_uri(run_id, artifact_path)
+
     dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
 
     return Path(dir)
+
+
+def get_hydra_output_dir(*, run_id: str | None = None) -> Path:
+    if run_id is None:
+        hc = HydraConfig.get()
+        return Path(hc.runtime.output_dir)
+
+    path = get_artifact_dir(run_id=run_id) / ".hydra/hydra.yaml"
+
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+
+    raise FileNotFoundError
+
+
+# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+#     """
+#     Log the Hydra output directory.
+
+#     Args:
+#         run: The run object.
+
+#     Returns:
+#         None
+#     """
+#     output_dir = get_hydra_output_dir(run)
+#     run_id = run if isinstance(run, str) else run.info.run_id
+#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
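
With the new `run_id` keyword, both directory helpers can now be used outside the active-run context to resolve directories for a finished run. A brief sketch; the run ID is a placeholder:

    from hydraflow import get_artifact_dir, get_hydra_output_dir

    run_id = "0123456789abcdef"  # placeholder for a real MLflow run ID

    artifact_dir = get_artifact_dir(run_id=run_id)    # downloads the run's artifacts and returns the local path
    output_dir = get_hydra_output_dir(run_id=run_id)  # reads .hydra/hydra.yaml from those artifacts

    print(artifact_dir, output_dir)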