PyPI - hydraflow - Versions diffs - 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hydraflow 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

hydraflow/__init__.py +0 -10
hydraflow/config.py +27 -2
hydraflow/context.py +92 -15
hydraflow/mlflow.py +52 -0
hydraflow/runs.py +315 -110
hydraflow-0.2.0.dist-info/METADATA +111 -0
hydraflow-0.2.0.dist-info/RECORD +9 -0
hydraflow/util.py +0 -11
hydraflow-0.1.4.dist-info/METADATA +0 -45
hydraflow-0.1.4.dist-info/RECORD +0 -10
{hydraflow-0.1.4.dist-info → hydraflow-0.2.0.dist-info}/WHEEL +0 -0
{hydraflow-0.1.4.dist-info → hydraflow-0.2.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -3,15 +3,10 @@ from .mlflow import set_experiment
 from .runs import (
     Run,
     Runs,
-    drop_unique_params,
     filter_runs,
-    get_artifact_dir,
-    get_artifact_path,
-    get_artifact_uri,
     get_param_dict,
     get_param_names,
     get_run,
-    get_run_id,
     load_config,
 )
@@ -20,15 +15,10 @@ __all__ = [
     "Run",
     "Runs",
     "chdir_artifact",
-    "drop_unique_params",
     "filter_runs",
-    "get_artifact_dir",
-    "get_artifact_path",
-    "get_artifact_uri",
     "get_param_dict",
     "get_param_names",
     "get_run",
-    "get_run_id",
     "load_config",
     "log_run",
     "set_experiment",

hydraflow/config.py CHANGED Viewed

@@ -1,3 +1,8 @@
+"""
+This module provides functionality for working with configuration
+objects using the OmegaConf library.
+"""
 from __future__ import annotations
 from typing import TYPE_CHECKING
@@ -10,12 +15,32 @@ if TYPE_CHECKING:
 def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
-    if not isinstance(config, DictConfig | ListConfig):
+    """
+    Recursively iterate over the parameters in the given configuration object.
+    This function traverses the configuration object and yields key-value pairs
+    representing the parameters. The keys are prefixed with the provided prefix.
+    Args:
+        config: The configuration object to iterate over. This can be a dictionary,
+            list, DictConfig, or ListConfig.
+        prefix: The prefix to prepend to the parameter keys.
+            Defaults to an empty string.
+    Yields:
+        Key-value pairs representing the parameters in the configuration object.
+    """
+    if not isinstance(config, (DictConfig, ListConfig)):
         config = OmegaConf.create(config)  # type: ignore
     if isinstance(config, DictConfig):
         for key, value in config.items():
-            if isinstance(value, (DictConfig, ListConfig)):
+            if isinstance(value, ListConfig) and not any(
+                isinstance(v, (DictConfig, ListConfig)) for v in value
+            ):
+                yield f"{prefix}{key}", value
+            elif isinstance(value, (DictConfig, ListConfig)):
                 yield from iter_params(value, f"{prefix}{key}.")
             else:

hydraflow/context.py CHANGED Viewed

@@ -1,5 +1,11 @@
+"""
+This module provides context managers to log parameters and manage the MLflow
+run context.
+"""
 from __future__ import annotations
+import logging
 import os
 import time
 from contextlib import contextmanager
@@ -12,15 +18,14 @@ from hydra.core.hydra_config import HydraConfig
 from watchdog.events import FileModifiedEvent, FileSystemEventHandler
 from watchdog.observers import Observer
-from hydraflow.mlflow import log_params
-from hydraflow.runs import get_artifact_path
-from hydraflow.util import uri_to_path
+from hydraflow.mlflow import get_artifact_dir, log_params
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
     from mlflow.entities.run import Run
-    from pandas import Series
+log = logging.getLogger(__name__)
 @dataclass
@@ -35,12 +40,33 @@ def log_run(
     *,
     synchronous: bool | None = None,
 ) -> Iterator[Info]:
+    """
+    Log the parameters from the given configuration object and manage the MLflow
+    run context.
+    This context manager logs the parameters from the provided configuration object
+    using MLflow. It also manages the MLflow run context, ensuring that artifacts
+    are logged and the run is properly closed.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Whether to log the parameters synchronously.
+            Defaults to None.
+    Yields:
+        Info: An `Info` object containing the output directory and artifact directory
+        paths.
+    Example:
+        with log_run(config) as info:
+            # Perform operations within the MLflow run context
+            pass
+    """
     log_params(config, synchronous=synchronous)
     hc = HydraConfig.get()
     output_dir = Path(hc.runtime.output_dir)
-    uri = mlflow.get_artifact_uri()
-    info = Info(output_dir, uri_to_path(uri))
+    info = Info(output_dir, get_artifact_dir())
     # Save '.hydra' config directory first.
     output_subdir = output_dir / (hc.output_subdir or "")
@@ -54,16 +80,48 @@ def log_run(
         with watch(log_artifact, output_dir):
             yield info
+    except Exception as e:
+        log.error(f"Error during log_run: {e}")
+        raise
     finally:
         # Save output_dir including '.hydra' config directory.
         mlflow.log_artifacts(output_dir.as_posix())
 @contextmanager
-def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
-    if not dir:
-        uri = mlflow.get_artifact_uri()
-        dir = uri_to_path(uri)
+def watch(
+    func: Callable[[Path], None],
+    dir: Path | str = "",
+    timeout: int = 60,
+) -> Iterator[None]:
+    """
+    Watch the given directory for changes and call the provided function
+    when a change is detected.
+    This context manager sets up a file system watcher on the specified directory.
+    When a file modification is detected, the provided function is called with
+    the path of the modified file. The watcher runs for the specified timeout
+    period or until the context is exited.
+    Args:
+        func: The function to call when a change is
+            detected. It should accept a single argument of type `Path`,
+            which is the path of the modified file.
+        dir: The directory to watch. If not specified,
+            the current MLflow artifact URI is used. Defaults to "".
+        timeout: The timeout period in seconds for the watcher
+            to run after the context is exited. Defaults to 60.
+    Yields:
+        None
+    Example:
+        with watch(log_artifact, "/path/to/dir"):
+            # Perform operations while watching the directory for changes
+            pass
+    """
+    dir = dir or get_artifact_dir()
     handler = Handler(func)
     observer = Observer()
@@ -73,6 +131,10 @@ def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60)
     try:
         yield
+    except Exception as e:
+        log.error(f"Error during watch: {e}")
+        raise
     finally:
         elapsed = 0
         while not observer.event_queue.empty():
@@ -97,15 +159,30 @@ class Handler(FileSystemEventHandler):
 @contextmanager
 def chdir_artifact(
-    run: Run | Series | str,
+    run: Run,
     artifact_path: str | None = None,
 ) -> Iterator[Path]:
+    """
+    Change the current working directory to the artifact directory of the
+    given run.
+    This context manager changes the current working directory to the artifact
+    directory of the given run. It ensures that the directory is changed back
+    to the original directory after the context is exited.
+    Args:
+        run: The run to get the artifact directory from.
+        artifact_path: The artifact path.
+    """
     curdir = Path.cwd()
+    path = mlflow.artifacts.download_artifacts(
+        run_id=run.info.run_id,
+        artifact_path=artifact_path,
+    )
-    artifact_dir = get_artifact_path(run, artifact_path)
-    os.chdir(artifact_dir)
+    os.chdir(path)
     try:
-        yield artifact_dir
+        yield Path(path)
     finally:
         os.chdir(curdir)

hydraflow/mlflow.py CHANGED Viewed

@@ -1,5 +1,12 @@
+"""
+This module provides functionality to log parameters from Hydra
+configuration objects and set up experiments using MLflow.
+"""
 from __future__ import annotations
+from pathlib import Path
 import mlflow
 from hydra.core.hydra_config import HydraConfig
@@ -7,6 +14,18 @@ from hydraflow.config import iter_params
 def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
+    """
+    Set the experiment name and tracking URI optionally.
+    This function sets the experiment name by combining the given prefix,
+    the job name from HydraConfig, and the given suffix. Optionally, it can
+    also set the tracking URI.
+    Args:
+        prefix: The prefix to prepend to the experiment name.
+        suffix: The suffix to append to the experiment name.
+        uri: The tracking URI to use.
+    """
     if uri:
         mlflow.set_tracking_uri(uri)
@@ -16,5 +35,38 @@ def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -
 def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log the parameters from the given configuration object.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Whether to log the parameters synchronously.
+            Defaults to None.
+    """
     for key, value in iter_params(config):
         mlflow.log_param(key, value, synchronous=synchronous)
+def get_artifact_dir(artifact_path: str | None = None) -> Path:
+    """
+    Get the artifact directory for the given artifact path.
+    This function retrieves the artifact URI for the specified artifact path
+    using MLflow, downloads the artifacts to a local directory, and returns
+    the path to that directory.
+    Args:
+        artifact_path: The artifact path for which to get the directory.
+            Defaults to None.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    uri = mlflow.get_artifact_uri(artifact_path)
+    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
+    return Path(dir)

hydraflow/runs.py CHANGED Viewed

@@ -1,27 +1,95 @@
+"""
+This module provides functionality for managing and interacting with MLflow runs.
+It includes the `Runs` class and various methods to filter runs, retrieve run information,
+log artifacts, and load configurations.
+"""
 from __future__ import annotations
 from dataclasses import dataclass
 from functools import cache
-from pathlib import Path
+from itertools import chain
 from typing import TYPE_CHECKING, Any
 import mlflow
-import numpy as np
-from mlflow.entities.run import Run as Run_
-from mlflow.tracking import artifact_utils
+from mlflow.entities import ViewType
+from mlflow.entities.run import Run
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from omegaconf import DictConfig, OmegaConf
-from pandas import DataFrame, Series
 from hydraflow.config import iter_params
-from hydraflow.util import uri_to_path
 if TYPE_CHECKING:
     from typing import Any
+def search_runs(
+    experiment_ids: list[str] | None = None,
+    filter_string: str = "",
+    run_view_type: int = ViewType.ACTIVE_ONLY,
+    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
+    order_by: list[str] | None = None,
+    search_all_experiments: bool = False,
+    experiment_names: list[str] | None = None,
+) -> Runs:
+    """
+    Search for Runs that fit the specified criteria.
+    This function wraps the `mlflow.search_runs` function and returns the results
+    as a `Runs` object. It allows for flexible searching of MLflow runs based on
+    various criteria.
+    Args:
+        experiment_ids: List of experiment IDs. Search can work with experiment IDs or
+            experiment names, but not both in the same call. Values other than
+            ``None`` or ``[]`` will result in error if ``experiment_names`` is
+            also not ``None`` or ``[]``. ``None`` will default to the active
+            experiment if ``experiment_names`` is ``None`` or ``[]``.
+        filter_string: Filter query string, defaults to searching all runs.
+        run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or ``ALL`` runs
+            defined in :py:class:`mlflow.entities.ViewType`.
+        max_results: The maximum number of runs to put in the dataframe. Default is 100,000
+            to avoid causing out-of-memory issues on the user's machine.
+        order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
+            can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
+            The default ordering is to sort by ``start_time DESC``, then ``run_id``.
+        output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
+            is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
+            is returned.
+        search_all_experiments: Boolean specifying whether all experiments should be searched.
+            Only honored if ``experiment_ids`` is ``[]`` or ``None``.
+        experiment_names: List of experiment names. Search can work with experiment IDs or
+            experiment names, but not both in the same call. Values other
+            than ``None`` or ``[]`` will result in error if ``experiment_ids``
+            is also not ``None`` or ``[]``. ``None`` will default to the active
+            experiment if ``experiment_ids`` is ``None`` or ``[]``.
+    Returns:
+        A `Runs` object containing the search results.
+    """
+    runs = mlflow.search_runs(
+        experiment_ids=experiment_ids,
+        filter_string=filter_string,
+        run_view_type=run_view_type,
+        max_results=max_results,
+        order_by=order_by,
+        output_format="list",
+        search_all_experiments=search_all_experiments,
+        experiment_names=experiment_names,
+    )
+    return Runs(runs)  # type: ignore
 @dataclass
 class Runs:
-    runs: list[Run_] | DataFrame
+    """
+    A class to represent a collection of MLflow runs.
+    This class provides methods to interact with the runs, such as filtering,
+    retrieving specific runs, and accessing run information.
+    """
+    runs: list[Run]
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
@@ -30,161 +98,280 @@ class Runs:
         return len(self.runs)
     def filter(self, config: object) -> Runs:
+        """
+        Filter the runs based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object. The configuration object should
+        contain key-value pairs that correspond to the parameters of the
+        runs. Only the runs that match all the specified parameters will
+        be included in the returned `Runs` object.
+        Args:
+            config: The configuration object to filter the runs.
+        Returns:
+            A new `Runs` object containing the filtered runs.
+        """
         return Runs(filter_runs(self.runs, config))
-    def get(self, config: object) -> Run:
-        return Run(get_run(self.runs, config))
+    def get(self, config: object) -> Run | None:
+        """
+        Retrieve a specific run based on the provided configuration.
-    def drop_unique_params(self) -> Runs:
-        if isinstance(self.runs, DataFrame):
-            return Runs(drop_unique_params(self.runs))
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the run that matches
+        the provided parameters. If more than one run matches the criteria,
+        a `ValueError` is raised.
-        raise NotImplementedError
+        Args:
+            config: The configuration object to identify the run.
-    def get_param_names(self) -> list[str]:
-        if isinstance(self.runs, DataFrame):
-            return get_param_names(self.runs)
+        Returns:
+            Run: The run object that matches the provided configuration.
+            None, if the runs are not in a DataFrame format.
-        raise NotImplementedError
+        Raises:
+            ValueError: If the number of filtered runs is not exactly one.
+        """
+        return get_run(self.runs, config)
-    def get_param_dict(self) -> dict[str, list[str]]:
-        if isinstance(self.runs, DataFrame):
-            return get_param_dict(self.runs)
+    def get_earliest_run(self, config: object | None = None, **kwargs) -> Run | None:
+        """
+        Get the earliest run from the list of runs based on the start time.
-        raise NotImplementedError
+        This method filters the runs based on the configuration if provided
+        and returns the run with the earliest start time.
+        Args:
+            config: The configuration object to filter the runs.
+                If None, no filtering is applied.
+            **kwargs: Additional key-value pairs to filter the runs.
-def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
-    if isinstance(runs, list):
-        return filter_runs_list(runs, config)
+        Returns:
+            The run with the earliest start time, or None if no runs match the criteria.
+        """
+        return get_earliest_run(self.runs, config, **kwargs)
-    return filter_runs_dataframe(runs, config)
+    def get_latest_run(self, config: object | None = None, **kwargs) -> Run | None:
+        """
+        Get the latest run from the list of runs based on the start time.
+        Args:
+            config: The configuration object to filter the runs.
+                If None, no filtering is applied.
+            **kwargs: Additional key-value pairs to filter the runs.
-def _is_equal(run: Run_, key: str, value: Any) -> bool:
-    param = run.data.params.get(key, value)
+        Returns:
+            The run with the latest start time, or None if no runs match the criteria.
+        """
+        return get_latest_run(self.runs, config, **kwargs)
-    if param is None:
-        return False
+    def get_param_names(self) -> list[str]:
+        """
+        Get the parameter names from the runs.
-    return type(value)(param) == value
+        This method extracts the unique parameter names from the provided list of runs.
+        It iterates through each run and collects the parameter names into a set to
+        ensure uniqueness.
+        Returns:
+            A list of unique parameter names.
+        """
+        return get_param_names(self.runs)
-def filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
-    for key, value in iter_params(config):
+    def get_param_dict(self) -> dict[str, list[str]]:
+        """
+        Get the parameter dictionary from the list of runs.
+        This method extracts the parameter names and their corresponding values
+        from the provided list of runs. It iterates through each run and collects
+        the parameter values into a dictionary where the keys are parameter names
+        and the values are lists of parameter values.
+        Returns:
+            A dictionary where the keys are parameter names and the values are lists
+            of parameter values.
+        """
+        return get_param_dict(self.runs)
+def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
+    """
+    Filter the runs based on the provided configuration.
+    This method filters the runs in the collection according to the
+    specified configuration object. The configuration object should
+    contain key-value pairs that correspond to the parameters of the
+    runs. Only the runs that match all the specified parameters will
+    be included in the returned list of runs.
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to filter the runs.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        A filtered list of runs.
+    """
+    for key, value in chain(iter_params(config), kwargs.items()):
         runs = [run for run in runs if _is_equal(run, key, value)]
+        if len(runs) == 0:
+            return []
     return runs
-def filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
-    index = np.ones(len(runs), dtype=bool)
+def _is_equal(run: Run, key: str, value: Any) -> bool:
+    param = run.data.params.get(key, value)
-    for key, value in iter_params(config):
-        name = f"params.{key}"
+    if param is None:
+        return False
+    return type(value)(param) == value
+def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
+    """
+    Retrieve a specific run based on the provided configuration.
-        if name in runs:
-            series = runs[name]
-            is_value = -series.isna()
-            param = series.fillna(value).astype(type(value))
-            index &= is_value & (param == value)
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the run that matches
+    the provided parameters. If more than one run matches the criteria,
+    a `ValueError` is raised.
-    return runs[index]
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        The run object that matches the provided configuration, or None
+        if no runs match the criteria.
-def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
-    runs = filter_runs(runs, config)
+    Raises:
+        ValueError: If more than one run matches the criteria.
+    """
+    runs = filter_runs(runs, config, **kwargs)
+    if len(runs) == 0:
+        return None
     if len(runs) == 1:
-        return runs[0] if isinstance(runs, list) else runs.iloc[0]
+        return runs[0]
-    msg = f"number of filtered runs is not 1: got {len(runs)}"
+    msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
     raise ValueError(msg)
-def drop_unique_params(runs: DataFrame) -> DataFrame:
-    def select(column: str) -> bool:
-        return not column.startswith("params.") or len(runs[column].unique()) > 1
+def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+    """
+    Get the earliest run from the list of runs based on the start time.
-    columns = [select(column) for column in runs.columns]
-    return runs.iloc[:, columns]
+    This method filters the runs based on the configuration if provided
+    and returns the run with the earliest start time.
+    Args:
+        runs: The list of runs.
+        config: The configuration object to filter the runs.
+            If None, no filtering is applied.
+        **kwargs: Additional key-value pairs to filter the runs.
-def get_param_names(runs: DataFrame) -> list[str]:
-    def get_name(column: str) -> str:
-        if column.startswith("params."):
-            return column.split(".", maxsplit=1)[-1]
+    Returns:
+        The run with the earliest start time, or None if no runs match the criteria.
+    """
+    if config is not None or kwargs:
+        runs = filter_runs(runs, config or {}, **kwargs)
-        return ""
+    return min(runs, key=lambda run: run.info.start_time, default=None)
-    columns = [get_name(column) for column in runs.columns]
-    return [column for column in columns if column]
+def get_latest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
+    """
+    Get the latest run from the list of runs based on the start time.
-def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
-    params = {}
-    for name in get_param_names(runs):
-        params[name] = list(runs[f"params.{name}"].unique())
+    This method filters the runs based on the configuration if provided
+    and returns the run with the latest start time.
-    return params
+    Args:
+        runs: The list of runs.
+        config: The configuration object to filter the runs.
+            If None, no filtering is applied.
+        **kwargs: Additional key-value pairs to filter the runs.
+    Returns:
+        The run with the latest start time, or None if no runs match the criteria.
+    """
+    if config is not None or kwargs:
+        runs = filter_runs(runs, config or {}, **kwargs)
-@dataclass
-class Run:
-    run: Run_ | Series | str
+    return max(runs, key=lambda run: run.info.start_time, default=None)
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self.run_id!r})"
-    @property
-    def run_id(self) -> str:
-        return get_run_id(self.run)
+def get_param_names(runs: list[Run]) -> list[str]:
+    """
+    Get the parameter names from the runs.
-    def artifact_uri(self, artifact_path: str | None = None) -> str:
-        return get_artifact_uri(self.run, artifact_path)
+    This method extracts the unique parameter names from the provided list of runs.
+    It iterates through each run and collects the parameter names into a set to
+    ensure uniqueness.
-    @property
-    def artifact_dir(self) -> Path:
-        return get_artifact_dir(self.run)
+    Args:
+        runs: The list of runs from which to extract parameter names.
-    def artifact_path(self, artifact_path: str | None = None) -> Path:
-        return get_artifact_path(self.run, artifact_path)
+    Returns:
+        A list of unique parameter names.
+    """
+    param_names = set()
-    @property
-    def config(self) -> DictConfig:
-        return load_config(self.run)
+    for run in runs:
+        for param in run.data.params.keys():
+            param_names.add(param)
-    def log_hydra_output_dir(self) -> None:
-        log_hydra_output_dir(self.run)
+    return list(param_names)
-def get_run_id(run: Run_ | Series | str) -> str:
-    if isinstance(run, str):
-        return run
+def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
+    """
+    Get the parameter dictionary from the list of runs.
-    if isinstance(run, Run_):
-        return run.info.run_id
+    This method extracts the parameter names and their corresponding values
+    from the provided list of runs. It iterates through each run and collects
+    the parameter values into a dictionary where the keys are parameter names
+    and the values are lists of parameter values.
-    return run.run_id
+    Args:
+        runs: The list of runs from which to extract parameter names and values.
+    Returns:
+        A dictionary where the keys are parameter names and the values are lists
+        of parameter values.
+    """
+    params = {}
-def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
-    run_id = get_run_id(run)
-    return artifact_utils.get_artifact_uri(run_id, artifact_path)
+    for name in get_param_names(runs):
+        it = (run.data.params[name] for run in runs if name in run.data.params)
+        params[name] = sorted(set(it))
+    return params
-def get_artifact_dir(run: Run_ | Series | str) -> Path:
-    uri = get_artifact_uri(run)
-    return uri_to_path(uri)
+def load_config(run: Run) -> DictConfig:
+    """
+    Load the configuration for a given run.
-def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
-    artifact_dir = get_artifact_dir(run)
-    return artifact_dir / artifact_path if artifact_path else artifact_dir
+    This function loads the configuration for the provided Run instance
+    by downloading the configuration file from the MLflow artifacts and
+    loading it using OmegaConf.
+    Args:
+        run: The Run instance to load the configuration for.
-def load_config(run: Run_ | Series | str) -> DictConfig:
-    run_id = get_run_id(run)
+    Returns:
+        The loaded configuration.
+    """
+    run_id = run.info.run_id
     return _load_config(run_id)
@@ -201,17 +388,35 @@ def _load_config(run_id: str) -> DictConfig:
     return OmegaConf.load(path)  # type: ignore
-def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
-    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+# def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
+#     """
+#     Get the Hydra output directory.
+#     Args:
+#         run: The run object.
+#     Returns:
+#         Path: The Hydra output directory.
+#     """
+#     path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+#     if path.exists():
+#         hc = OmegaConf.load(path)
+#         return Path(hc.hydra.runtime.output_dir)
+#     raise FileNotFoundError
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
-    raise FileNotFoundError
+# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+#     """
+#     Log the Hydra output directory.
+#     Args:
+#         run: The run object.
-def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-    output_dir = get_hydra_output_dir(run)
-    run_id = run if isinstance(run, str) else run.info.run_id
-    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
+#     Returns:
+#         None
+#     """
+#     output_dir = get_hydra_output_dir(run)
+#     run_id = run if isinstance(run, str) else run.info.run_id
+#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

hydraflow-0.2.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,111 @@
+Metadata-Version: 2.3
+Name: hydraflow
+Version: 0.2.0
+Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
+Project-URL: Documentation, https://github.com/daizutabi/hydraflow
+Project-URL: Source, https://github.com/daizutabi/hydraflow
+Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
+Author-email: daizutabi <daizutabi@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Documentation
+Classifier: Topic :: Software Development :: Documentation
+Requires-Python: >=3.10
+Requires-Dist: hydra-core>1.3
+Requires-Dist: mlflow>2.15
+Requires-Dist: setuptools
+Requires-Dist: watchdog
+Provides-Extra: dev
+Requires-Dist: pytest-clarity; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest-randomly; extra == 'dev'
+Requires-Dist: pytest-xdist; extra == 'dev'
+Description-Content-Type: text/markdown
+# Hydraflow
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Python Version][python-v-image]][python-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+<!-- Badges -->
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+## Overview
+Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+## Key Features
+- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+## Installation
+You can install Hydraflow via pip:
+```bash
+pip install hydraflow
+```
+## Getting Started
+Here is a simple example to get you started with Hydraflow:
+```python
+import hydra
+import hydraflow
+import mlflow
+from dataclasses import dataclass
+from hydra.core.config_store import ConfigStore
+from pathlib import Path
+@dataclass
+class MySQLConfig:
+    host: str = "localhost"
+    port: int = 3306
+cs = ConfigStore.instance()
+cs.store(name="config", node=MySQLConfig)
+@hydra.main(version_base=None, config_name="config")
+def my_app(cfg: MySQLConfig) -> None:
+    # Set experiment by Hydra job name.
+    hydraflow.set_experiment()
+    # Automatically log params using Hydra config.
+    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+        # Your app code below.
+        # `info.output_dir` is the Hydra output directory.
+        # `info.artifact_dir` is the MLflow artifact directory.
+        with hydraflow.watch(callback):
+            # Watch files in the MLflow artifact directory.
+            # You can update metrics or log other artifacts
+            # according to the watched files in your callback
+            # function.
+            pass
+# Your callback function here.
+def callback(file: Path) -> None:
+    pass
+if __name__ == "__main__":
+    my_app()
+```

hydraflow-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
+hydraflow/config.py,sha256=BcyOYvdiqSCsmUMA_EvnWPXuW0fC5cT-Q2ilBk9-5gc,1863
+hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
+hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
+hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
+hydraflow-0.2.0.dist-info/METADATA,sha256=dfQ2_-Nk79yVazy5BHasYK681kiG1z-_i4VxWT8fJjg,4224
+hydraflow-0.2.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.2.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.2.0.dist-info/RECORD,,

hydraflow/util.py DELETED Viewed

@@ -1,11 +0,0 @@
-import platform
-from pathlib import Path
-from urllib.parse import urlparse
-def uri_to_path(uri: str) -> Path:
-    path = urlparse(uri).path
-    if platform.system() == "Windows" and path.startswith("/"):
-        path = path[1:]
-    return Path(path)

hydraflow-0.1.4.dist-info/METADATA DELETED Viewed

@@ -1,45 +0,0 @@
-Metadata-Version: 2.3
-Name: hydraflow
-Version: 0.1.4
-Summary: Hydra with MLflow
-Project-URL: Documentation, https://github.com/daizutabi/hydraflow
-Project-URL: Source, https://github.com/daizutabi/hydraflow
-Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
-Author-email: daizutabi <daizutabi@gmail.com>
-License-Expression: MIT
-License-File: LICENSE
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Documentation
-Classifier: Topic :: Software Development :: Documentation
-Requires-Python: >=3.10
-Requires-Dist: hydra-core>1.3
-Requires-Dist: mlflow>2.15
-Requires-Dist: setuptools
-Requires-Dist: watchdog
-Provides-Extra: dev
-Requires-Dist: pytest-clarity; extra == 'dev'
-Requires-Dist: pytest-cov; extra == 'dev'
-Requires-Dist: pytest-randomly; extra == 'dev'
-Requires-Dist: pytest-xdist; extra == 'dev'
-Description-Content-Type: text/markdown
-# hydraflow
-[![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
-[![Build Status][GHAction-image]][GHAction-link]
-[![Coverage Status][codecov-image]][codecov-link]
-<!-- Badges -->
-[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
-[pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
-[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
-[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
-[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
-[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main

hydraflow-0.1.4.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
-hydraflow/config.py,sha256=b3Plh_lmq94loZNw9QP2asd6thCLyTzzYSutH0cONXA,964
-hydraflow/context.py,sha256=3vejDbRYQBuBwlhpBpOv5aoyZ-yS8UUzpbCFK1V1uvw,2720
-hydraflow/mlflow.py,sha256=unBP3Y7ujTM3E_Hq_eYvRVFZoGfTA7B0h4FkOZtPPqc,566
-hydraflow/runs.py,sha256=127YykWzmiNUUuJSGPOCZasXmd6tcE15HU32j8x71ck,5864
-hydraflow/util.py,sha256=_BdOMq5tKPm8HOehb2s2ZIBpJYyVpvO_yaAIxbSj51I,253
-hydraflow-0.1.4.dist-info/METADATA,sha256=Xw-xcDKdzkHa7bKDZUI6MXpOKekcyFbMyBy1yANjNQs,1903
-hydraflow-0.1.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.1.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.1.4.dist-info/RECORD,,

{hydraflow-0.1.4.dist-info → hydraflow-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.1.4.dist-info → hydraflow-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

hydraflow 0.1.4-py3-none-any.whl → 0.2.0-py3-none-any.whl