PyPI - hydraflow - Versions diffs - 0.1.4__tar.gz → 0.1.5__tar.gz - Mend

hydraflow 0.1.4tar.gz → 0.1.5tar.gz

Files changed (29) hide show

hydraflow-0.1.5/PKG-INFO ADDED Viewed

@@ -0,0 +1,111 @@
+Metadata-Version: 2.3
+Name: hydraflow
+Version: 0.1.5
+Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
+Project-URL: Documentation, https://github.com/daizutabi/hydraflow
+Project-URL: Source, https://github.com/daizutabi/hydraflow
+Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
+Author-email: daizutabi <daizutabi@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Documentation
+Classifier: Topic :: Software Development :: Documentation
+Requires-Python: >=3.10
+Requires-Dist: hydra-core>1.3
+Requires-Dist: mlflow>2.15
+Requires-Dist: setuptools
+Requires-Dist: watchdog
+Provides-Extra: dev
+Requires-Dist: pytest-clarity; extra == 'dev'
+Requires-Dist: pytest-cov; extra == 'dev'
+Requires-Dist: pytest-randomly; extra == 'dev'
+Requires-Dist: pytest-xdist; extra == 'dev'
+Description-Content-Type: text/markdown
+# Hydraflow
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Python Version][python-v-image]][python-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+<!-- Badges -->
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+## Overview
+Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+## Key Features
+- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+## Installation
+You can install Hydraflow via pip:
+```bash
+pip install hydraflow
+```
+## Getting Started
+Here is a simple example to get you started with Hydraflow:
+```python
+import hydra
+import hydraflow
+import mlflow
+from dataclasses import dataclass
+from hydra.core.config_store import ConfigStore
+from pathlib import Path
+@dataclass
+class MySQLConfig:
+    host: str = "localhost"
+    port: int = 3306
+cs = ConfigStore.instance()
+cs.store(name="config", node=MySQLConfig)
+@hydra.main(version_base=None, config_name="config")
+def my_app(cfg: MySQLConfig) -> None:
+    # Set experiment by Hydra job name.
+    hydraflow.set_experiment()
+    # Automatically log params using Hydra config.
+    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+        # Your app code below.
+        # `info.output_dir` is the Hydra output directory.
+        # `info.artifact_dir` is the MLflow artifact directory.
+        with hydraflow.watch(callback):
+            # Watch files in the MLflow artifact directory.
+            # You can update metrics or log other artifacts
+            # according to the watched files in your callback
+            # function.
+            pass
+# Your callback function here.
+def callback(file: Path) -> None:
+    pass
+if __name__ == "__main__":
+    my_app()
+```

hydraflow-0.1.5/README.md ADDED Viewed

@@ -0,0 +1,82 @@
+# Hydraflow
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Python Version][python-v-image]][python-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+<!-- Badges -->
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+## Overview
+Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
+## Key Features
+- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
+- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
+- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
+- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
+## Installation
+You can install Hydraflow via pip:
+```bash
+pip install hydraflow
+```
+## Getting Started
+Here is a simple example to get you started with Hydraflow:
+```python
+import hydra
+import hydraflow
+import mlflow
+from dataclasses import dataclass
+from hydra.core.config_store import ConfigStore
+from pathlib import Path
+@dataclass
+class MySQLConfig:
+    host: str = "localhost"
+    port: int = 3306
+cs = ConfigStore.instance()
+cs.store(name="config", node=MySQLConfig)
+@hydra.main(version_base=None, config_name="config")
+def my_app(cfg: MySQLConfig) -> None:
+    # Set experiment by Hydra job name.
+    hydraflow.set_experiment()
+    # Automatically log params using Hydra config.
+    with mlflow.start_run(), hydraflow.log_run(cfg) as info:
+        # Your app code below.
+        # `info.output_dir` is the Hydra output directory.
+        # `info.artifact_dir` is the MLflow artifact directory.
+        with hydraflow.watch(callback):
+            # Watch files in the MLflow artifact directory.
+            # You can update metrics or log other artifacts
+            # according to the watched files in your callback
+            # function.
+            pass
+# Your callback function here.
+def callback(file: Path) -> None:
+    pass
+if __name__ == "__main__":
+    my_app()
+```

{hydraflow-0.1.4 → hydraflow-0.1.5}/pyproject.toml RENAMED Viewed

@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.1.4"
-description = "Hydra with MLflow"
+version = "0.1.5"
+description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
 authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]

hydraflow-0.1.5/src/hydraflow/config.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""
+This module provides functionality for working with configuration
+objects using the OmegaConf library.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from omegaconf import DictConfig, ListConfig, OmegaConf
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from typing import Any
+def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
+    """
+    Iterate over the parameters in the given configuration object.
+    This function recursively traverses the configuration object and yields
+    key-value pairs representing the parameters.
+    Args:
+        config (object): The configuration object to iterate over.
+        prefix (str, optional): The prefix to prepend to the parameter keys.
+            Defaults to "".
+    Yields:
+        Key-value pairs representing the parameters.
+    """
+    if not isinstance(config, (DictConfig, ListConfig)):
+        config = OmegaConf.create(config)  # type: ignore
+    if isinstance(config, DictConfig):
+        for key, value in config.items():
+            if isinstance(value, ListConfig) and not any(
+                isinstance(v, (DictConfig, ListConfig)) for v in value
+            ):
+                yield f"{prefix}{key}", value
+            elif isinstance(value, (DictConfig, ListConfig)):
+                yield from iter_params(value, f"{prefix}{key}.")
+            else:
+                yield f"{prefix}{key}", value
+    elif isinstance(config, ListConfig):
+        for index, value in enumerate(config):
+            if isinstance(value, (DictConfig, ListConfig)):
+                yield from iter_params(value, f"{prefix}{index}.")
+            else:
+                yield f"{prefix}{index}", value

{hydraflow-0.1.4 → hydraflow-0.1.5}/src/hydraflow/context.py RENAMED Viewed

@@ -1,3 +1,8 @@
+"""
+This module provides context managers to log parameters and manage the MLflow
+run context.
+"""
 from __future__ import annotations
 import os
@@ -35,6 +40,28 @@ def log_run(
     *,
     synchronous: bool | None = None,
 ) -> Iterator[Info]:
+    """
+    Log the parameters from the given configuration object and manage the MLflow
+    run context.
+    This context manager logs the parameters from the provided configuration object
+    using MLflow. It also manages the MLflow run context, ensuring that artifacts
+    are logged and the run is properly closed.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Whether to log the parameters synchronously.
+            Defaults to None.
+    Yields:
+        Info: An `Info` object containing the output directory and artifact directory
+        paths.
+    Example:
+        with log_run(config) as info:
+            # Perform operations within the MLflow run context
+            pass
+    """
     log_params(config, synchronous=synchronous)
     hc = HydraConfig.get()
@@ -61,6 +88,32 @@ def log_run(
 @contextmanager
 def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
+    """
+    Watch the given directory for changes and call the provided function
+    when a change is detected.
+    This context manager sets up a file system watcher on the specified directory.
+    When a file modification is detected, the provided function is called with
+    the path of the modified file. The watcher runs for the specified timeout
+    period or until the context is exited.
+    Args:
+        func (Callable[[Path], None]): The function to call when a change is
+            detected. It should accept a single argument of type `Path`,
+            which is the path of the modified file.
+        dir (Path | str, optional): The directory to watch. If not specified,
+            the current MLflow artifact URI is used. Defaults to "".
+        timeout (int, optional): The timeout period in seconds for the watcher
+            to run after the context is exited. Defaults to 60.
+    Yields:
+        None: This context manager does not return any value.
+    Example:
+        with watch(log_artifact, "/path/to/dir"):
+            # Perform operations while watching the directory for changes
+            pass
+    """
     if not dir:
         uri = mlflow.get_artifact_uri()
         dir = uri_to_path(uri)
@@ -100,6 +153,18 @@ def chdir_artifact(
     run: Run | Series | str,
     artifact_path: str | None = None,
 ) -> Iterator[Path]:
+    """
+    Change the current working directory to the artifact directory of the
+    given run.
+    This context manager changes the current working directory to the artifact
+    directory of the given run. It ensures that the directory is changed back
+    to the original directory after the context is exited.
+    Args:
+        run: The run to get the artifact directory from.
+        artifact_path: The artifact path.
+    """
     curdir = Path.cwd()
     artifact_dir = get_artifact_path(run, artifact_path)

hydraflow-0.1.5/src/hydraflow/mlflow.py ADDED Viewed

@@ -0,0 +1,49 @@
+"""
+This module provides functionality to log parameters from Hydra
+configuration objects and set up experiments using MLflow.
+"""
+from __future__ import annotations
+import mlflow
+from hydra.core.hydra_config import HydraConfig
+from hydraflow.config import iter_params
+def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
+    """
+    Set the experiment name and tracking URI optionally.
+    This function sets the experiment name by combining the given prefix,
+    the job name from HydraConfig, and the given suffix. Optionally, it can
+    also set the tracking URI.
+    Args:
+        prefix: The prefix to prepend to the experiment name.
+        suffix: The suffix to append to the experiment name.
+        uri: The tracking URI to use.
+    """
+    if uri:
+        mlflow.set_tracking_uri(uri)
+    hc = HydraConfig.get()
+    name = f"{prefix}{hc.job.name}{suffix}"
+    mlflow.set_experiment(name)
+def log_params(config: object, *, synchronous: bool | None = None) -> None:
+    """
+    Log the parameters from the given configuration object.
+    This method logs the parameters from the provided configuration object
+    using MLflow. It iterates over the parameters and logs them using the
+    `mlflow.log_param` method.
+    Args:
+        config: The configuration object to log the parameters from.
+        synchronous: Whether to log the parameters synchronously.
+            Defaults to None.
+    """
+    for key, value in iter_params(config):
+        mlflow.log_param(key, value, synchronous=synchronous)

hydraflow-0.1.5/src/hydraflow/runs.py ADDED Viewed

@@ -0,0 +1,512 @@
+"""
+This module provides functionality for managing and interacting with MLflow runs.
+It includes classes and functions to filter runs, retrieve run information, and
+log artifacts and configurations.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from functools import cache
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+import mlflow
+import numpy as np
+from mlflow.entities.run import Run as Run_
+from mlflow.tracking import artifact_utils
+from omegaconf import DictConfig, OmegaConf
+from pandas import DataFrame, Series
+from hydraflow.config import iter_params
+from hydraflow.util import uri_to_path
+if TYPE_CHECKING:
+    from typing import Any
+@dataclass
+class Runs:
+    """
+    A class to represent a collection of MLflow runs.
+    This class provides methods to interact with the runs, such as filtering,
+    retrieving specific runs, and accessing run information.
+    """
+    runs: list[Run_] | DataFrame
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({len(self)})"
+    def __len__(self) -> int:
+        return len(self.runs)
+    def filter(self, config: object) -> Runs:
+        """
+        Filter the runs based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object. The configuration object should
+        contain key-value pairs that correspond to the parameters of the
+        runs. Only the runs that match all the specified parameters will
+        be included in the returned `Runs` object.
+        Args:
+            config (object): The configuration object to filter the runs.
+                This object should contain key-value pairs representing
+                the parameters to filter by.
+        Returns:
+            Runs: A new `Runs` object containing the filtered runs.
+        """
+        return Runs(filter_runs(self.runs, config))
+    def get(self, config: object) -> Run:
+        """
+        Retrieve a specific run based on the provided configuration.
+        This method filters the runs in the collection according to the
+        specified configuration object and returns the run that matches
+        the provided parameters. If more than one run matches the criteria,
+        an error is raised.
+        Args:
+            config (object): The configuration object to identify the run.
+        Returns:
+            Run: The run object that matches the provided configuration.
+        Raises:
+            ValueError: If the number of filtered runs is not exactly one.
+        """
+        return Run(get_run(self.runs, config))
+    def drop_unique_params(self) -> Runs:
+        """
+        Drop unique parameters from the runs and return a new Runs object.
+        This method removes parameter columns whose value is identical across
+        all runs in the collection, keeping only the parameters that vary
+        between runs. This is useful for seeing which parameters differ.
+        Returns:
+            Runs: A new `Runs` object with unique parameters dropped.
+        Raises:
+            NotImplementedError: If the runs are not in a DataFrame format.
+        """
+        if isinstance(self.runs, DataFrame):
+            return Runs(drop_unique_params(self.runs))
+        raise NotImplementedError
+    def get_param_names(self) -> list[str]:
+        """
+        Get the parameter names from the runs.
+        This method extracts the parameter names from the runs in the collection.
+        If the runs are stored in a DataFrame, it retrieves the column names
+        that correspond to the parameters.
+        Returns:
+            list[str]: A list of parameter names.
+        Raises:
+            NotImplementedError: If the runs are not in a DataFrame format.
+        """
+        if isinstance(self.runs, DataFrame):
+            return get_param_names(self.runs)
+        raise NotImplementedError
+    def get_param_dict(self) -> dict[str, list[str]]:
+        """
+        Get the parameter dictionary from the runs.
+        This method extracts the parameter names and their corresponding values
+        from the runs in the collection. If the runs are stored in a DataFrame,
+        it retrieves the unique values for each parameter.
+        Returns:
+            dict[str, list[str]]: A dictionary of parameter names and their
+            corresponding values.
+        Raises:
+            NotImplementedError: If the runs are not in a DataFrame format.
+        """
+        if isinstance(self.runs, DataFrame):
+            return get_param_dict(self.runs)
+        raise NotImplementedError
+def search_runs(*args, **kwargs) -> Runs:
+    """
+    Search for runs that match the specified criteria.
+    This function wraps the `mlflow.search_runs` function and returns the results
+    as a `Runs` object.  It allows for flexible searching of MLflow runs based on
+    various criteria.
+    Args:
+        *args: Positional arguments to pass to `mlflow.search_runs`.
+        **kwargs: Keyword arguments to pass to `mlflow.search_runs`.
+    Returns:
+        Runs: A `Runs` object containing the search results.
+    """
+    runs = mlflow.search_runs(*args, **kwargs)
+    return Runs(runs)
+def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
+    """
+    Filter the runs based on the provided configuration.
+    This method filters the runs in the collection according to the
+    specified configuration object. The configuration object should
+    contain key-value pairs that correspond to the parameters of the
+    runs. Only the runs that match all the specified parameters will
+    be included in the returned `Runs` object.
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to filter the runs.
+    Returns:
+        list[Run_] | DataFrame: The filtered runs, as a list when a list
+        was given and as a DataFrame otherwise.
+    """
+    if isinstance(runs, list):
+        return _filter_runs_list(runs, config)
+    return _filter_runs_dataframe(runs, config)
+def _is_equal(run: Run_, key: str, value: Any) -> bool:
+    param = run.data.params.get(key, value)
+    if param is None:
+        return False
+    return type(value)(param) == value
+def _filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
+    for key, value in iter_params(config):
+        runs = [run for run in runs if _is_equal(run, key, value)]
+    return runs
+def _filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
+    """
+    Filter a DataFrame of runs by the parameters found in `config`.
+    Each key yielded by `iter_params(config)` is looked up as the column
+    `params.<key>`. Keys without a matching column are ignored; for the
+    others, a row is kept only if its entry is present and equals the
+    configured value after conversion to that value's type.
+    Args:
+        runs: The DataFrame containing the runs.
+        config: The configuration object to filter the runs.
+    Returns:
+        DataFrame: The rows of `runs` that match every checked parameter.
+    """
+    index = np.ones(len(runs), dtype=bool)
+    for key, value in iter_params(config):
+        name = f"params.{key}"
+        if name in runs:
+            series = runs[name]
+            # `notna()` builds the presence mask directly instead of negating
+            # `isna()` with unary minus, which NumPy rejects for boolean arrays.
+            is_value = series.notna()
+            # Missing entries are filled with `value` only so that `astype`
+            # has something to convert; `is_value` masks them out again below.
+            param = series.fillna(value).astype(type(value))
+            index &= is_value & (param == value)
+    return runs[index]
+def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
+    """
+    Retrieve a specific run based on the provided configuration.
+    This method filters the runs in the collection according to the
+    specified configuration object and returns the run that matches
+    the provided parameters. If more than one run matches the criteria,
+    an error is raised.
+    Args:
+        runs: The runs to filter.
+        config: The configuration object to identify the run.
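+    Returns:
+        Run_ | Series: The single run that matches the provided configuration,
+        as a run object for list input or a row for DataFrame input.
+    Raises:
+        ValueError: If the number of filtered runs is not exactly one.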
+    """
+    runs = filter_runs(runs, config)
+    if len(runs) == 1:
+        return runs[0] if isinstance(runs, list) else runs.iloc[0]
+    msg = f"number of filtered runs is not 1: got {len(runs)}"
+    raise ValueError(msg)
+def drop_unique_params(runs: DataFrame) -> DataFrame:
+    """
+    Drop unique parameters from the runs and return a new DataFrame.
+    This function removes parameter columns whose value is identical across
+    all runs in the collection, keeping only the parameters that vary
+    between runs. This is useful for seeing which parameters differ.
+    Args:
+        runs: The DataFrame containing the runs.
+    Returns:
+        DataFrame: A new DataFrame with unique parameters dropped.
+    """
+    def select(column: str) -> bool:
+        return not column.startswith("params.") or len(runs[column].unique()) > 1
+    columns = [select(column) for column in runs.columns]
+    return runs.iloc[:, columns]
+def get_param_names(runs: DataFrame) -> list[str]:
+    """
+    Get the parameter names from the runs.
+    This method extracts the parameter names from the runs in the collection.
+    If the runs are stored in a DataFrame, it retrieves the column names
+    that correspond to the parameters.
+    Args:
+        runs: The DataFrame containing the runs.
+    Returns:
+        list[str]: A list of parameter names.
+    """
+    def get_name(column: str) -> str:
+        if column.startswith("params."):
+            return column.split(".", maxsplit=1)[-1]
+        return ""
+    columns = [get_name(column) for column in runs.columns]
+    return [column for column in columns if column]
+def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
+    """
+    Get the parameter dictionary from the runs.
+    This method extracts the parameter names and their corresponding values
+    from the runs in the collection. If the runs are stored in a DataFrame,
+    it retrieves the unique values for each parameter.
+    Args:
+        runs: The DataFrame containing the runs.
+    Returns:
+        dict[str, list[str]]: A dictionary of parameter names and
+        their corresponding values.
+    """
+    params = {}
+    for name in get_param_names(runs):
+        params[name] = list(runs[f"params.{name}"].unique())
+    return params
+@dataclass
+class Run:
+    """
+    A class to represent a specific MLflow run.
+    This class provides methods to interact with the run, such as retrieving
+    the run ID, artifact URI, and configuration. It also includes properties
+    to access the artifact directory, artifact path, and Hydra output directory.
+    """
+    run: Run_ | Series | str
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({self.run_id!r})"
+    @property
+    def run_id(self) -> str:
+        """
+        Get the run ID.
+        Returns:
+            str: The run ID.
+        """
+        return get_run_id(self.run)
+    def artifact_uri(self, artifact_path: str | None = None) -> str:
+        """
+        Get the artifact URI.
+        Args:
+            artifact_path (str | None): The artifact path.
+        Returns:
+            str: The artifact URI.
+        """
+        return get_artifact_uri(self.run, artifact_path)
+    @property
+    def artifact_dir(self) -> Path:
+        """
+        Get the artifact directory.
+        Returns:
+            Path: The artifact directory.
+        """
+        return get_artifact_dir(self.run)
+    def artifact_path(self, artifact_path: str | None = None) -> Path:
+        """
+        Get the artifact path.
+        Args:
+            artifact_path: The artifact path.
+        Returns:
+            Path: The artifact path.
+        """
+        return get_artifact_path(self.run, artifact_path)
+    @property
+    def config(self) -> DictConfig:
+        """
+        Get the configuration.
+        Returns:
+            DictConfig: The configuration.
+        """
+        return load_config(self.run)
+    def log_hydra_output_dir(self) -> None:
+        """
+        Log the Hydra output directory.
+        Returns:
+            None
+        """
+        log_hydra_output_dir(self.run)
+def get_run_id(run: Run_ | Series | str) -> str:
+    """
+    Get the run ID.
+    Args:
+        run: The run object.
+    Returns:
+        str: The run ID.
+    """
+    if isinstance(run, str):
+        return run
+    if isinstance(run, Run_):
+        return run.info.run_id
+    return run.run_id
+def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
+    """
+    Get the artifact URI.
+    Args:
+        run: The run object.
+        artifact_path: The artifact path.
+    Returns:
+        str: The artifact URI.
+    """
+    run_id = get_run_id(run)
+    return artifact_utils.get_artifact_uri(run_id, artifact_path)
+def get_artifact_dir(run: Run_ | Series | str) -> Path:
+    """
+    Get the artifact directory.
+    Args:
+        run: The run object.
+    Returns:
+        Path: The artifact directory.
+    """
+    uri = get_artifact_uri(run)
+    return uri_to_path(uri)
+def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
+    """
+    Get the artifact path.
+    Args:
+        run: The run object.
+        artifact_path: The artifact path.
+    Returns:
+        Path: The artifact path.
+    """
+    artifact_dir = get_artifact_dir(run)
+    return artifact_dir / artifact_path if artifact_path else artifact_dir
+def load_config(run: Run_ | Series | str) -> DictConfig:
+    """
+    Load the configuration.
+    Args:
+        run: The run object.
+    Returns:
+        DictConfig: The configuration.
+    """
+    run_id = get_run_id(run)
+    return _load_config(run_id)
+@cache
+def _load_config(run_id: str) -> DictConfig:
+    try:
+        path = mlflow.artifacts.download_artifacts(
+            run_id=run_id,
+            artifact_path=".hydra/config.yaml",
+        )
+    except OSError:
+        return DictConfig({})
+    return OmegaConf.load(path)  # type: ignore
+def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
+    """
+    Get the Hydra output directory.
+    Args:
+        run: The run object.
+    Returns:
+        Path: The Hydra output directory.
+    """
+    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+    if path.exists():
+        hc = OmegaConf.load(path)
+        return Path(hc.hydra.runtime.output_dir)
+    raise FileNotFoundError
+def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+    """
+    Log the Hydra output directory.
+    The Hydra output directory recorded for the run is looked up with
+    `get_hydra_output_dir` and its contents are logged as artifacts of
+    that same run.
+    Args:
+        run: The run object, a row of a runs DataFrame, or a run ID.
+    Returns:
+        None
+    Raises:
+        FileNotFoundError: If `.hydra/hydra.yaml` does not exist in the
+            artifact directory of the run.
+    """
+    output_dir = get_hydra_output_dir(run)
+    # Resolve the ID with `get_run_id` so that a Series row, which exposes
+    # `run_id` rather than `info.run_id`, is handled like the other types.
+    run_id = get_run_id(run)
+    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

hydraflow-0.1.5/src/hydraflow/util.py ADDED Viewed

@@ -0,0 +1,24 @@
+import platform
+from pathlib import Path
+from urllib.parse import urlparse
+def uri_to_path(uri: str) -> Path:
+    """
+    Convert a URI to a path.
+    This function parses the given URI and converts it to a local file system
+    path. On Windows, if the path starts with a forward slash, it is removed
+    to ensure the path is correctly formatted.
+    Args:
+        uri (str): The URI to convert.
+    Returns:
+        Path: The path corresponding to the URI.
+    """
+    path = urlparse(uri).path
+    if platform.system() == "Windows" and path.startswith("/"):
+        path = path[1:]
+    return Path(path)

{hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_config.py RENAMED Viewed

@@ -59,5 +59,4 @@ def test_iter_params_from_config(cfg):
     assert next(it) == ("size.y", 2)
     assert next(it) == ("db.name", "name")
     assert next(it) == ("db.port", 100)
-    assert next(it) == ("store.items.0", "a")
-    assert next(it) == ("store.items.1", "b")
+    assert next(it) == ("store.items", ["a", "b"])

hydraflow-0.1.4/PKG-INFO DELETED Viewed

@@ -1,45 +0,0 @@
-Metadata-Version: 2.3
-Name: hydraflow
-Version: 0.1.4
-Summary: Hydra with MLflow
-Project-URL: Documentation, https://github.com/daizutabi/hydraflow
-Project-URL: Source, https://github.com/daizutabi/hydraflow
-Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
-Author-email: daizutabi <daizutabi@gmail.com>
-License-Expression: MIT
-License-File: LICENSE
-Classifier: Development Status :: 4 - Beta
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Documentation
-Classifier: Topic :: Software Development :: Documentation
-Requires-Python: >=3.10
-Requires-Dist: hydra-core>1.3
-Requires-Dist: mlflow>2.15
-Requires-Dist: setuptools
-Requires-Dist: watchdog
-Provides-Extra: dev
-Requires-Dist: pytest-clarity; extra == 'dev'
-Requires-Dist: pytest-cov; extra == 'dev'
-Requires-Dist: pytest-randomly; extra == 'dev'
-Requires-Dist: pytest-xdist; extra == 'dev'
-Description-Content-Type: text/markdown
-# hydraflow
-[![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
-[![Build Status][GHAction-image]][GHAction-link]
-[![Coverage Status][codecov-image]][codecov-link]
-<!-- Badges -->
-[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
-[pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
-[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
-[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
-[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
-[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main

hydraflow-0.1.4/README.md DELETED Viewed

@@ -1,16 +0,0 @@
-# hydraflow
-[![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
-[![Build Status][GHAction-image]][GHAction-link]
-[![Coverage Status][codecov-image]][codecov-link]
-<!-- Badges -->
-[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
-[pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
-[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
-[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
-[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
-[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main

hydraflow-0.1.4/src/hydraflow/config.py DELETED Viewed

@@ -1,30 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING
-from omegaconf import DictConfig, ListConfig, OmegaConf
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-    from typing import Any
-def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
-    if not isinstance(config, DictConfig | ListConfig):
-        config = OmegaConf.create(config)  # type: ignore
-    if isinstance(config, DictConfig):
-        for key, value in config.items():
-            if isinstance(value, (DictConfig, ListConfig)):
-                yield from iter_params(value, f"{prefix}{key}.")
-            else:
-                yield f"{prefix}{key}", value
-    elif isinstance(config, ListConfig):
-        for index, value in enumerate(config):
-            if isinstance(value, (DictConfig, ListConfig)):
-                yield from iter_params(value, f"{prefix}{index}.")
-            else:
-                yield f"{prefix}{index}", value

hydraflow-0.1.4/src/hydraflow/mlflow.py DELETED Viewed

@@ -1,20 +0,0 @@
-from __future__ import annotations
-import mlflow
-from hydra.core.hydra_config import HydraConfig
-from hydraflow.config import iter_params
-def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
-    if uri:
-        mlflow.set_tracking_uri(uri)
-    hc = HydraConfig.get()
-    name = f"{prefix}{hc.job.name}{suffix}"
-    mlflow.set_experiment(name)
-def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    for key, value in iter_params(config):
-        mlflow.log_param(key, value, synchronous=synchronous)

hydraflow-0.1.4/src/hydraflow/runs.py DELETED Viewed

@@ -1,217 +0,0 @@
-from __future__ import annotations
-from dataclasses import dataclass
-from functools import cache
-from pathlib import Path
-from typing import TYPE_CHECKING, Any
-import mlflow
-import numpy as np
-from mlflow.entities.run import Run as Run_
-from mlflow.tracking import artifact_utils
-from omegaconf import DictConfig, OmegaConf
-from pandas import DataFrame, Series
-from hydraflow.config import iter_params
-from hydraflow.util import uri_to_path
-if TYPE_CHECKING:
-    from typing import Any
-@dataclass
-class Runs:
-    runs: list[Run_] | DataFrame
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({len(self)})"
-    def __len__(self) -> int:
-        return len(self.runs)
-    def filter(self, config: object) -> Runs:
-        return Runs(filter_runs(self.runs, config))
-    def get(self, config: object) -> Run:
-        return Run(get_run(self.runs, config))
-    def drop_unique_params(self) -> Runs:
-        if isinstance(self.runs, DataFrame):
-            return Runs(drop_unique_params(self.runs))
-        raise NotImplementedError
-    def get_param_names(self) -> list[str]:
-        if isinstance(self.runs, DataFrame):
-            return get_param_names(self.runs)
-        raise NotImplementedError
-    def get_param_dict(self) -> dict[str, list[str]]:
-        if isinstance(self.runs, DataFrame):
-            return get_param_dict(self.runs)
-        raise NotImplementedError
-def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
-    if isinstance(runs, list):
-        return filter_runs_list(runs, config)
-    return filter_runs_dataframe(runs, config)
-def _is_equal(run: Run_, key: str, value: Any) -> bool:
-    param = run.data.params.get(key, value)
-    if param is None:
-        return False
-    return type(value)(param) == value
-def filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
-    for key, value in iter_params(config):
-        runs = [run for run in runs if _is_equal(run, key, value)]
-    return runs
-def filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
-    index = np.ones(len(runs), dtype=bool)
-    for key, value in iter_params(config):
-        name = f"params.{key}"
-        if name in runs:
-            series = runs[name]
-            is_value = -series.isna()
-            param = series.fillna(value).astype(type(value))
-            index &= is_value & (param == value)
-    return runs[index]
-def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
-    runs = filter_runs(runs, config)
-    if len(runs) == 1:
-        return runs[0] if isinstance(runs, list) else runs.iloc[0]
-    msg = f"number of filtered runs is not 1: got {len(runs)}"
-    raise ValueError(msg)
-def drop_unique_params(runs: DataFrame) -> DataFrame:
-    def select(column: str) -> bool:
-        return not column.startswith("params.") or len(runs[column].unique()) > 1
-    columns = [select(column) for column in runs.columns]
-    return runs.iloc[:, columns]
-def get_param_names(runs: DataFrame) -> list[str]:
-    def get_name(column: str) -> str:
-        if column.startswith("params."):
-            return column.split(".", maxsplit=1)[-1]
-        return ""
-    columns = [get_name(column) for column in runs.columns]
-    return [column for column in columns if column]
-def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
-    params = {}
-    for name in get_param_names(runs):
-        params[name] = list(runs[f"params.{name}"].unique())
-    return params
-@dataclass
-class Run:
-    run: Run_ | Series | str
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self.run_id!r})"
-    @property
-    def run_id(self) -> str:
-        return get_run_id(self.run)
-    def artifact_uri(self, artifact_path: str | None = None) -> str:
-        return get_artifact_uri(self.run, artifact_path)
-    @property
-    def artifact_dir(self) -> Path:
-        return get_artifact_dir(self.run)
-    def artifact_path(self, artifact_path: str | None = None) -> Path:
-        return get_artifact_path(self.run, artifact_path)
-    @property
-    def config(self) -> DictConfig:
-        return load_config(self.run)
-    def log_hydra_output_dir(self) -> None:
-        log_hydra_output_dir(self.run)
-def get_run_id(run: Run_ | Series | str) -> str:
-    if isinstance(run, str):
-        return run
-    if isinstance(run, Run_):
-        return run.info.run_id
-    return run.run_id
-def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
-    run_id = get_run_id(run)
-    return artifact_utils.get_artifact_uri(run_id, artifact_path)
-def get_artifact_dir(run: Run_ | Series | str) -> Path:
-    uri = get_artifact_uri(run)
-    return uri_to_path(uri)
-def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
-    artifact_dir = get_artifact_dir(run)
-    return artifact_dir / artifact_path if artifact_path else artifact_dir
-def load_config(run: Run_ | Series | str) -> DictConfig:
-    run_id = get_run_id(run)
-    return _load_config(run_id)
-@cache
-def _load_config(run_id: str) -> DictConfig:
-    try:
-        path = mlflow.artifacts.download_artifacts(
-            run_id=run_id,
-            artifact_path=".hydra/config.yaml",
-        )
-    except OSError:
-        return DictConfig({})
-    return OmegaConf.load(path)  # type: ignore
-def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
-    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
-    raise FileNotFoundError
-def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-    output_dir = get_hydra_output_dir(run)
-    run_id = run if isinstance(run, str) else run.info.run_id
-    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

hydraflow-0.1.4/src/hydraflow/util.py DELETED Viewed

@@ -1,11 +0,0 @@
-import platform
-from pathlib import Path
-from urllib.parse import urlparse
-def uri_to_path(uri: str) -> Path:
-    path = urlparse(uri).path
-    if platform.system() == "Windows" and path.startswith("/"):
-        path = path[1:]
-    return Path(path)