PyPI - hydraflow - Versions diffs - 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

hydraflow 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hydraflow might be problematic. Click here for more details.

Files changed (11)

hydraflow/__init__.py +0 -10
hydraflow/config.py +8 -7
hydraflow/context.py +31 -19
hydraflow/mlflow.py +23 -0
hydraflow/runs.py +213 -303
{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/METADATA +1 -1
hydraflow-0.2.0.dist-info/RECORD +9 -0
hydraflow/util.py +0 -24
hydraflow-0.1.5.dist-info/RECORD +0 -10
{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/WHEEL +0 -0
{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/__init__.py CHANGED Viewed

@@ -3,15 +3,10 @@ from .mlflow import set_experiment
 from .runs import (
     Run,
     Runs,
-    drop_unique_params,
     filter_runs,
-    get_artifact_dir,
-    get_artifact_path,
-    get_artifact_uri,
     get_param_dict,
     get_param_names,
     get_run,
-    get_run_id,
     load_config,
 )
@@ -20,15 +15,10 @@ __all__ = [
     "Run",
     "Runs",
     "chdir_artifact",
-    "drop_unique_params",
     "filter_runs",
-    "get_artifact_dir",
-    "get_artifact_path",
-    "get_artifact_uri",
     "get_param_dict",
     "get_param_names",
     "get_run",
-    "get_run_id",
     "load_config",
     "log_run",
     "set_experiment",

hydraflow/config.py CHANGED Viewed

@@ -16,18 +16,19 @@ if TYPE_CHECKING:
 def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     """
-    Iterate over the parameters in the given configuration object.
+    Recursively iterate over the parameters in the given configuration object.
-    This function recursively traverses the configuration object and yields
-    key-value pairs representing the parameters.
+    This function traverses the configuration object and yields key-value pairs
+    representing the parameters. The keys are prefixed with the provided prefix.
     Args:
-        config (object): The configuration object to iterate over.
-        prefix (str, optional): The prefix to prepend to the parameter keys.
-            Defaults to "".
+        config: The configuration object to iterate over. This can be a dictionary,
+            list, DictConfig, or ListConfig.
+        prefix: The prefix to prepend to the parameter keys.
+            Defaults to an empty string.
     Yields:
-        Key-value pairs representing the parameters.
+        Key-value pairs representing the parameters in the configuration object.
     """
     if not isinstance(config, (DictConfig, ListConfig)):
         config = OmegaConf.create(config)  # type: ignore

hydraflow/context.py CHANGED Viewed

@@ -5,6 +5,7 @@ run context.
 from __future__ import annotations
+import logging
 import os
 import time
 from contextlib import contextmanager
@@ -17,15 +18,14 @@ from hydra.core.hydra_config import HydraConfig
 from watchdog.events import FileModifiedEvent, FileSystemEventHandler
 from watchdog.observers import Observer
-from hydraflow.mlflow import log_params
-from hydraflow.runs import get_artifact_path
-from hydraflow.util import uri_to_path
+from hydraflow.mlflow import get_artifact_dir, log_params
 if TYPE_CHECKING:
     from collections.abc import Callable, Iterator
     from mlflow.entities.run import Run
-    from pandas import Series
+log = logging.getLogger(__name__)
 @dataclass
@@ -66,8 +66,7 @@ def log_run(
     hc = HydraConfig.get()
     output_dir = Path(hc.runtime.output_dir)
-    uri = mlflow.get_artifact_uri()
-    info = Info(output_dir, uri_to_path(uri))
+    info = Info(output_dir, get_artifact_dir())
     # Save '.hydra' config directory first.
     output_subdir = output_dir / (hc.output_subdir or "")
@@ -81,13 +80,21 @@ def log_run(
         with watch(log_artifact, output_dir):
             yield info
+    except Exception as e:
+        log.error(f"Error during log_run: {e}")
+        raise
     finally:
         # Save output_dir including '.hydra' config directory.
         mlflow.log_artifacts(output_dir.as_posix())
 @contextmanager
-def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
+def watch(
+    func: Callable[[Path], None],
+    dir: Path | str = "",
+    timeout: int = 60,
+) -> Iterator[None]:
     """
     Watch the given directory for changes and call the provided function
     when a change is detected.
@@ -98,25 +105,23 @@ def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60)
     period or until the context is exited.
     Args:
-        func (Callable[[Path], None]): The function to call when a change is
+        func: The function to call when a change is
             detected. It should accept a single argument of type `Path`,
             which is the path of the modified file.
-        dir (Path | str, optional): The directory to watch. If not specified,
+        dir: The directory to watch. If not specified,
             the current MLflow artifact URI is used. Defaults to "".
-        timeout (int, optional): The timeout period in seconds for the watcher
+        timeout: The timeout period in seconds for the watcher
             to run after the context is exited. Defaults to 60.
     Yields:
-        None: This context manager does not return any value.
+        None
     Example:
         with watch(log_artifact, "/path/to/dir"):
             # Perform operations while watching the directory for changes
             pass
     """
-    if not dir:
-        uri = mlflow.get_artifact_uri()
-        dir = uri_to_path(uri)
+    dir = dir or get_artifact_dir()
     handler = Handler(func)
     observer = Observer()
@@ -126,6 +131,10 @@ def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60)
     try:
         yield
+    except Exception as e:
+        log.error(f"Error during watch: {e}")
+        raise
     finally:
         elapsed = 0
         while not observer.event_queue.empty():
@@ -150,7 +159,7 @@ class Handler(FileSystemEventHandler):
 @contextmanager
 def chdir_artifact(
-    run: Run | Series | str,
+    run: Run,
     artifact_path: str | None = None,
 ) -> Iterator[Path]:
     """
@@ -166,11 +175,14 @@ def chdir_artifact(
         artifact_path: The artifact path.
     """
     curdir = Path.cwd()
+    path = mlflow.artifacts.download_artifacts(
+        run_id=run.info.run_id,
+        artifact_path=artifact_path,
+    )
-    artifact_dir = get_artifact_path(run, artifact_path)
-    os.chdir(artifact_dir)
+    os.chdir(path)
     try:
-        yield artifact_dir
+        yield Path(path)
     finally:
         os.chdir(curdir)

hydraflow/mlflow.py CHANGED Viewed

@@ -5,6 +5,8 @@ configuration objects and set up experiments using MLflow.
 from __future__ import annotations
+from pathlib import Path
 import mlflow
 from hydra.core.hydra_config import HydraConfig
@@ -47,3 +49,24 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
     """
     for key, value in iter_params(config):
         mlflow.log_param(key, value, synchronous=synchronous)
+def get_artifact_dir(artifact_path: str | None = None) -> Path:
+    """
+    Get the artifact directory for the given artifact path.
+    This function retrieves the artifact URI for the specified artifact path
+    using MLflow, downloads the artifacts to a local directory, and returns
+    the path to that directory.
+    Args:
+        artifact_path: The artifact path for which to get the directory.
+            Defaults to None.
+    Returns:
+        The local path to the directory where the artifacts are downloaded.
+    """
+    uri = mlflow.get_artifact_uri(artifact_path)
+    dir = mlflow.artifacts.download_artifacts(artifact_uri=uri)
+    return Path(dir)

hydraflow/runs.py CHANGED Viewed

@@ -1,30 +1,85 @@
 """
 This module provides functionality for managing and interacting with MLflow runs.
-It includes classes and functions to filter runs, retrieve run information, and
-log artifacts and configurations.
+It includes the `Runs` class and various methods to filter runs, retrieve run information,
+log artifacts, and load configurations.
 """
 from __future__ import annotations
 from dataclasses import dataclass
 from functools import cache
-from pathlib import Path
+from itertools import chain
 from typing import TYPE_CHECKING, Any
 import mlflow
-import numpy as np
-from mlflow.entities.run import Run as Run_
-from mlflow.tracking import artifact_utils
+from mlflow.entities import ViewType
+from mlflow.entities.run import Run
+from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
 from omegaconf import DictConfig, OmegaConf
-from pandas import DataFrame, Series
 from hydraflow.config import iter_params
-from hydraflow.util import uri_to_path
 if TYPE_CHECKING:
     from typing import Any
+def search_runs(
+    experiment_ids: list[str] | None = None,
+    filter_string: str = "",
+    run_view_type: int = ViewType.ACTIVE_ONLY,
+    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
+    order_by: list[str] | None = None,
+    search_all_experiments: bool = False,
+    experiment_names: list[str] | None = None,
+) -> Runs:
+    """
+    Search for Runs that fit the specified criteria.
+    This function wraps the `mlflow.search_runs` function and returns the results
+    as a `Runs` object. It allows for flexible searching of MLflow runs based on
+    various criteria.
+    Args:
+        experiment_ids: List of experiment IDs. Search can work with experiment IDs or
+            experiment names, but not both in the same call. Values other than
+            ``None`` or ``[]`` will result in error if ``experiment_names`` is
+            also not ``None`` or ``[]``. ``None`` will default to the active
+            experiment if ``experiment_names`` is ``None`` or ``[]``.
+        filter_string: Filter query string, defaults to searching all runs.
+        run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or ``ALL`` runs
+            defined in :py:class:`mlflow.entities.ViewType`.
+        max_results: The maximum number of runs to put in the dataframe. Default is 100,000
+            to avoid causing out-of-memory issues on the user's machine.
+        order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
+            can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
+            The default ordering is to sort by ``start_time DESC``, then ``run_id``.
+        output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
+            is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
+            is returned.
+        search_all_experiments: Boolean specifying whether all experiments should be searched.
+            Only honored if ``experiment_ids`` is ``[]`` or ``None``.
+        experiment_names: List of experiment names. Search can work with experiment IDs or
+            experiment names, but not both in the same call. Values other
+            than ``None`` or ``[]`` will result in error if ``experiment_ids``
+            is also not ``None`` or ``[]``. ``None`` will default to the active
+            experiment if ``experiment_ids`` is ``None`` or ``[]``.
+    Returns:
+        A `Runs` object containing the search results.
+    """
+    runs = mlflow.search_runs(
+        experiment_ids=experiment_ids,
+        filter_string=filter_string,
+        run_view_type=run_view_type,
+        max_results=max_results,
+        order_by=order_by,
+        output_format="list",
+        search_all_experiments=search_all_experiments,
+        experiment_names=experiment_names,
+    )
+    return Runs(runs)  # type: ignore
 @dataclass
 class Runs:
     """
@@ -34,7 +89,7 @@ class Runs:
     retrieving specific runs, and accessing run information.
     """
-    runs: list[Run_] | DataFrame
+    runs: list[Run]
     def __repr__(self) -> str:
         return f"{self.__class__.__name__}({len(self)})"
@@ -53,115 +108,95 @@ class Runs:
         be included in the returned `Runs` object.
         Args:
-            config (object): The configuration object to filter the runs.
-                This object should contain key-value pairs representing
-                the parameters to filter by.
+            config: The configuration object to filter the runs.
         Returns:
-            Runs: A new `Runs` object containing the filtered runs.
+            A new `Runs` object containing the filtered runs.
         """
         return Runs(filter_runs(self.runs, config))
-    def get(self, config: object) -> Run:
+    def get(self, config: object) -> Run | None:
         """
         Retrieve a specific run based on the provided configuration.
         This method filters the runs in the collection according to the
         specified configuration object and returns the run that matches
         the provided parameters. If more than one run matches the criteria,
-        an error is raised.
+        a `ValueError` is raised.
         Args:
-            config (object): The configuration object to identify the run.
+            config: The configuration object to identify the run.
         Returns:
             Run: The run object that matches the provided configuration.
+            None, if the runs are not in a DataFrame format.
         Raises:
             ValueError: If the number of filtered runs is not exactly one.
         """
-        return Run(get_run(self.runs, config))
+        return get_run(self.runs, config)
-    def drop_unique_params(self) -> Runs:
+    def get_earliest_run(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        Drop unique parameters from the runs and return a new Runs object.
+        Get the earliest run from the list of runs based on the start time.
-        This method removes parameters that have unique values across all runs
-        in the collection. This is useful for identifying common parameters
-        that are shared among multiple runs.
+        This method filters the runs based on the configuration if provided
+        and returns the run with the earliest start time.
+        Args:
+            config: The configuration object to filter the runs.
+                If None, no filtering is applied.
+            **kwargs: Additional key-value pairs to filter the runs.
         Returns:
-            Runs: A new `Runs` object with unique parameters dropped.
+            The run with the earliest start time, or None if no runs match the criteria.
+        """
+        return get_earliest_run(self.runs, config, **kwargs)
-        Raises:
-            NotImplementedError: If the runs are not in a DataFrame format.
+    def get_latest_run(self, config: object | None = None, **kwargs) -> Run | None:
         """
-        if isinstance(self.runs, DataFrame):
-            return Runs(drop_unique_params(self.runs))
+        Get the latest run from the list of runs based on the start time.
-        raise NotImplementedError
+        Args:
+            config: The configuration object to filter the runs.
+                If None, no filtering is applied.
+            **kwargs: Additional key-value pairs to filter the runs.
+        Returns:
+            The run with the latest start time, or None if no runs match the criteria.
+        """
+        return get_latest_run(self.runs, config, **kwargs)
     def get_param_names(self) -> list[str]:
         """
         Get the parameter names from the runs.
-        This method extracts the parameter names from the runs in the collection.
-        If the runs are stored in a DataFrame, it retrieves the column names
-        that correspond to the parameters.
+        This method extracts the unique parameter names from the provided list of runs.
+        It iterates through each run and collects the parameter names into a set to
+        ensure uniqueness.
         Returns:
-            list[str]: A list of parameter names.
-        Raises:
-            NotImplementedError: If the runs are not in a DataFrame format.
+            A list of unique parameter names.
         """
-        if isinstance(self.runs, DataFrame):
-            return get_param_names(self.runs)
-        raise NotImplementedError
+        return get_param_names(self.runs)
     def get_param_dict(self) -> dict[str, list[str]]:
         """
-        Get the parameter dictionary from the runs.
+        Get the parameter dictionary from the list of runs.
         This method extracts the parameter names and their corresponding values
-        from the runs in the collection. If the runs are stored in a DataFrame,
-        it retrieves the unique values for each parameter.
+        from the provided list of runs. It iterates through each run and collects
+        the parameter values into a dictionary where the keys are parameter names
+        and the values are lists of parameter values.
         Returns:
-            dict[str, list[str]]: A dictionary of parameter names and their
-            corresponding values.
-        Raises:
-            NotImplementedError: If the runs are not in a DataFrame format.
+            A dictionary where the keys are parameter names and the values are lists
+            of parameter values.
         """
-        if isinstance(self.runs, DataFrame):
-            return get_param_dict(self.runs)
-        raise NotImplementedError
-def search_runs(*args, **kwargs) -> Runs:
-    """
-    Search for runs that match the specified criteria.
-    This function wraps the `mlflow.search_runs` function and returns the results
-    as a `Runs` object.  It allows for flexible searching of MLflow runs based on
-    various criteria.
-    Args:
-        *args: Positional arguments to pass to `mlflow.search_runs`.
-        **kwargs: Keyword arguments to pass to `mlflow.search_runs`.
-    Returns:
-        Runs: A `Runs` object containing the search results.
-    """
-    runs = mlflow.search_runs(*args, **kwargs)
-    return Runs(runs)
+        return get_param_dict(self.runs)
-def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
+def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
     """
     Filter the runs based on the provided configuration.
@@ -169,22 +204,26 @@ def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | Da
     specified configuration object. The configuration object should
     contain key-value pairs that correspond to the parameters of the
     runs. Only the runs that match all the specified parameters will
-    be included in the returned `Runs` object.
+    be included in the returned list of runs.
     Args:
         runs: The runs to filter.
         config: The configuration object to filter the runs.
+        **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        Runs: A filtered list of runs or a DataFrame.
+        A filtered list of runs.
     """
-    if isinstance(runs, list):
-        return _filter_runs_list(runs, config)
+    for key, value in chain(iter_params(config), kwargs.items()):
+        runs = [run for run in runs if _is_equal(run, key, value)]
-    return _filter_runs_dataframe(runs, config)
+        if len(runs) == 0:
+            return []
+    return runs
-def _is_equal(run: Run_, key: str, value: Any) -> bool:
+def _is_equal(run: Run, key: str, value: Any) -> bool:
     param = run.data.params.get(key, value)
     if param is None:
@@ -193,275 +232,146 @@ def _is_equal(run: Run_, key: str, value: Any) -> bool:
     return type(value)(param) == value
-def _filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
-    for key, value in iter_params(config):
-        runs = [run for run in runs if _is_equal(run, key, value)]
-    return runs
-def _filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
-    index = np.ones(len(runs), dtype=bool)
-    for key, value in iter_params(config):
-        name = f"params.{key}"
-        if name in runs:
-            series = runs[name]
-            is_value = -series.isna()
-            param = series.fillna(value).astype(type(value))
-            index &= is_value & (param == value)
-    return runs[index]
-def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
+def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
     """
     Retrieve a specific run based on the provided configuration.
     This method filters the runs in the collection according to the
     specified configuration object and returns the run that matches
     the provided parameters. If more than one run matches the criteria,
-    an error is raised.
+    a `ValueError` is raised.
     Args:
         runs: The runs to filter.
         config: The configuration object to identify the run.
+        **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        Run: The run object that matches the provided configuration.
+        The run object that matches the provided configuration, or None
+        if no runs match the criteria.
+    Raises:
+        ValueError: If more than one run matches the criteria.
     """
-    runs = filter_runs(runs, config)
+    runs = filter_runs(runs, config, **kwargs)
+    if len(runs) == 0:
+        return None
     if len(runs) == 1:
-        return runs[0] if isinstance(runs, list) else runs.iloc[0]
+        return runs[0]
-    msg = f"number of filtered runs is not 1: got {len(runs)}"
+    msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
     raise ValueError(msg)
-def drop_unique_params(runs: DataFrame) -> DataFrame:
+def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
     """
-    Drop unique parameters from the runs and return a new DataFrame.
+    Get the earliest run from the list of runs based on the start time.
-    This method removes parameters that have unique values across all runs
-    in the collection. This is useful for identifying common parameters
-    that are shared among multiple runs.
+    This method filters the runs based on the configuration if provided
+    and returns the run with the earliest start time.
     Args:
-        runs: The DataFrame containing the runs.
+        runs: The list of runs.
+        config: The configuration object to filter the runs.
+            If None, no filtering is applied.
+        **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        DataFrame: A new DataFrame with unique parameters dropped.
+        The run with the earliest start time, or None if no runs match the criteria.
     """
+    if config is not None or kwargs:
+        runs = filter_runs(runs, config or {}, **kwargs)
-    def select(column: str) -> bool:
-        return not column.startswith("params.") or len(runs[column].unique()) > 1
+    return min(runs, key=lambda run: run.info.start_time, default=None)
-    columns = [select(column) for column in runs.columns]
-    return runs.iloc[:, columns]
-def get_param_names(runs: DataFrame) -> list[str]:
+def get_latest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
     """
-    Get the parameter names from the runs.
+    Get the latest run from the list of runs based on the start time.
-    This method extracts the parameter names from the runs in the collection.
-    If the runs are stored in a DataFrame, it retrieves the column names
-    that correspond to the parameters.
+    This method filters the runs based on the configuration if provided
+    and returns the run with the latest start time.
     Args:
-        runs: The DataFrame containing the runs.
+        runs: The list of runs.
+        config: The configuration object to filter the runs.
+            If None, no filtering is applied.
+        **kwargs: Additional key-value pairs to filter the runs.
     Returns:
-        list[str]: A list of parameter names.
+        The run with the latest start time, or None if no runs match the criteria.
     """
+    if config is not None or kwargs:
+        runs = filter_runs(runs, config or {}, **kwargs)
-    def get_name(column: str) -> str:
-        if column.startswith("params."):
-            return column.split(".", maxsplit=1)[-1]
-        return ""
-    columns = [get_name(column) for column in runs.columns]
-    return [column for column in columns if column]
+    return max(runs, key=lambda run: run.info.start_time, default=None)
-def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
+def get_param_names(runs: list[Run]) -> list[str]:
     """
-    Get the parameter dictionary from the runs.
-    This method extracts the parameter names and their corresponding values
-    from the runs in the collection. If the runs are stored in a DataFrame,
-    it retrieves the unique values for each parameter.
-    Args:
-        runs: The DataFrame containing the runs.
-    Returns:
-        dict[str, list[str]]: A dictionary of parameter names and
-        their corresponding values.
-    """
-    params = {}
-    for name in get_param_names(runs):
-        params[name] = list(runs[f"params.{name}"].unique())
-    return params
-@dataclass
-class Run:
-    """
-    A class to represent a specific MLflow run.
-    This class provides methods to interact with the run, such as retrieving
-    the run ID, artifact URI, and configuration. It also includes properties
-    to access the artifact directory, artifact path, and Hydra output directory.
-    """
-    run: Run_ | Series | str
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self.run_id!r})"
-    @property
-    def run_id(self) -> str:
-        """
-        Get the run ID.
-        Returns:
-            str: The run ID.
-        """
-        return get_run_id(self.run)
-    def artifact_uri(self, artifact_path: str | None = None) -> str:
-        """
-        Get the artifact URI.
-        Args:
-            artifact_path (str | None): The artifact path.
-        Returns:
-            str: The artifact URI.
-        """
-        return get_artifact_uri(self.run, artifact_path)
-    @property
-    def artifact_dir(self) -> Path:
-        """
-        Get the artifact directory.
-        Returns:
-            Path: The artifact directory.
-        """
-        return get_artifact_dir(self.run)
-    def artifact_path(self, artifact_path: str | None = None) -> Path:
-        """
-        Get the artifact path.
-        Args:
-            artifact_path: The artifact path.
-        Returns:
-            Path: The artifact path.
-        """
-        return get_artifact_path(self.run, artifact_path)
-    @property
-    def config(self) -> DictConfig:
-        """
-        Get the configuration.
-        Returns:
-            DictConfig: The configuration.
-        """
-        return load_config(self.run)
-    def log_hydra_output_dir(self) -> None:
-        """
-        Log the Hydra output directory.
-        Returns:
-            None
-        """
-        log_hydra_output_dir(self.run)
+    Get the parameter names from the runs.
-def get_run_id(run: Run_ | Series | str) -> str:
-    """
-    Get the run ID.
+    This method extracts the unique parameter names from the provided list of runs.
+    It iterates through each run and collects the parameter names into a set to
+    ensure uniqueness.
     Args:
-        run: The run object.
+        runs: The list of runs from which to extract parameter names.
     Returns:
-        str: The run ID.
+        A list of unique parameter names.
     """
-    if isinstance(run, str):
-        return run
-    if isinstance(run, Run_):
-        return run.info.run_id
+    param_names = set()
-    return run.run_id
+    for run in runs:
+        for param in run.data.params.keys():
+            param_names.add(param)
+    return list(param_names)
-def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
-    """
-    Get the artifact URI.
-    Args:
-        run: The run object.
-        artifact_path: The artifact path.
-    Returns:
-        str: The artifact URI.
+def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
     """
-    run_id = get_run_id(run)
-    return artifact_utils.get_artifact_uri(run_id, artifact_path)
+    Get the parameter dictionary from the list of runs.
-def get_artifact_dir(run: Run_ | Series | str) -> Path:
-    """
-    Get the artifact directory.
+    This method extracts the parameter names and their corresponding values
+    from the provided list of runs. It iterates through each run and collects
+    the parameter values into a dictionary where the keys are parameter names
+    and the values are lists of parameter values.
     Args:
-        run: The run object.
+        runs: The list of runs from which to extract parameter names and values.
     Returns:
-        Path: The artifact directory.
+        A dictionary where the keys are parameter names and the values are lists
+        of parameter values.
     """
-    uri = get_artifact_uri(run)
-    return uri_to_path(uri)
+    params = {}
+    for name in get_param_names(runs):
+        it = (run.data.params[name] for run in runs if name in run.data.params)
+        params[name] = sorted(set(it))
-def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
-    """
-    Get the artifact path.
+    return params
-    Args:
-        run: The run object.
-        artifact_path: The artifact path.
-    Returns:
-        Path: The artifact path.
+def load_config(run: Run) -> DictConfig:
     """
-    artifact_dir = get_artifact_dir(run)
-    return artifact_dir / artifact_path if artifact_path else artifact_dir
+    Load the configuration for a given run.
-def load_config(run: Run_ | Series | str) -> DictConfig:
-    """
-    Load the configuration.
+    This function loads the configuration for the provided Run instance
+    by downloading the configuration file from the MLflow artifacts and
+    loading it using OmegaConf.
     Args:
-        run: The run object.
+        run: The Run instance to load the configuration for.
     Returns:
-        DictConfig: The configuration.
+        The loaded configuration.
     """
-    run_id = get_run_id(run)
+    run_id = run.info.run_id
     return _load_config(run_id)
@@ -478,35 +388,35 @@ def _load_config(run_id: str) -> DictConfig:
     return OmegaConf.load(path)  # type: ignore
-def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
-    """
-    Get the Hydra output directory.
+# def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
+#     """
+#     Get the Hydra output directory.
-    Args:
-        run: The run object.
+#     Args:
+#         run: The run object.
-    Returns:
-        Path: The Hydra output directory.
-    """
-    path = get_artifact_dir(run) / ".hydra/hydra.yaml"
+#     Returns:
+#         Path: The Hydra output directory.
+#     """
+#     path = get_artifact_dir(run) / ".hydra/hydra.yaml"
-    if path.exists():
-        hc = OmegaConf.load(path)
-        return Path(hc.hydra.runtime.output_dir)
+#     if path.exists():
+#         hc = OmegaConf.load(path)
+#         return Path(hc.hydra.runtime.output_dir)
-    raise FileNotFoundError
+#     raise FileNotFoundError
-def log_hydra_output_dir(run: Run_ | Series | str) -> None:
-    """
-    Log the Hydra output directory.
+# def log_hydra_output_dir(run: Run_ | Series | str) -> None:
+#     """
+#     Log the Hydra output directory.
-    Args:
-        run: The run object.
+#     Args:
+#         run: The run object.
-    Returns:
-        None
-    """
-    output_dir = get_hydra_output_dir(run)
-    run_id = run if isinstance(run, str) else run.info.run_id
-    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
+#     Returns:
+#         None
+#     """
+#     output_dir = get_hydra_output_dir(run)
+#     run_id = run if isinstance(run, str) else run.info.run_id
+#     mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)

{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.1.5
+Version: 0.2.0
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

hydraflow-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
+hydraflow/config.py,sha256=BcyOYvdiqSCsmUMA_EvnWPXuW0fC5cT-Q2ilBk9-5gc,1863
+hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
+hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
+hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
+hydraflow-0.2.0.dist-info/METADATA,sha256=dfQ2_-Nk79yVazy5BHasYK681kiG1z-_i4VxWT8fJjg,4224
+hydraflow-0.2.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+hydraflow-0.2.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.2.0.dist-info/RECORD,,

hydraflow/util.py DELETED Viewed

@@ -1,24 +0,0 @@
-import platform
-from pathlib import Path
-from urllib.parse import urlparse
-def uri_to_path(uri: str) -> Path:
-    """
-    Convert a URI to a path.
-    This function parses the given URI and converts it to a local file system
-    path. On Windows, if the path starts with a forward slash, it is removed
-    to ensure the path is correctly formatted.
-    Args:
-        uri (str): The URI to convert.
-    Returns:
-        Path: The path corresponding to the URI.
-    """
-    path = urlparse(uri).path
-    if platform.system() == "Windows" and path.startswith("/"):
-        path = path[1:]
-    return Path(path)

hydraflow-0.1.5.dist-info/RECORD DELETED Viewed

@@ -1,10 +0,0 @@
-hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
-hydraflow/config.py,sha256=WARa5u1F0n3wCOi65v8v8rUO78ME-mtzMeeeE2Yc1I8,1728
-hydraflow/context.py,sha256=NYjIMepLtaKyvw1obpE8gR1qu1OBpSB_uc6-5So2tg8,5139
-hydraflow/mlflow.py,sha256=2YWOYpv8eRB_ROD2yFh6ksKDXHvAPDYb86hrUi9zv6E,1558
-hydraflow/runs.py,sha256=vH-hrlcoTo8HRmgUWam9gtLXAl_wDzX26HEZGWckdMs,14038
-hydraflow/util.py,sha256=qdUGtBgY7qOF4Yr4PibJHImbLPf-6WYFVuIKu6zbNbY,614
-hydraflow-0.1.5.dist-info/METADATA,sha256=8mCKAA9KjcJAUiqP-DPdMl4Gcp3MSXxOF34VYKA2P8I,4224
-hydraflow-0.1.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
-hydraflow-0.1.5.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.1.5.dist-info/RECORD,,

{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.1.5.dist-info → hydraflow-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

Potentially problematic release.

hydraflow 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl