PyPI - hydraflow - Versions diffs - 0.16.2__py3-none-any.whl → 0.17.0__py3-none-any.whl - Mend

hydraflow 0.16.2-py3-none-any.whl → 0.17.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

hydraflow/core/collection.py +541 -0
hydraflow/core/group_by.py +205 -0
hydraflow/core/run.py +42 -61
hydraflow/core/run_collection.py +37 -494
hydraflow/core/run_info.py +0 -9
{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/METADATA +1 -1
{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/RECORD +10 -8
{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/WHEEL +0 -0
{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/entry_points.txt +0 -0
{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/licenses/LICENSE +0 -0

hydraflow/core/run_collection.py CHANGED Viewed

@@ -37,25 +37,18 @@ Note:
 from __future__ import annotations
-from collections.abc import Hashable, Iterable, Sequence
-from dataclasses import MISSING
+from functools import cached_property
 from typing import TYPE_CHECKING, overload
-import numpy as np
-import polars as pl
-from omegaconf import OmegaConf
-from polars import DataFrame, Series
+from .collection import Collection
 from .run import Run
 if TYPE_CHECKING:
-    from collections.abc import Callable, Iterator
+    from collections.abc import Callable, Iterable
     from typing import Any, Self
-    from numpy.typing import NDArray
-class RunCollection[R: Run[Any, Any]](Sequence[R]):
+class RunCollection[R: Run[Any, Any], I = None](Collection[R]):
     """A collection of Run instances that implements the Sequence protocol.
     RunCollection provides methods for filtering, sorting, grouping, and analyzing
@@ -67,79 +60,6 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
     """
-    runs: list[R]
-    """A list containing the Run instances in this collection."""
-    def __init__(self, runs: Iterable[R]) -> None:
-        self.runs = list(runs)
-    def __repr__(self) -> str:
-        """Return a string representation of the RunCollection."""
-        class_name = self.__class__.__name__
-        if not self:
-            return f"{class_name}(empty)"
-        type_name = repr(self[0])
-        if "(" in type_name:
-            type_name = type_name.split("(", 1)[0]
-        return f"{class_name}({type_name}, n={len(self)})"
-    def __len__(self) -> int:
-        """Return the number of Run instances in the collection.
-        Returns:
-            int: The number of runs.
-        """
-        return len(self.runs)
-    def __bool__(self) -> bool:
-        """Return whether the collection contains any Run instances.
-        Returns:
-            bool: True if the collection is not empty, False otherwise.
-        """
-        return bool(self.runs)
-    @overload
-    def __getitem__(self, index: int) -> R: ...
-    @overload
-    def __getitem__(self, index: slice) -> Self: ...
-    @overload
-    def __getitem__(self, index: Iterable[int]) -> Self: ...
-    def __getitem__(self, index: int | slice | Iterable[int]) -> R | Self:
-        """Get a Run or a new RunCollection based on the provided index.
-        Args:
-            index: Can be one of:
-                - An integer to get a single Run
-                - A slice to get a subrange of Runs
-                - An iterable of integers to get specific Runs
-        Returns:
-            R | Self: A single Run if index is an integer, or a new
-            RunCollection if index is a slice or iterable of integers.
-        """
-        if isinstance(index, int):
-            return self.runs[index]
-        if isinstance(index, slice):
-            return self.__class__(self.runs[index])
-        return self.__class__([self.runs[i] for i in index])
-    def __iter__(self) -> Iterator[R]:
-        """Return an iterator over the Runs in the collection.
-        Returns:
-            Iterator[R]: An iterator yielding Run instances.
-        """
-        return iter(self.runs)
     def preload(
         self,
         *,
@@ -155,15 +75,39 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
         access these properties, as they will be already loaded in memory.
         Args:
-            cfg (bool): Whether to preload the configuration objects
-            impl (bool): Whether to preload the implementation objects
-            n_jobs (int): Number of parallel jobs to run
-                (-1 means using all processors)
+            n_jobs (int): Number of parallel jobs to run.
+                - 0: Run sequentially (default)
+                - -1: Use all available CPU cores
+                - >0: Use the specified number of cores
+            cfg (bool): Whether to preload the configuration objects.
+                Defaults to True.
+            impl (bool): Whether to preload the implementation objects.
+                Defaults to True.
         Returns:
             Self: The same RunCollection instance with preloaded
             configuration and implementation objects.
+        Note:
+            The preloading is done using joblib's threading backend,
+            which is suitable for I/O-bound tasks like loading
+            configuration files and implementation objects.
+        Examples:
+            ```python
+            # Preload all runs sequentially
+            runs.preload()
+            # Preload using all available cores
+            runs.preload(n_jobs=-1)
+            # Preload only configurations
+            runs.preload(impl=False)
+            # Preload only implementations
+            runs.preload(cfg=False)
+            ```
         """
         def load(run: R) -> None:
@@ -220,413 +164,12 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
         for run in self:
             run.update(key, value, force=force)
-    def filter(
-        self,
-        *predicates: Callable[[R], bool] | tuple[str, Any],
-        **kwargs: Any,
-    ) -> Self:
-        """Filter runs based on predicates or key-value conditions.
-        This method allows filtering runs using various criteria:
-        - Callable predicates that take a Run and return a boolean
-        - Key-value tuples where the key is a string and the value
-          is compared using the Run.predicate method
-        - Keyword arguments, where the key is a string and the value
-          is compared using the Run.predicate method
-        Args:
-            *predicates: Callable predicates or (key, value) tuples
-                for filtering.
-            **kwargs: Additional key-value pairs for filtering.
+    @cached_property
+    def impls(self) -> Collection[I]:
+        """Get the implementation object for all runs in the collection.
         Returns:
-            Self: A new RunCollection containing only the runs that
-            match all criteria.
+            Collection[Any]: A collection of implementation objects for all runs.
         """
-        runs = self.runs
-        for predicate in predicates:
-            if callable(predicate):
-                runs = [r for r in runs if predicate(r)]
-            else:
-                runs = [r for r in runs if r.predicate(*predicate)]
-        for key, value in kwargs.items():
-            runs = [r for r in runs if r.predicate(key, value)]
-        return self.__class__(runs)
-    def try_get(
-        self,
-        *predicates: Callable[[R], bool] | tuple[str, Any],
-        **kwargs: Any,
-    ) -> R | None:
-        """Try to get a single run matching the specified criteria.
-        This method applies filters and returns a single matching
-        run if exactly one is found, None if no runs are found,
-        or raises ValueError if multiple runs match.
-        Args:
-            *predicates: Callable predicates or (key, value) tuples
-                for filtering.
-            **kwargs: Additional key-value pairs for filtering.
-        Returns:
-            R | None: A single Run that matches the criteria, or None if
-            no matches are found.
-        Raises:
-            ValueError: If multiple runs match the criteria.
-        """
-        runs = self.filter(*predicates, **kwargs)
-        n = len(runs)
-        if n == 0:
-            return None
-        if n == 1:
-            return runs[0]
-        msg = f"Multiple Run ({n}) found matching the criteria, "
-        msg += "expected exactly one"
-        raise ValueError(msg)
-    def get(
-        self,
-        *predicates: Callable[[R], bool] | tuple[str, Any],
-        **kwargs: Any,
-    ) -> R:
-        """Get a single run matching the specified criteria.
-        This method applies filters and returns a single matching run,
-        or raises ValueError if no runs or multiple runs match.
-        Args:
-            *predicates: Callable predicates or (key, value) tuples
-                for filtering.
-            **kwargs: Additional key-value pairs for filtering.
-        Returns:
-            R: A single Run that matches the criteria.
-        Raises:
-            ValueError: If no runs match or if multiple runs match
-            the criteria.
-        """
-        if run := self.try_get(*predicates, **kwargs):
-            return run
-        raise _value_error()
-    def first(
-        self,
-        *predicates: Callable[[R], bool] | tuple[str, Any],
-        **kwargs: Any,
-    ) -> R:
-        """Get the first run matching the specified criteria.
-        This method applies filters and returns the first matching run,
-        or raises ValueError if no runs match.
-        Args:
-            *predicates: Callable predicates or (key, value) tuples
-                for filtering.
-            **kwargs: Additional key-value pairs for filtering.
-        Returns:
-            R: The first Run that matches the criteria.
-        Raises:
-            ValueError: If no runs match the criteria.
-        """
-        if runs := self.filter(*predicates, **kwargs):
-            return runs[0]
-        raise _value_error()
-    def last(
-        self,
-        *predicates: Callable[[R], bool] | tuple[str, Any],
-        **kwargs: Any,
-    ) -> R:
-        """Get the last run matching the specified criteria.
-        This method applies filters and returns the last matching run,
-        or raises ValueError if no runs match.
-        Args:
-            *predicates: Callable predicates or (key, value) tuples
-                for filtering.
-            **kwargs: Additional key-value pairs for filtering.
-        Returns:
-            R: The last Run that matches the criteria.
-        Raises:
-            ValueError: If no runs match the criteria.
-        """
-        if runs := self.filter(*predicates, **kwargs):
-            return runs[-1]
-        raise _value_error()
-    def to_list(
-        self,
-        key: str,
-        default: Any | Callable[[R], Any] = MISSING,
-    ) -> list[Any]:
-        """Extract a list of values for a specific key from all runs.
-        Args:
-            key: The key to extract from each run.
-            default: The default value to return if the key is not found.
-                If a callable, it will be called with the Run instance
-                and the value returned will be used as the default.
-        Returns:
-            list[Any]: A list containing the values for the
-            specified key from each run.
-        """
-        return [run.get(key, default) for run in self]
-    def to_numpy(
-        self,
-        key: str,
-        default: Any | Callable[[R], Any] = MISSING,
-    ) -> NDArray:
-        """Extract values for a specific key from all runs as a NumPy array.
-        Args:
-            key: The key to extract from each run.
-            default: The default value to return if the key is not found.
-                If a callable, it will be called with the Run instance
-                and the value returned will be used as the default.
-        Returns:
-            NDArray: A NumPy array containing the values for the
-            specified key from each run.
-        """
-        return np.array(self.to_list(key, default))
-    def to_series(
-        self,
-        key: str,
-        default: Any | Callable[[R], Any] = MISSING,
-        *,
-        name: str | None = None,
-    ) -> Series:
-        """Extract values for a specific key from all runs as a Polars series.
-        Args:
-            key: The key to extract from each run.
-            default: The default value to return if the key is not found.
-                If a callable, it will be called with the Run instance
-                and the value returned will be used as the default.
-            name: The name of the series. If not provided, the key will be used.
-        Returns:
-            Series: A Polars series containing the values for the
-            specified key from each run.
-        """
-        return Series(name or key, self.to_list(key, default))
-    def unique(
-        self,
-        key: str,
-        default: Any | Callable[[R], Any] = MISSING,
-    ) -> NDArray:
-        """Get the unique values for a specific key across all runs.
-        Args:
-            key: The key to extract unique values for.
-            default: The default value to return if the key is not found.
-                If a callable, it will be called with the Run instance
-                and the value returned will be used as the default.
-        Returns:
-            NDArray: A NumPy array containing the unique values for the
-            specified key.
-        """
-        return np.unique(self.to_numpy(key, default), axis=0)
-    def n_unique(
-        self,
-        key: str,
-        default: Any | Callable[[R], Any] = MISSING,
-    ) -> int:
-        """Count the number of unique values for a specific key across all runs.
-        Args:
-            key: The key to count unique values for.
-            default: The default value to return if the key is not found.
-                If a callable, it will be called with the Run instance
-                and the value returned will be used as the default.
-        Returns:
-            int: The number of unique values for the specified key.
-        """
-        return len(self.unique(key, default))
-    def sort(self, *keys: str, reverse: bool = False) -> Self:
-        """Sort runs based on one or more keys.
-        Args:
-            *keys: The keys to sort by, in order of priority.
-            reverse: Whether to sort in descending order (default is
-                ascending).
-        Returns:
-            Self: A new RunCollection with the runs sorted according to
-            the specified keys.
-        """
-        if not keys:
-            return self
-        arrays = [self.to_numpy(key) for key in keys]
-        index = np.lexsort(arrays[::-1])
-        if reverse:
-            index = index[::-1]
-        return self[index]
-    def to_frame(
-        self,
-        *keys: str,
-        defaults: dict[str, Any | Callable[[R], Any]] | None = None,
-        **kwargs: Callable[[R], Any],
-    ) -> DataFrame:
-        """Convert the collection to a Polars DataFrame.
-        Args:
-            *keys (str): The keys to include as columns in the DataFrame.
-                If not provided, all keys from each run's to_dict() method
-                will be used.
-            defaults (dict[str, Any | Callable[[R], Any]] | None): Default
-                values for the keys. If a callable, it will be called with
-                the Run instance and the value returned will be used as the
-                default.
-            **kwargs (Callable[[R], Any]): Additional columns to compute
-                using callables that take a Run and return a value.
-        Returns:
-            DataFrame: A Polars DataFrame containing the specified data
-            from the runs.
-        """
-        if defaults is None:
-            defaults = {}
-        if keys:
-            df = DataFrame(
-                {key: self.to_list(key, defaults.get(key, MISSING)) for key in keys},
-            )
-        else:
-            df = DataFrame(r.to_dict() for r in self)
-        if not kwargs:
-            return df
-        columns = [Series(k, [v(r) for r in self]) for k, v in kwargs.items()]
-        return df.with_columns(*columns)
-    def _group_by(self, *keys: str) -> dict[Any, Self]:
-        result: dict[Any, Self] = {}
-        for run in self:
-            keys_ = [to_hashable(run.get(key)) for key in keys]
-            key = keys_[0] if len(keys) == 1 else tuple(keys_)
-            if key not in result:
-                result[key] = self.__class__([])
-            result[key].runs.append(run)
-        return result
-    @overload
-    def group_by(self, *keys: str) -> dict[Any, Self]: ...
-    @overload
-    def group_by(
-        self,
-        *keys: str,
-        **kwargs: Callable[[Self | Sequence[R]], Any],
-    ) -> DataFrame: ...
-    def group_by(
-        self,
-        *keys: str,
-        **kwargs: Callable[[Self | Sequence[R]], Any],
-    ) -> dict[Any, Self] | DataFrame:
-        """Group runs by one or more keys.
-        This method can return either:
-        - A dictionary mapping group keys to RunCollections
-          (no kwargs provided)
-        - A Polars DataFrame with group keys and aggregated
-          values (kwargs provided)
-        Args:
-            *keys (str): The keys to group by.
-            **kwargs (Callable[[Self | Sequence[R]], Any]): Aggregation
-                functions to apply to each group. Each function should
-                accept a RunCollection or Sequence[Run] and return a value.
-        Returns:
-            dict[Any, Self] | DataFrame: Either a dictionary mapping
-            group keys to RunCollections, or a Polars DataFrame with
-            group keys and aggregated values.
-        """
-        gp = self._group_by(*keys)
-        if not kwargs:
-            return gp
-        if len(keys) == 1:
-            df = DataFrame({keys[0]: list(gp)})
-        else:
-            df = DataFrame(dict(zip(keys, k, strict=True)) for k in gp)
-        columns = [pl.Series(k, [v(r) for r in gp.values()]) for k, v in kwargs.items()]
-        return df.with_columns(*columns)
-def to_hashable(value: Any) -> Hashable:
-    """Convert a value to a hashable instance.
-    This function handles various types of values and converts them to
-    hashable equivalents for use in dictionaries and sets.
-    Args:
-        value: The value to convert to a hashable instance.
-    Returns:
-        A hashable version of the input value.
-    """
-    if OmegaConf.is_list(value):  # Is ListConfig hashable?
-        return tuple(value)
-    if isinstance(value, Hashable):
-        return value
-    if isinstance(value, np.ndarray):
-        return tuple(value.tolist())
-    try:
-        return tuple(value)
-    except TypeError:
-        return str(value)
-def _value_error() -> ValueError:
-    msg = "No Run found matching the specified criteria"
-    return ValueError(msg)
+        return Collection(run.impl for run in self)

hydraflow/core/run_info.py CHANGED Viewed

@@ -19,7 +19,6 @@ from .io import get_experiment_name
 if TYPE_CHECKING:
     from pathlib import Path
-    from typing import Any
 @dataclass
@@ -51,11 +50,3 @@ class RunInfo:
         contain the expected format).
         """
         return get_experiment_name(self.run_dir.parent)
-    def to_dict(self) -> dict[str, Any]:
-        """Convert the RunInfo to a dictionary."""
-        return {
-            "run_id": self.run_id,
-            "run_dir": self.run_dir.as_posix(),
-            "job_name": self.job_name,
-        }

{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.16.2
+Version: 0.17.0
 Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/RECORD RENAMED Viewed

@@ -2,20 +2,22 @@ hydraflow/__init__.py,sha256=8UraqH00Qp0In301ZUmQBRTIGbV1L5zSZACOUlIRPn8,727
 hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
 hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/core/collection.py,sha256=tUdjV_v4vzUHSNET-Z7a_8k5oXoH6nkZ_0OxZ-u8_nI,16791
 hydraflow/core/context.py,sha256=igE17oQESGjH-sBnICI8HkZbngY_crkHTgx2E-YkmEo,4155
+hydraflow/core/group_by.py,sha256=Pnw-oA5aXHeRG9lMLz-bKc8drqQ8LIRsWzvVn153iyQ,5488
 hydraflow/core/io.py,sha256=B3-jPuJWttRgpbIpy_XA-Z2qpXzNF1ATwyYEwA7Pv3w,5172
 hydraflow/core/main.py,sha256=pgr2b9A4VoZuwbApE71NElmV64MFJv8UKda05q4uCqk,6010
-hydraflow/core/run.py,sha256=SugX6JLdBqsfz3JTrB66I3muo03rrmwDvITVZQaF48w,12685
-hydraflow/core/run_collection.py,sha256=cbaJO68WzE-QNlTc8NhOyQ1pHDNberJs-31qTY7P9Fo,19495
-hydraflow/core/run_info.py,sha256=B5sueHKVH9KEwty8fWuYzGC3M0-_g3TF_iwDM_2dyJs,1885
+hydraflow/core/run.py,sha256=VQfS3DkAR2GBWdltmlD0XMStiOUo1YZiRONm-mPW2x4,11948
+hydraflow/core/run_collection.py,sha256=4YjnAmB4lpGxTnlHzZOIwEXNfdI5yU5cj3PRiCW6vuA,5439
+hydraflow/core/run_info.py,sha256=SMOTZXEa7OBV_XjTyctk5gJGrggmYwhePvRF8CLF1kU,1616
 hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
 hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
 hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
 hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
 hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
-hydraflow-0.16.2.dist-info/METADATA,sha256=3UWuHRuYrTCwXopZeqP9xBDKYn2_pUpL4Q2MBSOJhaA,7535
-hydraflow-0.16.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hydraflow-0.16.2.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
-hydraflow-0.16.2.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.16.2.dist-info/RECORD,,
+hydraflow-0.17.0.dist-info/METADATA,sha256=f9LHLgsZMEiTl1CusfZQHUSv6rlz8DfL78EoMfheCBA,7535
+hydraflow-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hydraflow-0.17.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
+hydraflow-0.17.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.17.0.dist-info/RECORD,,

{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{hydraflow-0.16.2.dist-info → hydraflow-0.17.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hydraflow 0.16.2__py3-none-any.whl → 0.17.0__py3-none-any.whl

hydraflow 0.16.2-py3-none-any.whl → 0.17.0-py3-none-any.whl