PyPI - hydraflow - Versions diffs - 0.2.16__tar.gz → 0.2.18__tar.gz - Mend

hydraflow 0.2.16__tar.gz → 0.2.18__tar.gz

Files changed (37) hide show

{hydraflow-0.2.16 → hydraflow-0.2.18}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: hydraflow
-Version: 0.2.16
+Version: 0.2.18
 Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
 Project-URL: Documentation, https://github.com/daizutabi/hydraflow
 Project-URL: Source, https://github.com/daizutabi/hydraflow

{hydraflow-0.2.16 → hydraflow-0.2.18}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.2.16"
+version = "0.2.18"
 description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
 readme = "README.md"
 license = "MIT"
@@ -66,17 +66,32 @@ target-version = "py310"
 [tool.ruff.lint]
 select = ["ALL"]
+unfixable = ["F401"]
 ignore = [
   "ANN003",
   "ANN401",
   "ARG002",
   "B904",
-  "D",
+  "D105",
+  "D107",
+  "D203",
+  "D213",
   "EM101",
   "PGH003",
   "TRY003",
 ]
-exclude = ["tests/scripts/*.py"]
+exclude = ["tests/scripts/*.py", "src/hydraflow/__init__.py"]
 [tool.ruff.lint.per-file-ignores]
-"tests/*" = ["A001", "ANN", "ARG", "FBT", "PLR", "PT", "S", "SIM117", "SLF"]
+"tests/*" = [
+  "A001",
+  "ANN",
+  "ARG",
+  "D",
+  "FBT",
+  "PLR",
+  "PT",
+  "S",
+  "SIM117",
+  "SLF",
+]

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/asyncio.py RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Provide functionality for running commands and monitoring file changes."""
 from __future__ import annotations
 import asyncio
@@ -27,8 +29,7 @@ async def execute_command(
     stderr: Callable[[str], None] | None = None,
     stop_event: asyncio.Event,
 ) -> int:
-    """
-    Runs a command asynchronously and pass the output to callback functions.
+    """Run a command asynchronously and pass the output to callback functions.
     Args:
         program (str): The program to run.
@@ -39,6 +40,7 @@ async def execute_command(
     Returns:
         int: The return code of the process.
     """
     try:
         process = await asyncio.create_subprocess_exec(
@@ -68,13 +70,13 @@ async def process_stream(
     stream: StreamReader | None,
     callback: Callable[[str], None] | None,
 ) -> None:
-    """
-    Reads a stream asynchronously and pass each line to a callback function.
+    """Read a stream asynchronously and pass each line to a callback function.
     Args:
         stream (StreamReader | None): The stream to read from.
         callback (Callable[[str], None] | None): The callback function to handle
         each line.
     """
     if stream is None or callback is None:
         return
@@ -93,9 +95,7 @@ async def monitor_file_changes(
     stop_event: asyncio.Event,
     **awatch_kwargs,
 ) -> None:
-    """
-    Watches for file changes in specified paths and pass the changes to a
-    callback function.
+    """Watch file changes in specified paths and pass the changes to a callback.
     Args:
         paths (list[str | Path]): List of paths to monitor for changes.
@@ -103,6 +103,7 @@ async def monitor_file_changes(
         function to handle file changes.
         stop_event (asyncio.Event): Event to signal when to stop watching.
         **awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
     """
     str_paths = [str(path) for path in paths]
     try:
@@ -127,8 +128,7 @@ async def run_and_monitor(
     paths: list[str | Path] | None = None,
     **awatch_kwargs,
 ) -> int:
-    """
-    Runs a command and optionally watch for file changes concurrently.
+    """Run a command and optionally watch for file changes concurrently.
     Args:
         program (str): The program to run.
@@ -138,6 +138,8 @@ async def run_and_monitor(
         watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
         file changes.
         paths (list[str | Path] | None): List of paths to monitor for changes.
+        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
     """
     stop_event = asyncio.Event()
     run_task = asyncio.create_task(
@@ -184,8 +186,7 @@ def run(
     paths: list[str | Path] | None = None,
     **awatch_kwargs,
 ) -> int:
-    """
-    Run a command synchronously and optionally watch for file changes.
+    """Run a command synchronously and optionally watch for file changes.
     This function is a synchronous wrapper around the asynchronous
     `run_and_monitor` function. It runs a specified command and optionally
@@ -208,6 +209,7 @@ def run(
     Returns:
         int: The return code of the process.
     """
     if watch and not paths:
         paths = [Path.cwd()]

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/config.py RENAMED Viewed

@@ -1,7 +1,4 @@
-"""
-This module provides functionality for working with configuration
-objects using the OmegaConf library.
-"""
+"""Provide functionality for working with configuration objects using OmegaConf."""
 from __future__ import annotations
@@ -15,8 +12,7 @@ if TYPE_CHECKING:
 def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
-    """
-    Recursively iterate over the parameters in the given configuration object.
+    """Recursively iterate over the parameters in the given configuration object.
     This function traverses the configuration object and yields key-value pairs
     representing the parameters. The keys are prefixed with the provided prefix.
@@ -29,6 +25,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
     Yields:
         Key-value pairs representing the parameters in the configuration object.
     """
     if config is None:
         return

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/context.py RENAMED Viewed

@@ -1,7 +1,4 @@
-"""
-This module provides context managers to log parameters and manage the MLflow
-run context.
-"""
+"""Provide context managers to log parameters and manage the MLflow run context."""
 from __future__ import annotations
@@ -34,9 +31,7 @@ def log_run(
     *,
     synchronous: bool | None = None,
 ) -> Iterator[None]:
-    """
-    Log the parameters from the given configuration object and manage the MLflow
-    run context.
+    """Log the parameters from the given configuration object.
     This context manager logs the parameters from the provided configuration object
     using MLflow. It also manages the MLflow run context, ensuring that artifacts
@@ -56,6 +51,7 @@ def log_run(
             # Perform operations within the MLflow run context
             pass
         ```
     """
     log_params(config, synchronous=synchronous)
@@ -98,8 +94,7 @@ def start_run(  # noqa: PLR0913
     log_system_metrics: bool | None = None,
     synchronous: bool | None = None,
 ) -> Iterator[Run]:
-    """
-    Start an MLflow run and log parameters using the provided configuration object.
+    """Start an MLflow run and log parameters using the provided configuration object.
     This context manager starts an MLflow run and logs parameters using the specified
     configuration object. It ensures that the run is properly closed after completion.
@@ -130,6 +125,7 @@ def start_run(  # noqa: PLR0913
         - `mlflow.start_run`: The MLflow function to start a run directly.
         - `log_run`: A context manager to log parameters and manage the MLflow
            run context.
     """
     with (
         mlflow.start_run(
@@ -156,9 +152,7 @@ def watch(
     ignore_patterns: list[str] | None = None,
     ignore_log: bool = True,
 ) -> Iterator[None]:
-    """
-    Watch the given directory for changes and call the provided function
-    when a change is detected.
+    """Watch the given directory for changes.
     This context manager sets up a file system watcher on the specified directory.
     When a file modification is detected, the provided function is called with
@@ -173,6 +167,9 @@ def watch(
             the current MLflow artifact URI is used. Defaults to "".
         timeout (int): The timeout period in seconds for the watcher
             to run after the context is exited. Defaults to 60.
+        ignore_patterns (list[str] | None): A list of glob patterns to ignore.
+            Defaults to None.
+        ignore_log (bool): Whether to ignore log files. Defaults to True.
     Yields:
         None
@@ -183,6 +180,7 @@ def watch(
             # Perform operations while watching the directory for changes
             pass
         ```
     """
     dir = dir or get_artifact_dir()  # noqa: A001
     if isinstance(dir, Path):
@@ -214,6 +212,8 @@ def watch(
 class Handler(PatternMatchingEventHandler):
+    """Monitor file changes and call the given function when a change is detected."""
     def __init__(
         self,
         func: Callable[[Path], None],
@@ -232,6 +232,7 @@ class Handler(PatternMatchingEventHandler):
         super().__init__(ignore_patterns=ignore_patterns)
     def on_modified(self, event: FileModifiedEvent) -> None:
+        """Call the registered function with the path of a modified file."""
         file = Path(str(event.src_path))
         if file.is_file():
             self.func(file)
@@ -242,9 +243,7 @@ def chdir_artifact(
     run: Run,
     artifact_path: str | None = None,
 ) -> Iterator[Path]:
-    """
-    Change the current working directory to the artifact directory of the
-    given run.
+    """Change the current working directory to the artifact directory of the given run.
     This context manager changes the current working directory to the artifact
     directory of the given run. It ensures that the directory is changed back
@@ -253,6 +252,7 @@ def chdir_artifact(
     Args:
         run (Run): The run to get the artifact directory from.
         artifact_path (str | None): The artifact path.
     """
     curdir = Path.cwd()
     path = mlflow.artifacts.download_artifacts(

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/info.py RENAMED Viewed

@@ -1,3 +1,5 @@
+"""Provide information about MLflow runs."""
 from __future__ import annotations
 from pathlib import Path
@@ -15,37 +17,44 @@ if TYPE_CHECKING:
 class RunCollectionInfo:
+    """Provide information about MLflow runs."""
     def __init__(self, runs: RunCollection) -> None:
         self._runs = runs
     @property
     def run_id(self) -> list[str]:
+        """Get the run ID for each run in the collection."""
         return [run.info.run_id for run in self._runs]
     @property
     def params(self) -> list[dict[str, str]]:
+        """Get the parameters for each run in the collection."""
         return [run.data.params for run in self._runs]
     @property
     def metrics(self) -> list[dict[str, float]]:
+        """Get the metrics for each run in the collection."""
         return [run.data.metrics for run in self._runs]
     @property
     def artifact_uri(self) -> list[str | None]:
+        """Get the artifact URI for each run in the collection."""
         return [run.info.artifact_uri for run in self._runs]
     @property
     def artifact_dir(self) -> list[Path]:
+        """Get the artifact directory for each run in the collection."""
         return [get_artifact_dir(run) for run in self._runs]
     @property
     def config(self) -> list[DictConfig]:
+        """Get the configuration for each run in the collection."""
         return [load_config(run) for run in self._runs]
 def get_artifact_dir(run: Run | None = None) -> Path:
-    """
-    Retrieve the artifact directory for the given run.
+    """Retrieve the artifact directory for the given run.
     This function uses MLflow to get the artifact directory for the given run.
@@ -54,6 +63,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
     Returns:
         The local path to the directory where the artifacts are downloaded.
     """
     if run is None:
         uri = mlflow.get_artifact_uri()
@@ -64,8 +74,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
 def get_hydra_output_dir(run: Run | None = None) -> Path:
-    """
-    Retrieve the Hydra output directory for the given run.
+    """Retrieve the Hydra output directory for the given run.
     This function returns the Hydra output directory. If no run is provided,
     it retrieves the output directory from the current Hydra configuration.
@@ -82,6 +91,7 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
     Raises:
         FileNotFoundError: If the Hydra configuration file is not found
             in the artifacts.
     """
     if run is None:
         hc = HydraConfig.get()
@@ -97,8 +107,7 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
 def load_config(run: Run) -> DictConfig:
-    """
-    Load the configuration for a given run.
+    """Load the configuration for a given run.
     This function loads the configuration for the provided Run instance
     by downloading the configuration file from the MLflow artifacts and
@@ -111,6 +120,7 @@ def load_config(run: Run) -> DictConfig:
     Returns:
         The loaded configuration as a DictConfig object. NOTE(review): a missing
         file is not handled here; `OmegaConf.load` is called directly — confirm.
     """
     path = get_artifact_dir(run) / ".hydra/config.yaml"
     return OmegaConf.load(path)  # type: ignore

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/mlflow.py RENAMED Viewed

@@ -1,20 +1,17 @@
-"""
-This module provides functionality to log parameters from Hydra configuration objects
-and set up experiments using MLflow. It includes methods for managing experiments,
-searching for runs, and logging parameters and artifacts.
+"""Provide functionality to log parameters from Hydra configuration objects.
+This module provides functions to log parameters from Hydra configuration objects
+to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
+configuration management with MLflow's experiment tracking capabilities.
 Key Features:
-- **Experiment Management**: Set and manage MLflow experiments with customizable names
-  based on Hydra configuration.
-- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
-  MLflow, ensuring that all relevant information is captured during experiments.
-- **Run Search**: Search for runs based on various criteria, allowing for flexible
-  retrieval of experiment results.
-- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
-  easy access to outputs generated during experiments.
-This module is designed to integrate seamlessly with Hydra, providing a robust
-solution for tracking machine learning experiments and their associated metadata.
+- **Experiment Management**: Set experiment names and tracking URIs using Hydra
+  configuration details.
+- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
+  supporting both synchronous and asynchronous logging.
+- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
+  multiple MLflow runs, providing methods to filter and retrieve runs based on
+  various criteria.
 """
 from __future__ import annotations
@@ -40,8 +37,7 @@ def set_experiment(
     suffix: str = "",
     uri: str | Path | None = None,
 ) -> Experiment:
-    """
-    Sets the experiment name and tracking URI optionally.
+    """Set the experiment name and tracking URI optionally.
     This function sets the experiment name by combining the given prefix,
     the job name from HydraConfig, and the given suffix. Optionally, it can
@@ -55,6 +51,7 @@ def set_experiment(
     Returns:
         Experiment: An instance of `mlflow.entities.Experiment` representing
         the new active experiment.
     """
     if uri is not None:
         mlflow.set_tracking_uri(uri)
@@ -65,8 +62,7 @@ def set_experiment(
 def log_params(config: object, *, synchronous: bool | None = None) -> None:
-    """
-    Log the parameters from the given configuration object.
+    """Log the parameters from the given configuration object.
     This method logs the parameters from the provided configuration object
     using MLflow. It iterates over the parameters and logs them using the
@@ -76,6 +72,7 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
         config (object): The configuration object to log the parameters from.
         synchronous (bool | None): Whether to log the parameters synchronously.
             Defaults to None.
     """
     for key, value in iter_params(config):
         mlflow.log_param(key, value, synchronous=synchronous)
@@ -91,8 +88,7 @@ def search_runs(  # noqa: PLR0913
     search_all_experiments: bool = False,
     experiment_names: list[str] | None = None,
 ) -> RunCollection:
-    """
-    Search for Runs that fit the specified criteria.
+    """Search for Runs that fit the specified criteria.
     This function wraps the `mlflow.search_runs` function and returns the
     results as a `RunCollection` object. It allows for flexible searching of
@@ -133,6 +129,7 @@ def search_runs(  # noqa: PLR0913
     Returns:
         A `RunCollection` object containing the search results.
     """
     runs = mlflow.search_runs(
         experiment_ids=experiment_ids,
@@ -151,9 +148,9 @@ def search_runs(  # noqa: PLR0913
 def list_runs(
     experiment_names: str | list[str] | None = None,
     n_jobs: int = 0,
+    status: str | list[str] | int | list[int] | None = None,
 ) -> RunCollection:
-    """
-    List all runs for the specified experiments.
+    """List all runs for the specified experiments.
     This function retrieves all runs for the given list of experiment names.
     If no experiment names are provided (None), it defaults to searching all runs
@@ -169,11 +166,27 @@ def list_runs(
             for runs. If None or an empty list is provided, the function will
             search the currently active experiment or all experiments except
             the "Default" experiment.
+        n_jobs (int): The number of jobs to run in parallel. If 0, the function
+            will search runs sequentially.
+        status (str | list[str] | int | list[int] | None): Run status value(s)
+            passed to `RunCollection.filter`. If None, no status filter is applied.
     Returns:
         RunCollection: A `RunCollection` instance containing the runs for the
         specified experiments.
     """
+    rc = _list_runs(experiment_names, n_jobs)
+    if status is None:
+        return rc
+    return rc.filter(status=status)
+def _list_runs(
+    experiment_names: str | list[str] | None = None,
+    n_jobs: int = 0,
+) -> RunCollection:
     if isinstance(experiment_names, str):
         experiment_names = [experiment_names]

hydraflow-0.2.18/src/hydraflow/param.py ADDED Viewed

@@ -0,0 +1,75 @@
+"""Provide utility functions for parameter matching.
+The main function `match` checks if a given parameter matches a specified value.
+It supports various types of values including None, boolean, list, tuple, int,
+float, and str.
+Helper functions `_match_list` and `_match_tuple` are used internally to handle
+matching for list and tuple types respectively.
+"""
+from __future__ import annotations
+from typing import Any
+def match(param: str, value: Any) -> bool:
+    """Check if the string matches the specified value.
+    Args:
+        param (str): The parameter to check.
+        value (Any): The value to check.
+    Returns:
+        True if the parameter matches the specified value,
+        False otherwise.
+    """
+    if value in [None, True, False]:
+        return param == str(value)
+    if isinstance(value, list) and (m := _match_list(param, value)) is not None:
+        return m
+    if isinstance(value, tuple) and (m := _match_tuple(param, value)) is not None:
+        return m
+    if isinstance(value, int | float | str):
+        return type(value)(param) == value
+    return param == str(value)
+def _match_list(param: str, value: list) -> bool | None:
+    if not value:
+        return None
+    if any(param.startswith(x) for x in ["[", "(", "{"]):
+        return None
+    if isinstance(value[0], bool):
+        return None
+    if not isinstance(value[0], int | float | str):
+        return None
+    return type(value[0])(param) in value
+def _match_tuple(param: str, value: tuple) -> bool | None:
+    if len(value) != 2:  # noqa: PLR2004
+        return None
+    if any(param.startswith(x) for x in ["[", "(", "{"]):
+        return None
+    if isinstance(value[0], bool):
+        return None
+    if not isinstance(value[0], int | float | str):
+        return None
+    if type(value[0]) is not type(value[1]):
+        return None
+    return value[0] <= type(value[0])(param) < value[1]  # type: ignore

{hydraflow-0.2.16 → hydraflow-0.2.18}/src/hydraflow/progress.py RENAMED Viewed

@@ -1,18 +1,7 @@
-"""
-Module for managing progress tracking in parallel processing using Joblib
-and Rich's Progress bar.
+"""Context managers and functions for parallel task execution with progress.
 Provide context managers and functions to facilitate the execution
 of tasks in parallel while displaying progress updates.
-The following key components are provided:
-- JoblibProgress: A context manager for tracking progress with Rich's progress
-    bar.
-- parallel_progress: A function to execute a given function in parallel over
-    an iterable with progress tracking.
-- multi_tasks_progress: A function to render auto-updating progress bars for
-    multiple tasks concurrently.
 """
 from __future__ import annotations
@@ -37,8 +26,7 @@ def JoblibProgress(  # noqa: N802
     total: int | None = None,
     **kwargs,
 ) -> Iterator[Progress]:
-    """
-    Context manager for tracking progress using Joblib with Rich's Progress bar.
+    """Context manager for tracking progress using Joblib with Rich's Progress bar.
     Args:
         *columns (ProgressColumn | str): Columns to display in the progress bar.
@@ -56,6 +44,7 @@ def JoblibProgress(  # noqa: N802
         with JoblibProgress("task", total=100) as progress:
             # Your parallel processing code here
         ```
     """
     if not columns:
         columns = Progress.get_default_columns()
@@ -94,8 +83,7 @@ def parallel_progress(
     description: str | None = None,
     **kwargs,
 ) -> list[U]:
-    """
-    Execute a function in parallel over an iterable with progress tracking.
+    """Execute a function in parallel over an iterable with progress tracking.
     Args:
         func (Callable[[T], U]): The function to execute on each item in the
@@ -112,6 +100,7 @@ def parallel_progress(
     Returns:
         list[U]: A list of results from applying the function to each item in
         the iterable.
     """
     iterable = list(iterable)
     total = len(iterable)
@@ -130,8 +119,7 @@ def multi_tasks_progress(
     transient: bool | None = None,
     **kwargs,
 ) -> None:
-    """
-    Render auto-updating progress bars for multiple tasks concurrently.
+    """Render auto-updating progress bars for multiple tasks concurrently.
     Args:
         iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
@@ -151,6 +139,7 @@ def multi_tasks_progress(
     Returns:
         None
     """
     if not columns:
         columns = Progress.get_default_columns()

hydraflow 0.2.16__tar.gz → 0.2.18__tar.gz

hydraflow 0.2.16__tar.gz → 0.2.18__tar.gz