PyPI - hydraflow - Versions diffs - 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl - Mend

hydraflow 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

hydraflow/__init__.py +3 -13
hydraflow/core/context.py +12 -32
hydraflow/core/io.py +36 -115
hydraflow/core/main.py +3 -3
hydraflow/core/run.py +355 -0
hydraflow/core/run_collection.py +525 -0
hydraflow/core/run_info.py +84 -0
hydraflow/executor/conf.py +6 -6
hydraflow/executor/io.py +1 -17
hydraflow/executor/job.py +41 -14
hydraflow/executor/parser.py +9 -8
{hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/METADATA +11 -9
hydraflow-0.15.1.dist-info/RECORD +21 -0
hydraflow/core/config.py +0 -122
hydraflow/core/mlflow.py +0 -174
hydraflow/core/param.py +0 -165
hydraflow/entities/__init__.py +0 -0
hydraflow/entities/run_collection.py +0 -583
hydraflow/entities/run_data.py +0 -61
hydraflow/entities/run_info.py +0 -36
hydraflow-0.14.4.dist-info/RECORD +0 -25
{hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/WHEEL +0 -0
{hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/entry_points.txt +0 -0
{hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/licenses/LICENSE +0 -0

hydraflow/executor/job.py CHANGED Viewed

@@ -2,7 +2,7 @@
 This module provides functionality for executing jobs in HydraFlow, including:
-- Argument parsing and expansion for job steps
+- Argument parsing and expansion for job parameter sets
 - Batch processing of Hydra configurations
 - Execution of jobs via shell commands or Python functions
@@ -11,8 +11,9 @@ The module supports two execution modes:
 1. Shell command execution
 2. Python function calls
-Each job can consist of multiple steps, and each step can have its own
-arguments and configurations that will be expanded into multiple runs.
+Each job can consist of multiple parameter sets, and each parameter
+set can have its own arguments and configurations that will be expanded
+into multiple runs.
 """
 from __future__ import annotations
@@ -39,24 +40,24 @@ if TYPE_CHECKING:
     from .conf import Job
-def iter_args(batch: str, args: str) -> Iterator[list[str]]:
+def iter_args(each: str, all_: str) -> Iterator[list[str]]:
     """Iterate over combinations generated from parsed arguments.
     Generate all possible combinations of arguments by parsing and
     expanding each one, yielding them as an iterator.
     Args:
-        batch (str): The batch to parse.
-        args (str): The arguments to parse.
+        each (str): The 'each' parameter to parse.
+        all_ (str): The 'all' parameter to parse.
     Yields:
         list[str]: a list of the parsed argument combinations.
     """
-    args_ = collect(args)
+    all_params = collect(all_)
-    for batch_ in expand(batch):
-        yield [*batch_, *args_]
+    for each_params in expand(each):
+        yield [*each_params, *all_params]
 def iter_batches(job: Job) -> Iterator[list[str]]:
@@ -74,14 +75,40 @@ def iter_batches(job: Job) -> Iterator[list[str]]:
     """
     job_name = f"hydra.job.name={job.name}"
-    job_configs = shlex.split(job.with_)
+    job_add = shlex.split(job.add)
-    for step in job.steps:
-        configs = shlex.split(step.with_) or job_configs
+    for set_ in job.sets:
+        add = merge_args(job_add, shlex.split(set_.add)) if set_.add else job_add
-        for args in iter_args(step.batch, step.args):
+        for args in iter_args(set_.each, set_.all):
             sweep_dir = f"hydra.sweep.dir=multirun/{ulid.ULID()}"
-            yield ["--multirun", *args, job_name, sweep_dir, *configs]
+            yield ["--multirun", *args, job_name, sweep_dir, *add]
+def merge_args(first: list[str], second: list[str]) -> list[str]:
+    """Merge two lists of arguments.
+    This function merges two lists of arguments by checking for conflicts
+    and resolving them by keeping the values from the second list.
+    Args:
+        first (list[str]): The first list of arguments.
+        second (list[str]): The second list of arguments.
+    Returns:
+        list[str]: A merged list of arguments.
+    """
+    merged = {}
+    for item in [*first, *second]:
+        if "=" in item:
+            key, value = item.split("=", 1)
+            merged[key] = value
+        else:
+            merged[item] = None
+    return [k if v is None else f"{k}={v}" for k, v in merged.items()]
 @dataclass

hydraflow/executor/parser.py CHANGED Viewed

@@ -165,25 +165,26 @@ SUFFIX_EXPONENT = {
 def _get_range(arg: str) -> tuple[float, float, float]:
+    """Return a tuple of (start, stop, step)."""
     args = [to_number(x) for x in arg.split(":")]
     if len(args) == 2:
         if args[0] > args[1]:
             raise ValueError("start cannot be greater than stop")
-        return (args[0], 1, args[1])
+        return (args[0], args[1], 1)
-    if args[1] == 0:
+    if args[2] == 0:
         raise ValueError("step cannot be zero")
-    if args[1] > 0 and args[0] > args[2]:
+    if args[2] > 0 and args[0] > args[1]:
         raise ValueError("start cannot be greater than stop")
-    if args[1] < 0 and args[0] < args[2]:
+    if args[2] < 0 and args[0] < args[1]:
         raise ValueError("start cannot be less than stop")
     return args[0], args[1], args[2]
-def _arange(start: float, step: float, stop: float) -> list[float]:
+def _arange(start: float, stop: float, step: float) -> list[float]:
     """Generate a range of floating point numbers.
     This function generates a range of floating point numbers
@@ -191,8 +192,8 @@ def _arange(start: float, step: float, stop: float) -> list[float]:
     Args:
         start (float): The starting value.
-        step (float): The step size.
         stop (float): The end value (inclusive).
+        step (float): The step size.
     Returns:
         list[float]: A list of floating point numbers from start to stop
@@ -323,7 +324,7 @@ def collect_parentheses(arg: str) -> list[str]:
         list[str]: A list of the collected values.
     Examples:
-        >>> collect_parentheses("(1:3,5:2:9,20)k")
+        >>> collect_parentheses("(1:3,5:9:2,20)k")
         ['1e3', '2e3', '3e3', '5e3', '7e3', '9e3', '20e3']
         >>> collect_parentheses("2e(-1,-2,-3)")
         ['2e-1', '2e-2', '2e-3']
@@ -352,7 +353,7 @@ def collect_values(arg: str) -> list[str]:
     Examples:
         >>> collect_values("1:4")
         ['1', '2', '3', '4']
-        >>> collect_values("1.2:0.1:1.4:k")
+        >>> collect_values("1.2:1.4:0.1:k")
         ['1.2e3', '1.3e3', '1.4e3']
         >>> collect_values("0.1")
         ['0.1']

{hydraflow-0.14.4.dist-info → hydraflow-0.15.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.14.4
+Version: 0.15.1
 Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -36,40 +36,40 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.10
+Requires-Python: >=3.13
 Requires-Dist: hydra-core>=1.3
+Requires-Dist: joblib>=1.4.0
 Requires-Dist: mlflow>=2.15
 Requires-Dist: omegaconf>=2.3
+Requires-Dist: polars>=1.26
 Requires-Dist: python-ulid>=3.0.0
 Requires-Dist: rich>=13.9
+Requires-Dist: ruff>=0.11
 Requires-Dist: typer>=0.15
 Description-Content-Type: text/markdown
 # Hydraflow
 [![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
 [![Build Status][GHAction-image]][GHAction-link]
 [![Coverage Status][codecov-image]][codecov-link]
 [![Documentation Status][docs-image]][docs-link]
+[![Python Version][python-v-image]][python-v-link]
 <!-- Badges -->
 [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
 [pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
 [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
 [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
 [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
 [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
-[docs-image]: https://readthedocs.org/projects/hydraflow/badge/?version=latest
+[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
 [docs-link]: https://daizutabi.github.io/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
 ## Overview
@@ -101,6 +101,8 @@ You can install Hydraflow via pip:
 pip install hydraflow
 ```
+**Requirements:** Python 3.13+
 ## Quick Start
 Here is a simple example to get you started with Hydraflow:

hydraflow-0.15.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,21 @@
+hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
+hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
+hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
+hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
+hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
+hydraflow/core/run.py,sha256=KqaMdRUBOzOU4vkrRUczCrPCsVx30-XUQ_e78B78BSU,12330
+hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
+hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
+hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
+hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
+hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
+hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
+hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
+hydraflow-0.15.1.dist-info/METADATA,sha256=oC-UgH0sZKw2Ry1kBiMPpNobxzlLhmhQgS8W3TIvGJI,7238
+hydraflow-0.15.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hydraflow-0.15.1.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
+hydraflow-0.15.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.15.1.dist-info/RECORD,,

hydraflow/core/config.py DELETED Viewed

@@ -1,122 +0,0 @@
-"""Provide functionality for working with configuration objects using the OmegaConf."""
-from __future__ import annotations
-from typing import TYPE_CHECKING
-from omegaconf import DictConfig, ListConfig, OmegaConf
-if TYPE_CHECKING:
-    from collections.abc import Iterator
-    from typing import Any
-def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
-    """Recursively iterate over the parameters in the given configuration object.
-    This function traverses the configuration object and yields key-value pairs
-    representing the parameters. The keys are prefixed with the provided prefix.
-    Args:
-        config (Any): The configuration object to iterate over. This can be a
-            dictionary, list, DictConfig, or ListConfig.
-        prefix (str): The prefix to prepend to the parameter keys.
-            Defaults to an empty string.
-    Yields:
-        Key-value pairs representing the parameters in the configuration object.
-    """
-    if config is None:
-        return
-    if isinstance(config, list) and all(isinstance(x, str) for x in config):
-        config = _from_dotlist(config)
-    if not isinstance(config, DictConfig | ListConfig):
-        config = OmegaConf.create(config)
-    yield from _iter_params(config, prefix)
-def _from_dotlist(config: list[str]) -> dict[str, str]:
-    result = {}
-    for item in config:
-        if "=" in item:
-            key, value = item.split("=", 1)
-            result[key.strip()] = value.strip()
-    return result
-def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
-    if isinstance(config, DictConfig):
-        for key, value in config.items():
-            if _is_param(value):
-                yield f"{prefix}{key}", _convert(value)
-            else:
-                yield from _iter_params(value, f"{prefix}{key}.")
-    elif isinstance(config, ListConfig):
-        for index, value in enumerate(config):
-            if _is_param(value):
-                yield f"{prefix}{index}", _convert(value)
-            else:
-                yield from _iter_params(value, f"{prefix}{index}.")
-def _is_param(value: Any) -> bool:
-    """Check if the given value is a parameter."""
-    if isinstance(value, DictConfig):
-        return False
-    if isinstance(value, ListConfig):
-        if any(isinstance(v, DictConfig | ListConfig) for v in value):
-            return False
-    return True
-def _convert(value: Any) -> Any:
-    """Convert the given value to a Python object."""
-    if isinstance(value, ListConfig):
-        return list(value)
-    return value
-def select_config(config: Any, names: list[str]) -> dict[str, Any]:
-    """Select the given parameters from the configuration object.
-    This function selects the given parameters from the configuration object
-    and returns a new configuration object containing only the selected parameters.
-    Args:
-        config (Any): The configuration object to select parameters from.
-        names (list[str]): The names of the parameters to select.
-    Returns:
-        DictConfig: A new configuration object containing only the selected parameters.
-    """
-    if not isinstance(config, DictConfig):
-        config = OmegaConf.structured(config)
-    return {name: _get(config, name) for name in names}
-def _get(config: DictConfig, name: str) -> Any:
-    """Get the value of the given parameter from the configuration object."""
-    if "." not in name:
-        return config.get(name)
-    prefix, name = name.split(".", 1)
-    return _get(config.get(prefix), name)
-def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
-    """Select the given overrides from the configuration object."""
-    names = [override.split("=")[0].strip() for override in overrides]
-    return select_config(config, names)

hydraflow/core/mlflow.py DELETED Viewed

@@ -1,174 +0,0 @@
-"""Integration of MLflow experiment tracking with Hydra configuration management.
-This module provides functions to log parameters from Hydra configuration objects
-to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
-configuration management with MLflow's experiment tracking capabilities.
-"""
-from __future__ import annotations
-from typing import TYPE_CHECKING
-import joblib
-from hydraflow.core.io import file_uri_to_path, get_artifact_dir
-from hydraflow.entities.run_collection import RunCollection
-from .config import iter_params
-if TYPE_CHECKING:
-    from pathlib import Path
-    from typing import Any
-def log_params(config: Any, *, synchronous: bool | None = None) -> None:
-    """Log the parameters from the given configuration object.
-    This method logs the parameters from the provided configuration object
-    using MLflow. It iterates over the parameters and logs them using the
-    `mlflow.log_param` method.
-    Args:
-        config (Any): The configuration object to log the parameters from.
-        synchronous (bool | None): Whether to log the parameters synchronously.
-            Defaults to None.
-    """
-    import mlflow
-    for key, value in iter_params(config):
-        mlflow.log_param(key, value, synchronous=synchronous)
-def log_text(from_dir: Path, pattern: str = "*.log") -> None:
-    """Log text files in the given directory as artifacts.
-    Append the text files to the existing text file in the artifact directory.
-    Args:
-        from_dir (Path): The directory to find the logs in.
-        pattern (str): The pattern to match the logs.
-    """
-    import mlflow
-    artifact_dir = get_artifact_dir()
-    for file in from_dir.glob(pattern):
-        if not file.is_file():
-            continue
-        file_artifact = artifact_dir / file.name
-        if file_artifact.exists():
-            text = file_artifact.read_text()
-            if not text.endswith("\n"):
-                text += "\n"
-        else:
-            text = ""
-        text += file.read_text()
-        mlflow.log_text(text, file.name)
-def list_run_paths(
-    experiment_names: str | list[str] | None = None,
-    *other: str,
-) -> list[Path]:
-    """List all run paths for the specified experiments.
-    This function retrieves all run paths for the given list of experiment names.
-    If no experiment names are provided (None), the function will search all runs
-    for all experiments except the "Default" experiment.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None is provided, the function will search all runs
-            for all experiments except the "Default" experiment.
-        *other (str): The parts of the run directory to join.
-    Returns:
-        list[Path]: A list of run paths for the specified experiments.
-    """
-    import mlflow
-    if isinstance(experiment_names, str):
-        experiment_names = [experiment_names]
-    elif experiment_names is None:
-        experiments = mlflow.search_experiments()
-        experiment_names = [e.name for e in experiments if e.name != "Default"]
-    run_paths: list[Path] = []
-    for name in experiment_names:
-        if experiment := mlflow.get_experiment_by_name(name):
-            uri = experiment.artifact_location
-            if isinstance(uri, str):
-                path = file_uri_to_path(uri)
-                run_paths.extend(p for p in path.iterdir() if p.is_dir())
-    if other:
-        return [p.joinpath(*other) for p in run_paths]
-    return run_paths
-def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
-    """List all run IDs for the specified experiments.
-    This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), the function will search all
-    runs for all experiments except the "Default" experiment.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None is provided, the function will search all runs
-            for all experiments except the "Default" experiment.
-    Returns:
-        list[str]: A list of run IDs for the specified experiments.
-    """
-    return [run_path.stem for run_path in list_run_paths(experiment_names)]
-def list_runs(
-    experiment_names: str | list[str] | None = None,
-    n_jobs: int = 0,
-) -> RunCollection:
-    """List all runs for the specified experiments.
-    This function retrieves all runs for the given list of experiment names.
-    If no experiment names are provided (None), the function will search all runs
-    for all experiments except the "Default" experiment.
-    The function returns the results as a `RunCollection` object.
-    Note:
-        The returned runs are sorted by their start time in ascending order.
-    Args:
-        experiment_names (list[str] | None): List of experiment names to search
-            for runs. If None is provided, the function will search all runs
-            for all experiments except the "Default" experiment.
-        n_jobs (int): The number of jobs to retrieve runs in parallel.
-    Returns:
-        RunCollection: A `RunCollection` instance containing the runs for the
-        specified experiments.
-    """
-    import mlflow
-    run_ids = list_run_ids(experiment_names)
-    if n_jobs == 0:
-        runs = [mlflow.get_run(run_id) for run_id in run_ids]
-    else:
-        it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
-        runs = joblib.Parallel(n_jobs, backend="threading")(it)
-    runs = sorted(runs, key=lambda run: run.info.start_time)  # type: ignore
-    return RunCollection(runs)  # type: ignore

hydraflow 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl

hydraflow 0.14.4__py3-none-any.whl → 0.15.1__py3-none-any.whl