PyPI - hydraflow - Versions diffs - 0.14.3__tar.gz → 0.15.0__tar.gz - Mend

hydraflow 0.14.3__tar.gz → 0.15.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

{hydraflow-0.14.3 → hydraflow-0.15.0}/.github/workflows/ci.yaml RENAMED Viewed

@@ -21,7 +21,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.13"]
     steps:
       - uses: actions/checkout@v4
@@ -36,7 +36,7 @@ jobs:
       - name: Ruff check
         run: ruff check
       - name: Run test
-        run: uv run pytest -v --junitxml=junit.xml
+        run: uv run pytest -v -n8 --junitxml=junit.xml
       - name: Upload Codecov Results
         if: success()
         uses: codecov/codecov-action@v4

{hydraflow-0.14.3 → hydraflow-0.15.0}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.14.3
-Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management. It combines Hydra's configuration management with MLflow's tracking capabilities, offering automated experiment tracking, versioning, and a rich CLI interface. Perfect for ML researchers and teams who need reproducibility while scaling experiments.
+Version: 0.15.0
+Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
 Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
@@ -36,40 +36,40 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Requires-Python: >=3.10
+Requires-Python: >=3.13
 Requires-Dist: hydra-core>=1.3
+Requires-Dist: joblib>=1.4.0
 Requires-Dist: mlflow>=2.15
 Requires-Dist: omegaconf>=2.3
+Requires-Dist: polars>=1.26
 Requires-Dist: python-ulid>=3.0.0
 Requires-Dist: rich>=13.9
+Requires-Dist: ruff>=0.11
 Requires-Dist: typer>=0.15
 Description-Content-Type: text/markdown
 # Hydraflow
 [![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
 [![Build Status][GHAction-image]][GHAction-link]
 [![Coverage Status][codecov-image]][codecov-link]
 [![Documentation Status][docs-image]][docs-link]
+[![Python Version][python-v-image]][python-v-link]
 <!-- Badges -->
 [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
 [pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
 [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
 [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
 [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
 [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
-[docs-image]: https://readthedocs.org/projects/hydraflow/badge/?version=latest
+[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
 [docs-link]: https://daizutabi.github.io/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
 ## Overview
@@ -101,6 +101,8 @@ You can install Hydraflow via pip:
 pip install hydraflow
 ```
+**Requirements:** Python 3.13+
 ## Quick Start
 Here is a simple example to get you started with Hydraflow:

{hydraflow-0.14.3 → hydraflow-0.15.0}/README.md RENAMED Viewed

@@ -1,22 +1,22 @@
 # Hydraflow
 [![PyPI Version][pypi-v-image]][pypi-v-link]
-[![Python Version][python-v-image]][python-v-link]
 [![Build Status][GHAction-image]][GHAction-link]
 [![Coverage Status][codecov-image]][codecov-link]
 [![Documentation Status][docs-image]][docs-link]
+[![Python Version][python-v-image]][python-v-link]
 <!-- Badges -->
 [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
 [pypi-v-link]: https://pypi.org/project/hydraflow/
-[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
-[python-v-link]: https://pypi.org/project/hydraflow
 [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
 [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
 [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
 [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
-[docs-image]: https://readthedocs.org/projects/hydraflow/badge/?version=latest
+[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
 [docs-link]: https://daizutabi.github.io/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
 ## Overview
@@ -48,6 +48,8 @@ You can install Hydraflow via pip:
 pip install hydraflow
 ```
+**Requirements:** Python 3.13+
 ## Quick Start
 Here is a simple example to get you started with Hydraflow:

hydraflow-0.15.0/docs/index.md ADDED Viewed

@@ -0,0 +1,117 @@
+# HydraFlow: Seamless ML Experiment Management
+<div class="grid cards" markdown>
+- 🚀 **Streamlined Experimentation**
+  Create, run, and track ML experiments with minimal boilerplate
+- ⚙️ **Hydra + MLflow Integration**
+  Combine powerful configuration management with robust experiment tracking
+- 📈 **Rich Analysis Tools**
+  Filter, group, and visualize experiment results with intuitive APIs
+- ⚡ **Performance Optimized**
+  Parallel processing support for handling thousands of experiments efficiently
+</div>
+## What is HydraFlow?
+HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and
+[MLflow](https://mlflow.org/) to create a powerful framework for machine
+learning experimentation. It solves common challenges in ML research and
+production workflows:
+- **Configuration Management**: Type-safe, hierarchical, and dynamic
+  configuration
+- **Experiment Tracking**: Automatically log parameters, metrics, and artifacts
+- **Results Analysis**: Flexible tools to filter, compare, and visualize
+  experiment results
+- **Reproducibility**: Ensure experiments can be reliably reproduced with
+  exact parameters
+## Key Features
+**At Development Time:**
+- Type-safe configuration with IDE autocompletion
+- Declarative experiment definition with dataclasses
+- Seamless integration with existing ML pipelines
+**During Execution:**
+- Parameter sweeps with one command
+- Automatic configuration logging
+- De-duplication of identical experiments
+**After Completion:**
+- Powerful filtering and grouping of results
+- Conversion to DataFrames for analysis
+- Configuration-aware implementation loading
+## Quick Installation
+```bash
+pip install hydraflow
+```
+**Requirements:** Python 3.13+
+## Minimal Example
+```python
+from dataclasses import dataclass
+import hydraflow
+import mlflow
+@dataclass
+class Config:
+    learning_rate: float = 0.001
+    batch_size: int = 32
+@hydraflow.main(Config)
+def experiment(run, cfg):
+    # Your experiment code here
+    mlflow.log_metric("accuracy", 0.95)
+if __name__ == "__main__":
+    experiment()
+```
+Run this with parameter variations in one command:
+```bash
+python experiment.py -m learning_rate=0.01,0.001,0.0001 batch_size=16,32,64
+```
+## Post-experiment Analysis
+After running your experiments, analyze the results with HydraFlow's
+powerful API:
+```python
+from hydraflow import Run, RunCollection
+# Load all runs from the "experiment" experiment
+runs = Run.load(hydraflow.iter_run_dirs("mlruns", "experiment"))
+# Filter runs by configuration parameters
+best_runs = runs.filter(learning_rate=0.001, batch_size=32)
+# Convert to DataFrame for further analysis
+df = runs.to_frame("learning_rate", "batch_size",
+                   accuracy=lambda run: run.get("metrics.accuracy"))
+```
+<!--
+## Explore HydraFlow
+<div class="grid cards" markdown>
+- 📖 [**Getting Started**](usage/quickstart.md)
+  Learn the basics of HydraFlow with a step-by-step guide
+- 🧩 [**API Reference**](api/index.md)
+  Detailed documentation of HydraFlow's classes and functions
+- 💻 [**CLI Tools**](cli/index.md)
+  Discover HydraFlow's command-line utilities
+- 💡 [**Advanced Usage**](advanced/index.md)
+  Tips, tricks, and best practices for complex workflows
+</div>
+-->

hydraflow-0.15.0/docs/usage/quickstart.md ADDED Viewed

@@ -0,0 +1,330 @@
+# HydraFlow Quickstart Guide
+HydraFlow seamlessly integrates MLflow (for experiment tracking)
+with Hydra (for configuration management), creating a powerful
+framework for machine learning experimentation.
+This quickstart shows you how to get up and running with HydraFlow in minutes.
+## Hydra application
+The following example demonstrates how to use a Hydraflow application.
+```python title="apps/quickstart.py" linenums="1"
+--8<-- "apps/quickstart.py"
+```
+### Hydraflow's `main` decorator
+[`hydraflow.main`][] starts a new MLflow run that logs the Hydra
+configuration. The decorated function must have two arguments: `run` and
+`cfg`. The `run` argument is the current MLflow run with type
+`mlflow.entities.Run`. The `cfg` argument is the Hydra configuration
+with type `omegaconf.DictConfig`. You can annotate the arguments with
+`Run` and `Config` to get type checking and autocompletion in your IDE,
+although the `cfg` argument is not actually an instance of `Config`
+(duck typing is used).
+```python
+@hydraflow.main(Config)
+def app(run: Run, cfg: Config) -> None:
+    pass
+```
+## Run the application
+```bash exec="on"
+rm -rf mlruns outputs multirun
+```
+### Single-run
+Run the Hydraflow application as a normal Python script.
+```console exec="1" source="console"
+$ python apps/quickstart.py
+```
+Check the MLflow CLI to view the experiment.
+```console exec="1" source="console"
+$ mlflow experiments search
+```
+The experiment name comes from the name of the Hydra job.
+### Multi-run
+Run the Hydraflow application with multiple configurations.
+```console exec="1" source="console"
+$ python apps/quickstart.py -m width=400,600 height=100,200,300
+```
+## Use Hydraflow API
+### Iterate over run directories
+The [`hydraflow.iter_run_dirs`][] function iterates over the run
+directories. The first argument is the path to the MLflow tracking root
+directory (in most cases, this is `"mlruns"`).
+```pycon exec="1" source="console" session="quickstart"
+>>> import hydraflow
+>>> for run_dir in hydraflow.iter_run_dirs("mlruns"):
+...     print(run_dir)
+```
+Optionally, you can specify the experiment name(s) to filter the runs.
+```python
+>>> hydraflow.iter_run_dirs("mlruns", "quickstart")
+>>> hydraflow.iter_run_dirs("mlruns", ["quickstart1", "quickstart2"])
+```
+### Load a run
+[`Run`][hydraflow.core.run.Run] is a class that represents a *Hydraflow*
+run, not an MLflow run. A `Run` instance is created by passing a
+`pathlib.Path` instance that points to the run directory to the `Run`
+constructor.
+```pycon exec="1" source="console" session="quickstart"
+>>> from hydraflow import Run
+>>> run_dirs = hydraflow.iter_run_dirs("mlruns", "quickstart")
+>>> run_dir = next(run_dirs)  # run_dirs is an iterator
+>>> run = Run(run_dir)
+>>> print(run)
+>>> print(type(run))
+```
+You can also use the [`load`][hydraflow.core.run.Run.load] class method,
+which accepts a `str` as well as a `pathlib.Path`, to load a `Run` instance.
+```pycon exec="1" source="console" session="quickstart"
+>>> Run.load(str(run_dir))
+>>> print(run)
+```
+!!! note
+    The use case of `Run.load` is to load multiple `Run` instances
+    from run directories as described below.
+The `Run` instance has an `info` attribute that contains information
+about the run.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(run.info.run_dir)
+>>> print(run.info.run_id)
+>>> print(run.info.job_name)  # Hydra job name = MLflow experiment name
+```
+The `Run` instance has a `cfg` attribute that contains the Hydra
+configuration.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(run.cfg)
+```
+### Configuration type of the run
+Optionally, you can specify the config type of the run using the
+`Run[C]` class.
+```pycon exec="1" source="console" session="quickstart"
+>>> from dataclasses import dataclass
+>>> @dataclass
+... class Config:
+...     width: int = 1024
+...     height: int = 768
+>>> run = Run[Config](run_dir)
+>>> print(run)
+>>> # autocompletion occurs below, for example, run.cfg.height
+>>> # run.cfg.[TAB]
+```
+The `Run[C]` class is a generic class that takes a config type `C` as a
+type parameter. The `run.cfg` attribute is recognized as `C` type in
+IDEs, which provides autocompletion and type checking.
+### Get a run's configuration
+The `get` method can be used to get a run's configuration.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(run.get("width"))
+>>> print(run.get("height"))
+```
+### Implementation of the run
+Optionally, you can specify the implementation of the run. Use the
+`Run[C, I]` class to specify the implementation type. The second
+argument `impl_factory` is the implementation factory, which can be a
+class or a function to generate the implementation. The `impl_factory`
+is called with the run's artifacts directory as the first and only
+argument.
+```pycon exec="1" source="console" session="quickstart"
+>>> from pathlib import Path
+>>> class Impl:
+...     root_dir: Path
+...     def __init__(self, root_dir: Path):
+...         self.root_dir = root_dir
+...     def __repr__(self) -> str:
+...         return f"Impl({self.root_dir.stem!r})"
+>>> run = Run[Config, Impl](run_dir, Impl)
+>>> print(run)
+```
+The representation of the `Run` instance includes the implementation
+type as shown above.
+If you specify the implementation type, the `run.impl` attribute is
+lazily initialized the first time it is accessed.
+The `run.impl` attribute is recognized as `I` type in IDEs, which
+provides autocompletion and type checking.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(run.impl)
+>>> print(run.impl.root_dir)
+>>> # autocompletion occurs below, for example, run.impl.root_dir
+>>> # run.impl.[TAB]
+```
+The `impl_factory` can accept two arguments: the run's artifacts
+directory and the run's configuration.
+```pycon exec="1" source="console" session="quickstart"
+>>> from dataclasses import dataclass, field
+>>> @dataclass
+... class Size:
+...     root_dir: Path = field(repr=False)
+...     cfg: Config
+...     size: int = field(init=False)
+...     def __post_init__(self):
+...         self.size = self.cfg.width * self.cfg.height
+>>> run = Run[Config, Size].load(run_dir, Size)
+>>> print(run)
+>>> print(run.impl)
+```
+### Collect runs
+You can collect multiple `Run` instances from run directories as a
+collection of runs [`RunCollection`][hydraflow.RunCollection].
+```pycon exec="1" source="console" session="quickstart"
+>>> from hydraflow import RunCollection
+>>> run_dirs = hydraflow.iter_run_dirs("mlruns", "quickstart")
+>>> rc = Run[Config, Size].load(run_dirs, Size)
+>>> print(rc)
+```
+In the above example, the `load` class method is called with an iterable
+of run directories and the implementation type. The `load` class method
+returns a `RunCollection` instance instead of a single `Run` instance.
+The representation of the `RunCollection` instance includes the run
+collection type and the number of runs in the collection.
+### Handle a run collection
+The `RunCollection` instance has [`first`][hydraflow.RunCollection.first]
+and [`last`][hydraflow.RunCollection.last] methods that return the first
+and last run in the collection, respectively.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.first())
+>>> print(rc.last())
+```
+The [`filter`][hydraflow.RunCollection.filter] method filters the runs
+by the given key-value pairs.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.filter(width=400))
+```
+If the value is a list, the run will be included if the value is in the
+list.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.filter(height=[100, 300]))
+```
+If the value is a tuple, it is treated as a range: the run will be included
+if the value lies between the tuple's two elements. Both ends are inclusive.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.filter(height=(100, 300)))
+```
+The [`get`][hydraflow.RunCollection.get] method returns a single `Run`
+instance with the given key-value pairs.
+```pycon exec="1" source="console" session="quickstart"
+>>> run = rc.get(width=(350, 450), height=(150, 250))
+>>> print(run)
+>>> print(run.impl)
+```
+The [`to_frame`][hydraflow.RunCollection.to_frame] method returns a
+polars DataFrame of the run collection.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.to_frame("width", "height"))
+```
+The `to_frame` method can take keyword arguments to customize the
+DataFrame. Each keyword argument is a callable that takes a `Run`
+instance and returns a value.
+```pycon exec="1" source="console" session="quickstart"
+>>> print(rc.to_frame("width", size=lambda run: run.impl.size))
+```
+The callable can return a list.
+```pycon exec="1" source="console" session="quickstart"
+>>> def to_list(run: Run) -> list[int]:
+...     return [2 * run.get("width"), 3 * run.get("height")]
+>>> print(rc.to_frame("width", from_list=to_list))
+```
+The callable can also return a dictionary.
+```pycon exec="1" source="console" session="quickstart"
+>>> def to_dict(run: Run) -> dict[str, int | str]:
+...     width2 = 2 * run.get("width")
+...     name = f"h{run.get('height')}"
+...     return {"width2": width2, "name": name}
+>>> print(rc.to_frame("width", from_dict=to_dict))
+```
+### Group runs
+The [`group_by`][hydraflow.RunCollection.group_by] method groups the
+runs by the given key.
+```pycon exec="1" source="console" session="quickstart"
+>>> grouped = rc.group_by("width")
+>>> for key, group in grouped.items():
+...     print(key, group)
+```
+The `group_by` method can also take multiple keys.
+```pycon exec="1" source="console" session="quickstart"
+>>> grouped = rc.group_by("width", "height")
+>>> for key, group in grouped.items():
+...     print(key, group)
+```
+The `group_by` method can also take a callable which accepts a sequence
+of runs and returns a value. In this case, the `group_by` method returns
+a polars DataFrame.
+```pycon exec="1" source="console" session="quickstart"
+>>> df = rc.group_by("width", n=lambda runs: len(runs))
+>>> print(df)
+```

{hydraflow-0.14.3 → hydraflow-0.15.0}/mkdocs.yaml RENAMED Viewed

@@ -49,6 +49,14 @@ markdown_extensions:
   - pymdownx.superfences
   - pymdownx.tabbed:
       alternate_style: true
+  - attr_list
+  - md_in_html
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+      options:
+        custom_icons:
+          - overrides/.icons
 nav:
   - Home: index.md
   - Usage:

{hydraflow-0.14.3 → hydraflow-0.15.0}/pyproject.toml RENAMED Viewed

@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
 [project]
 name = "hydraflow"
-version = "0.14.3"
-description = "HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management. It combines Hydra's configuration management with MLflow's tracking capabilities, offering automated experiment tracking, versioning, and a rich CLI interface. Perfect for ML researchers and teams who need reproducibility while scaling experiments."
+version = "0.15.0"
+description = "HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities."
 readme = "README.md"
 license = { file = "LICENSE" }
 authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
@@ -17,14 +17,11 @@ classifiers = [
   "License :: OSI Approved :: MIT License",
   "Operating System :: OS Independent",
   "Programming Language :: Python",
-  "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11",
-  "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
   "Topic :: Scientific/Engineering :: Artificial Intelligence",
   "Topic :: Software Development :: Libraries :: Python Modules",
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.13"
 keywords = [
   "machine-learning",
   "mlflow",
@@ -37,12 +34,14 @@ keywords = [
   "data-science",
 ]
 dependencies = [
   "hydra-core>=1.3",
+  "joblib>=1.4.0",
   "mlflow>=2.15",
   "omegaconf>=2.3",
+  "polars>=1.26",
   "python-ulid>=3.0.0",
   "rich>=13.9",
+  "ruff>=0.11",
   "typer>=0.15",
 ]
@@ -69,9 +68,7 @@ docs = ["markdown-exec[ansi]", "mkapi", "mkdocs-material"]
 addopts = [
   "--cov=hydraflow",
   "--cov-report=lcov:lcov.info",
-  "--dist=loadgroup",
   "--doctest-modules",
-  "-n8",
 ]
 filterwarnings = [
   "ignore:Support for class-based `config` is deprecated",
@@ -84,7 +81,7 @@ skip_covered = true
 [tool.ruff]
 line-length = 88
-target-version = "py310"
+target-version = "py313"
 [tool.ruff.lint]
 select = ["ALL"]
@@ -102,6 +99,7 @@ ignore = [
   "EM101",
   "FBT001",
   "FBT002",
+  "PD",
   "PGH003",
   "PLR0911",
   "PLR0913",
@@ -123,6 +121,3 @@ ignore = [
 [tool.pyright]
 include = ["src", "tests"]
-strictDictionaryInference = true
-strictListInference = true
-strictSetInference = true

hydraflow 0.14.3__tar.gz → 0.15.0__tar.gz

hydraflow 0.14.3__tar.gz → 0.15.0__tar.gz