hydraflow 0.15.0__tar.gz → 0.16.0__tar.gz
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.gitignore +3 -3
- {hydraflow-0.15.0 → hydraflow-0.16.0}/PKG-INFO +84 -75
- hydraflow-0.16.0/README.md +150 -0
- hydraflow-0.16.0/docs/getting-started/concepts.md +174 -0
- hydraflow-0.16.0/docs/getting-started/index.md +80 -0
- hydraflow-0.16.0/docs/getting-started/installation.md +83 -0
- hydraflow-0.16.0/docs/index.md +91 -0
- hydraflow-0.16.0/docs/part1-applications/configuration.md +126 -0
- hydraflow-0.16.0/docs/part1-applications/execution.md +183 -0
- hydraflow-0.16.0/docs/part1-applications/index.md +89 -0
- hydraflow-0.16.0/docs/part1-applications/main-decorator.md +264 -0
- hydraflow-0.16.0/docs/part2-advanced/index.md +88 -0
- hydraflow-0.16.0/docs/part2-advanced/job-configuration.md +259 -0
- hydraflow-0.16.0/docs/part2-advanced/sweep-syntax.md +280 -0
- hydraflow-0.16.0/docs/part3-analysis/index.md +144 -0
- hydraflow-0.16.0/docs/part3-analysis/run-class.md +261 -0
- hydraflow-0.16.0/docs/part3-analysis/run-collection.md +497 -0
- hydraflow-0.16.0/docs/part3-analysis/updating-runs.md +165 -0
- hydraflow-0.16.0/docs/practical-tutorials/advanced.md +252 -0
- hydraflow-0.16.0/docs/practical-tutorials/analysis.md +332 -0
- hydraflow-0.16.0/docs/practical-tutorials/applications.md +171 -0
- hydraflow-0.16.0/docs/practical-tutorials/index.md +51 -0
- hydraflow-0.15.0/apps/quickstart.py → hydraflow-0.16.0/examples/example.py +1 -11
- hydraflow-0.16.0/examples/hydraflow.yaml +19 -0
- hydraflow-0.16.0/examples/submit.py +19 -0
- hydraflow-0.16.0/mkdocs.yaml +91 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/pyproject.toml +2 -2
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/__init__.py +2 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/context.py +4 -4
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/io.py +6 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/main.py +19 -11
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/run.py +50 -26
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/run_collection.py +119 -12
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/run_info.py +16 -17
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/conf.py +6 -6
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/io.py +1 -17
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/job.py +41 -14
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/parser.py +9 -8
- hydraflow-0.16.0/tests/cli/hydraflow.yaml +62 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/test_run.py +10 -0
- hydraflow-0.16.0/tests/core/main/test_update.py +18 -0
- hydraflow-0.16.0/tests/core/main/update.py +35 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/run/test_run.py +41 -6
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/run/test_run_collection.py +31 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/run/test_run_info.py +0 -24
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/test_io.py +6 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/conftest.py +2 -2
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_conf.py +5 -5
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_job.py +19 -4
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_parser.py +41 -41
- hydraflow-0.15.0/README.md +0 -141
- hydraflow-0.15.0/docs/index.md +0 -117
- hydraflow-0.15.0/docs/usage/quickstart.md +0 -330
- hydraflow-0.15.0/mkdocs.yaml +0 -64
- hydraflow-0.15.0/tests/cli/hydraflow.yaml +0 -42
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.gitattributes +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.github/workflows/ci.yaml +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.github/workflows/docs.yaml +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/.github/workflows/publish.yaml +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/LICENSE +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/cli.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/core/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/executor/aio.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/src/hydraflow/py.typed +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/app.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/conftest.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/submit.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/test_setup.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/test_show.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/cli/test_version.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/conftest.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/chdir.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/log_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/start_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/test_chdir.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/test_log_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/context/test_start_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/default.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/force_new_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/match_overrides.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/rerun_finished.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/skip_finished.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_default.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_force_new_run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_main.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_match_overrides.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_rerun_finished.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/main/test_skip_finished.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/run/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/core/run/run.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/__init__.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/echo.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/read.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_aio.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_args.py +0 -0
- {hydraflow-0.15.0 → hydraflow-0.16.0}/tests/executor/test_io.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.15.0
|
3
|
+
Version: 0.16.0
|
4
4
|
Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -51,7 +51,7 @@ Requires-Dist: ruff>=0.11
|
|
51
51
|
Requires-Dist: typer>=0.15
|
52
52
|
Description-Content-Type: text/markdown
|
53
53
|
|
54
|
-
#
|
54
|
+
# HydraFlow
|
55
55
|
|
56
56
|
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
57
57
|
[![Build Status][GHAction-image]][GHAction-link]
|
@@ -60,6 +60,7 @@ Description-Content-Type: text/markdown
|
|
60
60
|
[![Python Version][python-v-image]][python-v-link]
|
61
61
|
|
62
62
|
<!-- Badges -->
|
63
|
+
|
63
64
|
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
64
65
|
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
65
66
|
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
@@ -73,117 +74,125 @@ Description-Content-Type: text/markdown
|
|
73
74
|
|
74
75
|
## Overview
|
75
76
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
77
|
+
HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
|
78
|
+
|
79
|
+
## Design Principles
|
80
|
+
|
81
|
+
HydraFlow is built on the following design principles:
|
82
|
+
|
83
|
+
1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
|
84
|
+
2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
|
85
|
+
3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
|
86
|
+
4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking
|
82
87
|
|
83
88
|
## Key Features
|
84
89
|
|
85
|
-
- **Configuration Management
|
86
|
-
|
87
|
-
- **
|
88
|
-
|
89
|
-
- **
|
90
|
-
|
91
|
-
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
|
92
|
-
projects with minimal setup.
|
93
|
-
- **Rich CLI Interface**: Command-line tools for managing experiments and viewing results.
|
94
|
-
- **Cross-Platform Support**: Works consistently across different operating systems.
|
90
|
+
- **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
|
91
|
+
- **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
|
92
|
+
- **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
|
93
|
+
- **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
|
94
|
+
- **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
|
95
|
+
- **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality
|
95
96
|
|
96
97
|
## Installation
|
97
98
|
|
98
|
-
You can install Hydraflow via pip:
|
99
|
-
|
100
99
|
```bash
|
101
100
|
pip install hydraflow
|
102
101
|
```
|
103
102
|
|
104
103
|
**Requirements:** Python 3.13+
|
105
104
|
|
106
|
-
## Quick
|
107
|
-
|
108
|
-
Here is a simple example to get you started with Hydraflow:
|
105
|
+
## Quick Example
|
109
106
|
|
110
107
|
```python
|
111
|
-
from __future__ import annotations
|
112
|
-
|
113
108
|
from dataclasses import dataclass
|
114
|
-
from
|
115
|
-
|
109
|
+
from mlflow.entities import Run
|
116
110
|
import hydraflow
|
117
|
-
import mlflow
|
118
111
|
|
119
|
-
|
120
|
-
|
112
|
+
@dataclass
|
113
|
+
class Config:
|
114
|
+
width: int = 1024
|
115
|
+
height: int = 768
|
121
116
|
|
117
|
+
@hydraflow.main(Config)
|
118
|
+
def app(run: Run, cfg: Config) -> None:
|
119
|
+
# Your experiment code here
|
120
|
+
print(f"Running with width={cfg.width}, height={cfg.height}")
|
121
|
+
|
122
|
+
# Log metrics
|
123
|
+
hydraflow.log_metric("area", cfg.width * cfg.height)
|
122
124
|
|
125
|
+
if __name__ == "__main__":
|
126
|
+
app()
|
127
|
+
```
|
128
|
+
|
129
|
+
Execute a parameter sweep with:
|
130
|
+
|
131
|
+
```bash
|
132
|
+
python app.py -m width=800,1200 height=600,900
|
133
|
+
```
|
134
|
+
|
135
|
+
## Core Components
|
136
|
+
|
137
|
+
HydraFlow consists of the following key components:
|
138
|
+
|
139
|
+
### Configuration Management
|
140
|
+
|
141
|
+
Define type-safe configurations using Python dataclasses:
|
142
|
+
|
143
|
+
```python
|
123
144
|
@dataclass
|
124
145
|
class Config:
|
125
|
-
"""Configuration for the ML training experiment."""
|
126
|
-
# Training hyperparameters
|
127
146
|
learning_rate: float = 0.001
|
128
147
|
batch_size: int = 32
|
129
148
|
epochs: int = 10
|
149
|
+
```
|
130
150
|
|
131
|
-
|
132
|
-
hidden_size: int = 128
|
133
|
-
dropout: float = 0.1
|
134
|
-
|
135
|
-
# Dataset parameters
|
136
|
-
train_size: float = 0.8
|
137
|
-
random_seed: int = 42
|
151
|
+
### Main Decorator
|
138
152
|
|
153
|
+
The `@hydraflow.main` decorator integrates Hydra and MLflow:
|
139
154
|
|
155
|
+
```python
|
140
156
|
@hydraflow.main(Config)
|
141
|
-
def
|
142
|
-
|
143
|
-
|
144
|
-
This example demonstrates how to:
|
157
|
+
def train(run: Run, cfg: Config) -> None:
|
158
|
+
# Your experiment code
|
159
|
+
```
|
145
160
|
|
146
|
-
|
147
|
-
2. Use Hydraflow to integrate with MLflow
|
148
|
-
3. Track metrics and parameters automatically
|
161
|
+
### Workflow Automation
|
149
162
|
|
150
|
-
|
151
|
-
run: MLflow run for the experiment corresponding to the Hydra app.
|
152
|
-
This `Run` instance is automatically created by Hydraflow.
|
153
|
-
cfg: Configuration for the experiment's run.
|
154
|
-
This `Config` instance is originally defined by Hydra, and then
|
155
|
-
automatically passed to the app by Hydraflow.
|
156
|
-
"""
|
157
|
-
# Training loop
|
158
|
-
for epoch in range(cfg.epochs):
|
159
|
-
# Simulate training and validation
|
160
|
-
train_loss = 1.0 / (epoch + 1)
|
161
|
-
val_loss = 1.1 / (epoch + 1)
|
163
|
+
Define reusable experiment workflows in YAML:
|
162
164
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
165
|
+
```yaml
|
166
|
+
jobs:
|
167
|
+
train_models:
|
168
|
+
run: python train.py
|
169
|
+
sets:
|
170
|
+
- each: model=small,medium,large
|
171
|
+
all: learning_rate=0.001,0.01,0.1
|
172
|
+
```
|
168
173
|
|
169
|
-
|
174
|
+
### Analysis Tools
|
170
175
|
|
176
|
+
Analyze experiment results with powerful APIs:
|
171
177
|
|
172
|
-
|
173
|
-
|
174
|
-
```
|
178
|
+
```python
|
179
|
+
from hydraflow import Run, iter_run_dirs
|
175
180
|
|
176
|
-
|
181
|
+
# Load runs
|
182
|
+
runs = Run.load(iter_run_dirs("mlruns"))
|
177
183
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
- Type-safe configuration with dataclasses
|
184
|
+
# Filter and analyze
|
185
|
+
best_runs = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
|
186
|
+
```
|
182
187
|
|
183
188
|
## Documentation
|
184
189
|
|
185
|
-
For detailed documentation,
|
186
|
-
|
190
|
+
For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
|
191
|
+
|
192
|
+
- [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
|
193
|
+
- [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
|
194
|
+
- [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
|
195
|
+
- [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation
|
187
196
|
|
188
197
|
## Contributing
|
189
198
|
|
@@ -191,4 +200,4 @@ We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) f
|
|
191
200
|
|
192
201
|
## License
|
193
202
|
|
194
|
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
203
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# HydraFlow
|
2
|
+
|
3
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
4
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
5
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
6
|
+
[![Documentation Status][docs-image]][docs-link]
|
7
|
+
[![Python Version][python-v-image]][python-v-link]
|
8
|
+
|
9
|
+
<!-- Badges -->
|
10
|
+
|
11
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
12
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
13
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
14
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
15
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
16
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
17
|
+
[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
|
18
|
+
[docs-link]: https://daizutabi.github.io/hydraflow/
|
19
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
20
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
21
|
+
|
22
|
+
## Overview
|
23
|
+
|
24
|
+
HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
|
25
|
+
|
26
|
+
## Design Principles
|
27
|
+
|
28
|
+
HydraFlow is built on the following design principles:
|
29
|
+
|
30
|
+
1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
|
31
|
+
2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
|
32
|
+
3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
|
33
|
+
4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking
|
34
|
+
|
35
|
+
## Key Features
|
36
|
+
|
37
|
+
- **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
|
38
|
+
- **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
|
39
|
+
- **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
|
40
|
+
- **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
|
41
|
+
- **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
|
42
|
+
- **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality
|
43
|
+
|
44
|
+
## Installation
|
45
|
+
|
46
|
+
```bash
|
47
|
+
pip install hydraflow
|
48
|
+
```
|
49
|
+
|
50
|
+
**Requirements:** Python 3.13+
|
51
|
+
|
52
|
+
## Quick Example
|
53
|
+
|
54
|
+
```python
|
55
|
+
from dataclasses import dataclass
|
56
|
+
from mlflow.entities import Run
|
57
|
+
import hydraflow
|
58
|
+
|
59
|
+
@dataclass
|
60
|
+
class Config:
|
61
|
+
width: int = 1024
|
62
|
+
height: int = 768
|
63
|
+
|
64
|
+
@hydraflow.main(Config)
|
65
|
+
def app(run: Run, cfg: Config) -> None:
|
66
|
+
# Your experiment code here
|
67
|
+
print(f"Running with width={cfg.width}, height={cfg.height}")
|
68
|
+
|
69
|
+
# Log metrics
|
70
|
+
hydraflow.log_metric("area", cfg.width * cfg.height)
|
71
|
+
|
72
|
+
if __name__ == "__main__":
|
73
|
+
app()
|
74
|
+
```
|
75
|
+
|
76
|
+
Execute a parameter sweep with:
|
77
|
+
|
78
|
+
```bash
|
79
|
+
python app.py -m width=800,1200 height=600,900
|
80
|
+
```
|
81
|
+
|
82
|
+
## Core Components
|
83
|
+
|
84
|
+
HydraFlow consists of the following key components:
|
85
|
+
|
86
|
+
### Configuration Management
|
87
|
+
|
88
|
+
Define type-safe configurations using Python dataclasses:
|
89
|
+
|
90
|
+
```python
|
91
|
+
@dataclass
|
92
|
+
class Config:
|
93
|
+
learning_rate: float = 0.001
|
94
|
+
batch_size: int = 32
|
95
|
+
epochs: int = 10
|
96
|
+
```
|
97
|
+
|
98
|
+
### Main Decorator
|
99
|
+
|
100
|
+
The `@hydraflow.main` decorator integrates Hydra and MLflow:
|
101
|
+
|
102
|
+
```python
|
103
|
+
@hydraflow.main(Config)
|
104
|
+
def train(run: Run, cfg: Config) -> None:
|
105
|
+
# Your experiment code
|
106
|
+
```
|
107
|
+
|
108
|
+
### Workflow Automation
|
109
|
+
|
110
|
+
Define reusable experiment workflows in YAML:
|
111
|
+
|
112
|
+
```yaml
|
113
|
+
jobs:
|
114
|
+
train_models:
|
115
|
+
run: python train.py
|
116
|
+
sets:
|
117
|
+
- each: model=small,medium,large
|
118
|
+
all: learning_rate=0.001,0.01,0.1
|
119
|
+
```
|
120
|
+
|
121
|
+
### Analysis Tools
|
122
|
+
|
123
|
+
Analyze experiment results with powerful APIs:
|
124
|
+
|
125
|
+
```python
|
126
|
+
from hydraflow import Run, iter_run_dirs
|
127
|
+
|
128
|
+
# Load runs
|
129
|
+
runs = Run.load(iter_run_dirs("mlruns"))
|
130
|
+
|
131
|
+
# Filter and analyze
|
132
|
+
best_runs = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
|
133
|
+
```
|
134
|
+
|
135
|
+
## Documentation
|
136
|
+
|
137
|
+
For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
|
138
|
+
|
139
|
+
- [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
|
140
|
+
- [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
|
141
|
+
- [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
|
142
|
+
- [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation
|
143
|
+
|
144
|
+
## Contributing
|
145
|
+
|
146
|
+
We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) for details.
|
147
|
+
|
148
|
+
## License
|
149
|
+
|
150
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
@@ -0,0 +1,174 @@
|
|
1
|
+
# Core Concepts
|
2
|
+
|
3
|
+
This page introduces the fundamental concepts of HydraFlow that form the foundation of the framework.
|
4
|
+
|
5
|
+
## Design Principles
|
6
|
+
|
7
|
+
HydraFlow is built on the following design principles:
|
8
|
+
|
9
|
+
1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
|
10
|
+
2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
|
11
|
+
3. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking
|
12
|
+
4. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
|
13
|
+
|
14
|
+
## Key Components
|
15
|
+
|
16
|
+
HydraFlow consists of the following key components:
|
17
|
+
|
18
|
+
### Configuration Management
|
19
|
+
|
20
|
+
HydraFlow uses a hierarchical configuration system based on OmegaConf and Hydra. This provides:
|
21
|
+
|
22
|
+
- Type-safe configuration using Python dataclasses
|
23
|
+
- Schema validation to ensure configuration correctness
|
24
|
+
- Configuration composition from multiple sources
|
25
|
+
- Command-line overrides
|
26
|
+
|
27
|
+
Example configuration:
|
28
|
+
|
29
|
+
```python
|
30
|
+
from dataclasses import dataclass
|
31
|
+
|
32
|
+
@dataclass
|
33
|
+
class Config:
|
34
|
+
learning_rate: float = 0.001
|
35
|
+
batch_size: int = 32
|
36
|
+
epochs: int = 10
|
37
|
+
```
|
38
|
+
|
39
|
+
This configuration class defines the structure and default values for your experiment, enabling type checking and auto-completion.
|
40
|
+
|
41
|
+
### Main Decorator
|
42
|
+
|
43
|
+
The [`@hydraflow.main`][hydraflow.main] decorator defines the entry point for a HydraFlow application:
|
44
|
+
|
45
|
+
```python
|
46
|
+
import hydraflow
|
47
|
+
from mlflow.entities import Run
|
48
|
+
|
49
|
+
@hydraflow.main(Config)
|
50
|
+
def train(run: Run, cfg: Config) -> None:
|
51
|
+
# Your experiment code
|
52
|
+
print(f"Training with lr={cfg.learning_rate}, batch_size={cfg.batch_size}")
|
53
|
+
|
54
|
+
# Log metrics
|
55
|
+
hydraflow.log_metric("accuracy", 0.95)
|
56
|
+
```
|
57
|
+
|
58
|
+
This decorator provides:
|
59
|
+
|
60
|
+
- Automatic registration of your config class with Hydra's `ConfigStore`
|
61
|
+
- Automatic setup of an MLflow experiment
|
62
|
+
- Storage of Hydra configurations and logs as MLflow artifacts
|
63
|
+
- Support for type-safe APIs and IDE integration
|
64
|
+
|
65
|
+
### Workflow Automation
|
66
|
+
|
67
|
+
HydraFlow allows you to automate experiment workflows using a YAML-based job definition system:
|
68
|
+
|
69
|
+
```yaml
|
70
|
+
jobs:
|
71
|
+
train_models:
|
72
|
+
run: python train.py
|
73
|
+
sets:
|
74
|
+
- each: model=small,medium,large
|
75
|
+
all: learning_rate=0.001,0.01,0.1
|
76
|
+
```
|
77
|
+
|
78
|
+
This enables:
|
79
|
+
|
80
|
+
- Defining reusable experiment workflows
|
81
|
+
- Efficient configuration of parameter sweeps
|
82
|
+
- Organization of complex experiment campaigns
|
83
|
+
|
84
|
+
You can also define more complex parameter spaces using extended sweep syntax:
|
85
|
+
|
86
|
+
```bash
|
87
|
+
# Ranges (start:end:step)
|
88
|
+
python train.py -m "learning_rate=0.01:0.03:0.01"
|
89
|
+
|
90
|
+
# SI prefixes
|
91
|
+
python train.py -m "batch_size=1k,2k,4k"
|
92
|
+
# 1000, 2000, 4000
|
93
|
+
|
94
|
+
# Grid within a single parameter
|
95
|
+
python train.py -m "model=(small,large)_(v1,v2)"
|
96
|
+
# small_v1, small_v2, large_v1, large_v2
|
97
|
+
```
|
98
|
+
|
99
|
+
### Analysis Tools
|
100
|
+
|
101
|
+
After running experiments, HydraFlow provides powerful tools for accessing and analyzing results. These tools help you track, compare, and derive insights from your experiments.
|
102
|
+
|
103
|
+
#### Working with Individual Runs
|
104
|
+
|
105
|
+
For individual experiment analysis, HydraFlow provides the `Run` class, which represents a single experiment run:
|
106
|
+
|
107
|
+
```python
|
108
|
+
from hydraflow import Run
|
109
|
+
|
110
|
+
# Load an existing run
|
111
|
+
run = Run.load("path/to/run")
|
112
|
+
|
113
|
+
# Access configuration values
|
114
|
+
learning_rate = run.get("learning_rate")
|
115
|
+
```
|
116
|
+
|
117
|
+
The `Run` class provides:
|
118
|
+
|
119
|
+
- Access to experiment configurations used during the run
|
120
|
+
- Methods for loading and analyzing experiment results
|
121
|
+
- Support for custom implementations through the factory pattern
|
122
|
+
- Type-safe access to configuration values
|
123
|
+
|
124
|
+
You can use type parameters for more powerful IDE support:
|
125
|
+
|
126
|
+
```python
|
127
|
+
from dataclasses import dataclass
|
128
|
+
from hydraflow import Run
|
129
|
+
|
130
|
+
@dataclass
|
131
|
+
class MyConfig:
|
132
|
+
learning_rate: float
|
133
|
+
batch_size: int
|
134
|
+
|
135
|
+
# Load a Run with type information
|
136
|
+
run = Run[MyConfig].load("path/to/run")
|
137
|
+
print(run.cfg.learning_rate) # IDE auto-completion works
|
138
|
+
```
|
139
|
+
|
140
|
+
#### Comparing Multiple Runs
|
141
|
+
|
142
|
+
For comparing multiple runs, HydraFlow offers the `RunCollection` class, which enables efficient analysis across runs:
|
143
|
+
|
144
|
+
```python
|
145
|
+
# Load multiple runs
|
146
|
+
runs = Run.load(["path/to/run1", "path/to/run2", "path/to/run3"])
|
147
|
+
|
148
|
+
# Filter runs by parameter value
|
149
|
+
filtered_runs = runs.filter(model_type="lstm")
|
150
|
+
|
151
|
+
# Group runs by a parameter
|
152
|
+
grouped_runs = runs.group_by("dataset_name")
|
153
|
+
|
154
|
+
# Convert to DataFrame for analysis
|
155
|
+
df = runs.to_frame("learning_rate", "batch_size", "accuracy")
|
156
|
+
```
|
157
|
+
|
158
|
+
Key features of experiment comparison:
|
159
|
+
|
160
|
+
- Filtering runs based on configuration parameters
|
161
|
+
- Grouping runs by common attributes
|
162
|
+
- Aggregating data across runs
|
163
|
+
- Converting to Polars DataFrames for advanced analysis
|
164
|
+
|
165
|
+
## Summary
|
166
|
+
|
167
|
+
These core concepts work together to provide a comprehensive framework for managing machine learning experiments:
|
168
|
+
|
169
|
+
1. **Configuration Management** - Type-safe configuration with Python dataclasses
|
170
|
+
2. **Main Decorator** - The entry point that integrates Hydra and MLflow
|
171
|
+
3. **Workflow Automation** - Reusable experiment definitions and advanced parameter sweeps
|
172
|
+
4. **Analysis Tools** - Access, filter, and analyze experiment results
|
173
|
+
|
174
|
+
Understanding these fundamental concepts will help you leverage the full power of HydraFlow for your machine learning projects.
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# Getting Started with HydraFlow
|
2
|
+
|
3
|
+
Welcome to HydraFlow, a framework designed to streamline machine learning
|
4
|
+
workflows by integrating Hydra's configuration management with MLflow's
|
5
|
+
experiment tracking capabilities.
|
6
|
+
|
7
|
+
## Overview
|
8
|
+
|
9
|
+
This section provides everything you need to begin using HydraFlow
|
10
|
+
effectively:
|
11
|
+
|
12
|
+
- [Installation](installation.md): Step-by-step instructions for installing
|
13
|
+
HydraFlow and its dependencies
|
14
|
+
- [Core Concepts](concepts.md): An introduction to the fundamental concepts
|
15
|
+
that underpin HydraFlow's design and functionality
|
16
|
+
|
17
|
+
## Why HydraFlow?
|
18
|
+
|
19
|
+
Managing machine learning experiments involves numerous challenges, including:
|
20
|
+
|
21
|
+
- **Configuration Management**: Tracking hyperparameters and settings across
|
22
|
+
multiple experiment runs
|
23
|
+
- **Reproducibility**: Ensuring experiments can be reliably reproduced
|
24
|
+
- **Result Analysis**: Efficiently comparing and analyzing experiment outcomes
|
25
|
+
- **Workflow Automation**: Organizing and managing experiment workflows
|
26
|
+
|
27
|
+
HydraFlow addresses these challenges by providing:
|
28
|
+
|
29
|
+
1. **Type-safe Configuration**: Using Python's native dataclasses for
|
30
|
+
robust configuration management
|
31
|
+
2. **Seamless Integration**: Bridging Hydra and MLflow to combine their
|
32
|
+
respective strengths
|
33
|
+
3. **Analysis Tools**: Providing powerful APIs for filtering, grouping,
|
34
|
+
and analyzing results
|
35
|
+
4. **Workflow Automation**: Simplifying the organization and execution of
|
36
|
+
machine learning experiments
|
37
|
+
|
38
|
+
## Quick Example
|
39
|
+
|
40
|
+
Here's a simple example to demonstrate HydraFlow's basic usage:
|
41
|
+
|
42
|
+
```python
|
43
|
+
from dataclasses import dataclass
|
44
|
+
from mlflow.entities import Run
|
45
|
+
import hydraflow
|
46
|
+
|
47
|
+
@dataclass
|
48
|
+
class Config:
|
49
|
+
learning_rate: float = 0.01
|
50
|
+
batch_size: int = 32
|
51
|
+
epochs: int = 10
|
52
|
+
|
53
|
+
@hydraflow.main(Config)
|
54
|
+
def train(run: Run, cfg: Config) -> None:
|
55
|
+
# Your training code here
|
56
|
+
print(f"Training with lr={cfg.learning_rate}, batch_size={cfg.batch_size}")
|
57
|
+
|
58
|
+
# Log metrics
|
59
|
+
hydraflow.log_metric("accuracy", 0.95)
|
60
|
+
|
61
|
+
if __name__ == "__main__":
|
62
|
+
train()
|
63
|
+
```
|
64
|
+
|
65
|
+
Run this example with:
|
66
|
+
|
67
|
+
```bash
|
68
|
+
python train.py learning_rate=0.001 batch_size=64
|
69
|
+
```
|
70
|
+
|
71
|
+
## Next Steps
|
72
|
+
|
73
|
+
Once you have installed HydraFlow and understood its core concepts, you will be ready to:
|
74
|
+
|
75
|
+
1. Follow our [Practical Tutorials](../practical-tutorials/index.md) to see HydraFlow in action
|
76
|
+
2. Explore the detailed [User Guide](../part1-applications/index.md) to learn more about HydraFlow's capabilities
|
77
|
+
3. Check the [API Reference](../api/hydraflow/README.md) for detailed documentation of HydraFlow's API
|
78
|
+
|
79
|
+
Continue to the [Installation Guide](installation.md) to get started with
|
80
|
+
HydraFlow.
|