hydraflow 0.15.1__tar.gz → 0.16.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. {hydraflow-0.15.1 → hydraflow-0.16.0}/PKG-INFO +84 -75
  2. hydraflow-0.16.0/README.md +150 -0
  3. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/run-class.md +27 -0
  4. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/run-collection.md +165 -9
  5. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/updating-runs.md +23 -0
  6. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/practical-tutorials/advanced.md +27 -2
  7. {hydraflow-0.15.1 → hydraflow-0.16.0}/mkdocs.yaml +10 -9
  8. {hydraflow-0.15.1 → hydraflow-0.16.0}/pyproject.toml +1 -1
  9. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/__init__.py +2 -0
  10. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/context.py +4 -4
  11. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/io.py +6 -0
  12. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/main.py +19 -11
  13. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/run.py +13 -3
  14. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/run_collection.py +119 -12
  15. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/run_info.py +16 -17
  16. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/hydraflow.yaml +4 -0
  17. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/test_run.py +10 -0
  18. hydraflow-0.16.0/tests/core/main/test_update.py +18 -0
  19. hydraflow-0.16.0/tests/core/main/update.py +35 -0
  20. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/run/test_run.py +26 -0
  21. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/run/test_run_collection.py +31 -0
  22. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/run/test_run_info.py +0 -24
  23. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/test_io.py +6 -0
  24. hydraflow-0.15.1/README.md +0 -141
  25. {hydraflow-0.15.1 → hydraflow-0.16.0}/.devcontainer/devcontainer.json +0 -0
  26. {hydraflow-0.15.1 → hydraflow-0.16.0}/.devcontainer/postCreate.sh +0 -0
  27. {hydraflow-0.15.1 → hydraflow-0.16.0}/.devcontainer/starship.toml +0 -0
  28. {hydraflow-0.15.1 → hydraflow-0.16.0}/.gitattributes +0 -0
  29. {hydraflow-0.15.1 → hydraflow-0.16.0}/.github/workflows/ci.yaml +0 -0
  30. {hydraflow-0.15.1 → hydraflow-0.16.0}/.github/workflows/docs.yaml +0 -0
  31. {hydraflow-0.15.1 → hydraflow-0.16.0}/.github/workflows/publish.yaml +0 -0
  32. {hydraflow-0.15.1 → hydraflow-0.16.0}/.gitignore +0 -0
  33. {hydraflow-0.15.1 → hydraflow-0.16.0}/LICENSE +0 -0
  34. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/getting-started/concepts.md +0 -0
  35. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/getting-started/index.md +0 -0
  36. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/getting-started/installation.md +0 -0
  37. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/index.md +0 -0
  38. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part1-applications/configuration.md +0 -0
  39. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part1-applications/execution.md +0 -0
  40. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part1-applications/index.md +0 -0
  41. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part1-applications/main-decorator.md +0 -0
  42. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part2-advanced/index.md +0 -0
  43. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part2-advanced/job-configuration.md +0 -0
  44. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part2-advanced/sweep-syntax.md +0 -0
  45. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/index.md +0 -0
  46. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/practical-tutorials/analysis.md +0 -0
  47. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/practical-tutorials/applications.md +0 -0
  48. {hydraflow-0.15.1 → hydraflow-0.16.0}/docs/practical-tutorials/index.md +0 -0
  49. {hydraflow-0.15.1 → hydraflow-0.16.0}/examples/example.py +0 -0
  50. {hydraflow-0.15.1 → hydraflow-0.16.0}/examples/hydraflow.yaml +0 -0
  51. {hydraflow-0.15.1 → hydraflow-0.16.0}/examples/submit.py +0 -0
  52. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/cli.py +0 -0
  53. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/core/__init__.py +0 -0
  54. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/__init__.py +0 -0
  55. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/aio.py +0 -0
  56. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/conf.py +0 -0
  57. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/io.py +0 -0
  58. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/job.py +0 -0
  59. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/executor/parser.py +0 -0
  60. {hydraflow-0.15.1 → hydraflow-0.16.0}/src/hydraflow/py.typed +0 -0
  61. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/__init__.py +0 -0
  62. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/__init__.py +0 -0
  63. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/app.py +0 -0
  64. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/conftest.py +0 -0
  65. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/submit.py +0 -0
  66. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/test_setup.py +0 -0
  67. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/test_show.py +0 -0
  68. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/cli/test_version.py +0 -0
  69. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/conftest.py +0 -0
  70. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/__init__.py +0 -0
  71. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/__init__.py +0 -0
  72. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/chdir.py +0 -0
  73. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/log_run.py +0 -0
  74. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/start_run.py +0 -0
  75. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/test_chdir.py +0 -0
  76. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/test_log_run.py +0 -0
  77. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/context/test_start_run.py +0 -0
  78. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/__init__.py +0 -0
  79. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/default.py +0 -0
  80. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/force_new_run.py +0 -0
  81. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/match_overrides.py +0 -0
  82. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/rerun_finished.py +0 -0
  83. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/skip_finished.py +0 -0
  84. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_default.py +0 -0
  85. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_force_new_run.py +0 -0
  86. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_main.py +0 -0
  87. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_match_overrides.py +0 -0
  88. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_rerun_finished.py +0 -0
  89. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/main/test_skip_finished.py +0 -0
  90. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/run/__init__.py +0 -0
  91. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/core/run/run.py +0 -0
  92. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/__init__.py +0 -0
  93. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/conftest.py +0 -0
  94. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/echo.py +0 -0
  95. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/read.py +0 -0
  96. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_aio.py +0 -0
  97. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_args.py +0 -0
  98. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_conf.py +0 -0
  99. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_io.py +0 -0
  100. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_job.py +0 -0
  101. {hydraflow-0.15.1 → hydraflow-0.16.0}/tests/executor/test_parser.py +0 -0
{hydraflow-0.15.1 → hydraflow-0.16.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.15.1
+Version: 0.16.0
 Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -51,7 +51,7 @@ Requires-Dist: ruff>=0.11
 Requires-Dist: typer>=0.15
 Description-Content-Type: text/markdown

-# Hydraflow
+# HydraFlow

 [![PyPI Version][pypi-v-image]][pypi-v-link]
 [![Build Status][GHAction-image]][GHAction-link]
@@ -60,6 +60,7 @@ Description-Content-Type: text/markdown
 [![Python Version][python-v-image]][python-v-link]

 <!-- Badges -->
+
 [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
 [pypi-v-link]: https://pypi.org/project/hydraflow/
 [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
@@ -73,117 +74,125 @@ Description-Content-Type: text/markdown

 ## Overview

-Hydraflow is a library designed to seamlessly integrate
-[Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
-manage and track machine learning experiments. By combining the flexibility of
-Hydra's configuration management with the robust experiment tracking capabilities
-of MLflow, Hydraflow provides a comprehensive solution for managing complex
-machine learning workflows.
+HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
+
+## Design Principles
+
+HydraFlow is built on the following design principles:
+
+1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
+2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
+3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
+4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking

 ## Key Features

-- **Configuration Management**: Utilize Hydra's advanced configuration management
-to handle complex parameter sweeps and experiment setups.
-- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
-metrics, and artifacts for each run.
-- **Artifact Management**: Automatically log and manage artifacts, such as model
-checkpoints and configuration files, with MLflow.
-- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
-projects with minimal setup.
-- **Rich CLI Interface**: Command-line tools for managing experiments and viewing results.
-- **Cross-Platform Support**: Works consistently across different operating systems.
+- **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
+- **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
+- **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
+- **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
+- **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
+- **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality

 ## Installation

-You can install Hydraflow via pip:
-
 ```bash
 pip install hydraflow
 ```

 **Requirements:** Python 3.13+

-## Quick Start
-
-Here is a simple example to get you started with Hydraflow:
+## Quick Example

 ```python
-from __future__ import annotations
-
 from dataclasses import dataclass
-from typing import TYPE_CHECKING
-
+from mlflow.entities import Run
 import hydraflow
-import mlflow

-if TYPE_CHECKING:
-    from mlflow.entities import Run
+@dataclass
+class Config:
+    width: int = 1024
+    height: int = 768

+@hydraflow.main(Config)
+def app(run: Run, cfg: Config) -> None:
+    # Your experiment code here
+    print(f"Running with width={cfg.width}, height={cfg.height}")
+
+    # Log metrics
+    hydraflow.log_metric("area", cfg.width * cfg.height)

+if __name__ == "__main__":
+    app()
+```
+
+Execute a parameter sweep with:
+
+```bash
+python app.py -m width=800,1200 height=600,900
+```
+
+## Core Components
+
+HydraFlow consists of the following key components:
+
+### Configuration Management
+
+Define type-safe configurations using Python dataclasses:
+
+```python
 @dataclass
 class Config:
-    """Configuration for the ML training experiment."""
-    # Training hyperparameters
     learning_rate: float = 0.001
     batch_size: int = 32
     epochs: int = 10
+```

-    # Model architecture parameters
-    hidden_size: int = 128
-    dropout: float = 0.1
-
-    # Dataset parameters
-    train_size: float = 0.8
-    random_seed: int = 42
+### Main Decorator

+The `@hydraflow.main` decorator integrates Hydra and MLflow:

+```python
 @hydraflow.main(Config)
-def app(run: Run, cfg: Config):
-    """Train a model with the given configuration.
-
-    This example demonstrates how to:
+def train(run: Run, cfg: Config) -> None:
+    # Your experiment code
+```

-    1. Define a configuration using dataclasses
-    2. Use Hydraflow to integrate with MLflow
-    3. Track metrics and parameters automatically
+### Workflow Automation

-    Args:
-        run: MLflow run for the experiment corresponding to the Hydra app.
-            This `Run` instance is automatically created by Hydraflow.
-        cfg: Configuration for the experiment's run.
-            This `Config` instance is originally defined by Hydra, and then
-            automatically passed to the app by Hydraflow.
-    """
-    # Training loop
-    for epoch in range(cfg.epochs):
-        # Simulate training and validation
-        train_loss = 1.0 / (epoch + 1)
-        val_loss = 1.1 / (epoch + 1)
+Define reusable experiment workflows in YAML:

-        # Log metrics to MLflow
-        mlflow.log_metrics({
-            "train_loss": train_loss,
-            "val_loss": val_loss
-        }, step=epoch)
+```yaml
+jobs:
+  train_models:
+    run: python train.py
+    sets:
+      - each: model=small,medium,large
+        all: learning_rate=0.001,0.01,0.1
+```

-        print(f"Epoch {epoch}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")
+### Analysis Tools

+Analyze experiment results with powerful APIs:

-if __name__ == "__main__":
-    app()
-```
+```python
+from hydraflow import Run, iter_run_dirs

-This example demonstrates:
+# Load runs
+runs = Run.load(iter_run_dirs("mlruns"))

-- Configuration management with Hydra
-- Automatic experiment tracking with MLflow
-- Parameter logging and metric tracking
-- Type-safe configuration with dataclasses
+# Filter and analyze
+best_runs = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
+```

 ## Documentation

-For detailed documentation, including advanced usage examples and API reference,
-visit our [documentation site](https://daizutabi.github.io/hydraflow/).
+For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
+
+- [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
+- [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
+- [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
+- [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation

 ## Contributing

@@ -191,4 +200,4 @@ We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) f

 ## License

-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
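The quick example in the updated package description relies on Hydra's multirun mode: the `-m` flag launches one run per element of the Cartesian product of the swept values, so `width=800,1200 height=600,900` yields four runs. A plain-Python sketch of the grid being expanded (illustrative only; the printed command lines are hypothetical, as Hydra composes and launches the jobs internally):

```python
# Enumerate the sweep grid that "python app.py -m width=800,1200 height=600,900"
# expands to: the Cartesian product of the two value lists (4 runs).
from itertools import product

widths, heights = [800, 1200], [600, 900]
for width, height in product(widths, heights):
    print(f"python app.py width={width} height={height}")
```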
hydraflow-0.16.0/README.md

@@ -0,0 +1,150 @@
+# HydraFlow
+
+[![PyPI Version][pypi-v-image]][pypi-v-link]
+[![Build Status][GHAction-image]][GHAction-link]
+[![Coverage Status][codecov-image]][codecov-link]
+[![Documentation Status][docs-image]][docs-link]
+[![Python Version][python-v-image]][python-v-link]
+
+<!-- Badges -->
+
+[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
+[pypi-v-link]: https://pypi.org/project/hydraflow/
+[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
+[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
+[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
+[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
+[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
+[docs-link]: https://daizutabi.github.io/hydraflow/
+[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
+[python-v-link]: https://pypi.org/project/hydraflow
+
+## Overview
+
+HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
+
+## Design Principles
+
+HydraFlow is built on the following design principles:
+
+1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
+2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
+3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
+4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking
+
+## Key Features
+
+- **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
+- **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
+- **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
+- **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
+- **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
+- **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality
+
+## Installation
+
+```bash
+pip install hydraflow
+```
+
+**Requirements:** Python 3.13+
+
+## Quick Example
+
+```python
+from dataclasses import dataclass
+from mlflow.entities import Run
+import hydraflow
+
+@dataclass
+class Config:
+    width: int = 1024
+    height: int = 768
+
+@hydraflow.main(Config)
+def app(run: Run, cfg: Config) -> None:
+    # Your experiment code here
+    print(f"Running with width={cfg.width}, height={cfg.height}")
+
+    # Log metrics
+    hydraflow.log_metric("area", cfg.width * cfg.height)
+
+if __name__ == "__main__":
+    app()
+```
+
+Execute a parameter sweep with:
+
+```bash
+python app.py -m width=800,1200 height=600,900
+```
+
+## Core Components
+
+HydraFlow consists of the following key components:
+
+### Configuration Management
+
+Define type-safe configurations using Python dataclasses:
+
+```python
+@dataclass
+class Config:
+    learning_rate: float = 0.001
+    batch_size: int = 32
+    epochs: int = 10
+```
+
+### Main Decorator
+
+The `@hydraflow.main` decorator integrates Hydra and MLflow:
+
+```python
+@hydraflow.main(Config)
+def train(run: Run, cfg: Config) -> None:
+    # Your experiment code
+```
+
+### Workflow Automation
+
+Define reusable experiment workflows in YAML:
+
+```yaml
+jobs:
+  train_models:
+    run: python train.py
+    sets:
+      - each: model=small,medium,large
+        all: learning_rate=0.001,0.01,0.1
+```
+
+### Analysis Tools
+
+Analyze experiment results with powerful APIs:
+
+```python
+from hydraflow import Run, iter_run_dirs
+
+# Load runs
+runs = Run.load(iter_run_dirs("mlruns"))
+
+# Filter and analyze
+best_runs = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
+```
+
+## Documentation
+
+For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
+
+- [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
+- [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
+- [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
+- [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation
+
+## Contributing
+
+We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) for details.
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
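Putting the README's pieces together, here is a sketch of the end-to-end analysis workflow, assuming the 0.16.0 API exactly as documented above (`Run.load`, `iter_run_dirs`, keyword `filter`, and `group_by` with keyword aggregations) and an `mlruns` directory produced by the quick example, whose logged metric is `area`:

```python
# Load every tracked run, narrow the sweep, and aggregate per width.
# Assumes runs were produced by the quick example above ("area" metric).
from hydraflow import Run, iter_run_dirs

runs = Run.load(iter_run_dirs("mlruns"))

# Keyword filtering as documented in the README
tall = runs.filter(height=900)

# group_by with a keyword aggregation returns a Polars DataFrame
df = tall.group_by("width", mean_area=lambda g: g.to_numpy("area").mean())
print(df)
```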
{hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/run-class.md

@@ -56,9 +56,22 @@ learning_rate = run.get("learning_rate")
 # Nested access with dot notation
 model_type = run.get("model.type")

+# Alternatively, use double underscore notation for nested access
+model_type = run.get("model__type") # Equivalent to "model.type"
+
 # Access implementation attributes or run info
 metric_value = run.get("accuracy") # From impl or cfg
 run_id = run.get("run_id") # From RunInfo
+
+# Provide a default value if the key doesn't exist
+batch_size = run.get("batch_size", 32)
+
+# Use a callable as default to dynamically generate values based on the run
+# This is useful for derived parameters or conditional defaults
+lr = run.get("learning_rate", default=lambda r: r.get("base_lr", 0.01) / 10)
+
+# Complex default logic based on other parameters
+steps = run.get("steps", default=lambda r: r.get("epochs", 10) * r.get("steps_per_epoch", 100))
 ```

 The `get` method searches for values in the following order:
@@ -69,6 +82,20 @@ The `get` method searches for values in the following order:

 This provides a unified access interface regardless of where the data is stored.

+The double underscore notation (`__`) is automatically converted to dot notation (`.`) internally,
+making it useful for nested parameter access, especially when using keyword arguments in methods
+that don't allow dots in parameter names.
+
+When providing a default value, you can use either a static value or a callable function.
+If you provide a callable, it will receive the Run instance as an argument, allowing you to
+create context-dependent default values that can access other run parameters or properties.
+This is particularly useful for:
+
+- Creating derived parameters that don't exist in the original configuration
+- Handling schema evolution across different experiment iterations
+- Providing fallbacks that depend on other configuration values
+- Implementing conditional logic for parameter defaults
+

 ## Type-Safe Configuration Access
 For better IDE integration and type checking, you can specify the configuration
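The hunks above add two behaviors to `Run.get`: double-underscore keys converted to dot notation, and defaults that may be callables receiving the `Run`. A self-contained sketch of those lookup semantics using plain dataclasses (the `get` helper below is a hypothetical stand-in for illustration, not hydraflow's implementation):

```python
from dataclasses import dataclass, field

@dataclass
class Model:
    type: str = "transformer"
    hidden_size: int = 512

@dataclass
class Config:
    model: Model = field(default_factory=Model)
    learning_rate: float = 0.001

def get(cfg, key, default=None):
    # "model__type" -> "model.type", then walk the attribute path; fall back
    # to the default, calling it when it is callable (mirroring Run.get).
    obj = cfg
    for part in key.replace("__", ".").split("."):
        if not hasattr(obj, part):
            return default(cfg) if callable(default) else default
        obj = getattr(obj, part)
    return obj

assert get(Config(), "model__type") == "transformer"
assert get(Config(), "batch_size", 32) == 32
assert get(Config(), "warmup", default=lambda c: int(c.learning_rate * 1e6)) == 1000
```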
{hydraflow-0.15.1 → hydraflow-0.16.0}/docs/part3-analysis/run-collection.md

@@ -86,6 +86,11 @@ specific_runs = runs.filter(
 # Use a tuple to specify the parameter name and value
 nested_filter = runs.filter(("model.hidden_size", 512))

+# Filter with double underscore notation for nested parameters
+# This is often more convenient with keyword arguments
+nested_filter = runs.filter(model__hidden_size=512) # Equivalent to "model.hidden_size"
+nested_filter = runs.filter(model__encoder__num_layers=6) # For deeply nested parameters
+
 # Filter with tuple for range values (inclusive)
 lr_range = runs.filter(learning_rate=(0.0001, 0.01))

@@ -99,6 +104,11 @@ def is_large_image(run: Run):
 good_runs = runs.filter(predicate=is_large_image)
 ```

+The double underscore notation (`__`) is particularly useful for accessing nested
+configuration parameters with keyword arguments, as it's automatically converted to
+dot notation (`.`) internally. This allows you to write more natural and Pythonic
+filtering expressions, especially for deeply nested configurations.
+
 ## Advanced Filtering

 The `filter` method supports more complex filtering patterns:
@@ -113,8 +123,23 @@ complex_filter = runs.filter(

 # Chained filtering
 final_runs = runs.filter(model_type="transformer").filter(learning_rate=0.001)
+
+# Advanced filtering using predicate functions with callable defaults
+# This example filters runs based on learning rate efficiency (lr * batch_size)
+# Even if some runs are missing one parameter, the default logic provides values
+def has_efficient_lr(run: Run) -> bool:
+    lr = run.get("learning_rate", default=lambda r: r.get("base_lr", 0.01) * r.get("lr_multiplier", 1.0))
+    batch_size = run.get("batch_size", default=lambda r: r.get("default_batch_size", 32))
+    return lr * batch_size < 0.5
+
+# Apply the complex predicate
+efficient_runs = runs.filter(predicate=has_efficient_lr)
 ```

+The combination of predicate functions with callable defaults in `get` enables sophisticated
+filtering logic that can handle missing parameters and varied configuration schemas across
+different experiment runs.
+
 ## Sorting Runs

 The `sort` method allows you to sort runs based on specific criteria:
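The filtering hunks above combine `filter(predicate=...)` with callable defaults in `get`. A plain-Python sketch of why that combination tolerates heterogeneous schemas (the dicts and the `get` helper are illustrative stand-ins for the Run API, not hydraflow code):

```python
# A run missing "learning_rate" or "batch_size" still gets a usable value
# from its own fallback keys before the predicate is evaluated.
def get(run: dict, key: str, default=None):
    if key in run:
        return run[key]
    return default(run) if callable(default) else default

def has_efficient_lr(run: dict) -> bool:
    lr = get(run, "learning_rate", default=lambda r: get(r, "base_lr", 0.01))
    batch_size = get(run, "batch_size", default=lambda r: get(r, "default_batch_size", 32))
    return lr * batch_size < 0.5

runs = [
    {"learning_rate": 0.001, "batch_size": 64},  # 0.064 -> kept
    {"base_lr": 0.1, "default_batch_size": 32},  # 3.2   -> dropped
]
print([r for r in runs if has_efficient_lr(r)])
```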
@@ -154,9 +179,47 @@ RunCollection provides several methods to extract specific data from runs:
 # Extract values for a specific key as a list
 learning_rates = runs.to_list("learning_rate")

+# Extract values with a static default for missing values
+batch_sizes = runs.to_list("batch_size", default=32)
+
+# Extract values with a callable default that dynamically computes values
+# This is particularly useful for handling missing parameters or derived values
+accuracies = runs.to_list("accuracy", default=lambda run: run.get("val_accuracy", 0.0) * 0.9)
+
 # Extract values as a NumPy array
 batch_sizes = runs.to_numpy("batch_size")

+# Extract with callable default for complex scenarios
+learning_rates = runs.to_numpy(
+    "learning_rate",
+    default=lambda run: run.get("base_lr", 0.01) * run.get("lr_schedule_factor", 1.0)
+)
+
+# Extract values as a Polars Series
+lr_series = runs.to_series("learning_rate")
+
+# Extract with a custom name for the series
+model_series = runs.to_series("model_type", name="Model Architecture")
+
+# Extract with callable default and custom name
+effective_lr = runs.to_series(
+    "learning_rate",
+    default=lambda run: run.get("base_lr", 0.01) * run.get("lr_multiplier", 1.0),
+    name="Effective Learning Rate"
+)
+
+# Use Series for further analysis and operations
+import polars as pl
+# Combine multiple series into a DataFrame
+df = pl.DataFrame([
+    runs.to_series("model_type", name="Model"),
+    runs.to_series("batch_size", default=32, name="Batch Size"),
+    effective_lr
+])
+# Perform operations between Series
+normalized_acc = runs.to_series("accuracy", default=0.0, name="Accuracy")
+efficiency = normalized_acc / effective_lr # Series division
+
 # Get unique values for a key
 model_types = runs.unique("model_type")

@@ -164,6 +227,18 @@ model_types = runs.unique("model_type")
 num_model_types = runs.n_unique("model_type")
 ```

+All data extraction methods (`to_list`, `to_numpy`, `to_series`, etc.) support both static and callable default values,
+matching the behavior of the `Run.get` method. When using a callable default, the function receives
+the Run instance as an argument, allowing you to:
+
+- Implement fallback logic for missing parameters
+- Create derived values based on multiple parameters
+- Handle varying configuration schemas across different experiments
+- Apply transformations to the raw parameter values
+
+This makes it much easier to work with heterogeneous collections of runs that might have different
+parameter sets or evolving configuration schemas.
+
 ## Converting to DataFrame

 For advanced analysis, you can convert your runs to a Polars DataFrame:
@@ -175,16 +250,76 @@ df = runs.to_frame()
 # DataFrame with specific configuration parameters
 df = runs.to_frame("model_type", "learning_rate", "batch_size")

-# Using a custom function that returns multiple columns
+# Specify default values for missing parameters using the defaults parameter
+df = runs.to_frame(
+    "model_type",
+    "learning_rate",
+    "batch_size",
+    defaults={"learning_rate": 0.01, "batch_size": 32}
+)
+
+# Use callable defaults for dynamic values based on each run
+df = runs.to_frame(
+    "model_type",
+    "learning_rate",
+    "epochs",
+    defaults={
+        "learning_rate": lambda run: run.get("base_lr", 0.01) * run.get("lr_multiplier", 1.0),
+        "epochs": lambda run: int(run.get("max_steps", 1000) / run.get("steps_per_epoch", 100))
+    }
+)
+
+# Missing values without defaults are represented as None (null) in the DataFrame
+# This allows for standard handling of missing data in Polars
+missing_values_df = runs.to_frame("model_type", "parameter_that_might_be_missing")
+
+# Filter rows with non-null values
+import polars as pl
+valid_rows = missing_values_df.filter(pl.col("parameter_that_might_be_missing").is_not_null())
+
+# Fill null values after creating the DataFrame
+filled_df = missing_values_df.with_columns(
+    pl.col("parameter_that_might_be_missing").fill_null("default_value")
+)
+
+# Using a custom function that returns multiple columns as keyword arguments
 def get_metrics(run: Run) -> dict[str, float]:
     return {
-        "accuracy": run.impl.accuracy(),
-        "precision": run.impl.precision(),
+        "accuracy": run.get("accuracy", default=lambda r: r.get("val_accuracy", 0.0) * 0.9),
+        "precision": run.get("precision", default=lambda r: r.get("val_precision", 0.0) * 0.9),
     }

+# Add custom columns using a function
 df = runs.to_frame("model_type", metrics=get_metrics)
+
+# Combine defaults with custom column generator functions
+df = runs.to_frame(
+    "model_type",
+    "learning_rate",
+    defaults={"learning_rate": 0.01},
+    metrics=get_metrics
+)
 ```

+The `to_frame` method provides several ways to handle missing data:
+
+1. **defaults parameter**: Provide static or callable default values for specific keys
+   - Static values: `defaults={"param": value}`
+   - Callable values: `defaults={"param": lambda run: computed_value}`
+
+2. **None values**: Parameters without defaults are represented as `None` (null) in the DataFrame
+   - This lets you use Polars operations for handling null values:
+     - Filter: `df.filter(pl.col("param").is_not_null())`
+     - Fill nulls: `df.with_columns(pl.col("param").fill_null(value))`
+     - Aggregations: Most aggregation functions handle nulls appropriately
+
+3. **Custom column generators**: Use keyword argument functions to compute complex columns
+   - These functions receive each Run instance and can implement custom logic
+   - They can use `run.get()` with defaults to handle missing parameters
+
+These approaches can be combined to create flexible and robust data extraction pipelines
+that handle different experiment configurations and parameter evolution over time.
+
 ## Grouping Runs

 The `group_by` method allows you to organize runs based on parameter values:
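The `to_frame` hunk above states that keys without defaults become null in the resulting DataFrame. A small, runnable Polars sketch of the two documented follow-ups, filtering out nulls and filling them (the column values here are made up for illustration):

```python
# Nulls standing in for missing run parameters can be filtered or filled.
import polars as pl

df = pl.DataFrame([
    {"model_type": "transformer", "learning_rate": 0.001},
    {"model_type": "cnn", "learning_rate": None},  # run missing the key
])
print(df.filter(pl.col("learning_rate").is_not_null()))
print(df.with_columns(pl.col("learning_rate").fill_null(0.01)))
```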
@@ -193,6 +328,9 @@ The `group_by` method allows you to organize runs based on parameter values:
 # Group by a single parameter
 model_groups = runs.group_by("model_type")

+# Group by nested parameter using dot notation
+architecture_groups = runs.group_by("model.architecture")
+
 # Iterate through groups
 for model_type, group in model_groups.items():
     print(f"Model type: {model_type}, Runs: {len(group)}")
@@ -200,6 +338,9 @@ for model_type, group in model_groups.items():
 # Group by multiple parameters
 param_groups = runs.group_by("model_type", "learning_rate")

+# Mix of regular and nested parameters using double underscore notation
+param_groups = runs.group_by("model_type", "model__hidden_size", "optimizer__learning_rate")
+
 # Access a specific group
 transformer_001_group = param_groups[("transformer", 0.001)]
 ```
@@ -218,14 +359,18 @@ This approach preserves all information in each group, giving you maximum flexib
 Combine `group_by` with aggregation for powerful analysis:

 ```python
-# Simple aggregation function using get method
+# Simple aggregation function using get method with callable defaults
 def mean_accuracy(runs: RunCollection) -> float:
-    return runs.to_numpy("accuracy").mean()
+    return runs.to_numpy(
+        "accuracy",
+        default=lambda run: run.get("val_accuracy", 0.0) * 0.9
+    ).mean()

-# Complex aggregation from implementation or configuration
+# Complex aggregation from implementation or configuration with fallbacks
 def combined_metric(runs: RunCollection) -> float:
-    accuracies = runs.to_numpy("accuracy") # Could be from impl or cfg
-    precisions = runs.to_numpy("precision") # Could be from impl or cfg
+    # Use callable defaults to handle missing values consistently
+    accuracies = runs.to_numpy("accuracy", default=lambda r: r.get("val_accuracy", 0.0))
+    precisions = runs.to_numpy("precision", default=lambda r: r.get("val_precision", 0.0))
     return (accuracies.mean() + precisions.mean()) / 2


@@ -243,9 +388,20 @@ results = runs.group_by(
     accuracy=mean_accuracy,
     combined=combined_metric
 )
+
+# Group by parameters that might be missing in some runs using callable defaults
+def normalize_architecture(run: Run) -> str:
+    # Get architecture with a fallback to model type if not available
+    arch = run.get("architecture", default=lambda r: r.get("model_type", "unknown"))
+    return arch.lower() # Normalize to lowercase
+
+# Group by the normalized architecture
+arch_results = runs.group_by(normalize_architecture, accuracy=mean_accuracy)
 ```

-With the enhanced `get` method that can access both configuration and implementation attributes, writing aggregation functions becomes more straightforward. You no longer need to worry about whether a metric comes from configuration, implementation, or run information - the `get` method provides a unified access interface.
+With the enhanced `get` method and callable defaults support throughout the API, writing aggregation
+functions becomes more straightforward and robust. You can handle missing values consistently and
+implement complex transformations that work across heterogeneous runs.

 When aggregation functions are provided as keyword arguments, `group_by` returns a Polars DataFrame with the group keys and aggregated values. This design choice offers several advantages:
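The final hunk above groups runs by a key function with a fallback (`normalize_architecture`). A plain-Python sketch of that normalize-then-group pattern, with dicts standing in for hydraflow `Run` objects (illustrative data, not from the package):

```python
# Group heterogeneous runs whose schemas name the architecture differently,
# then compute a per-group mean accuracy.
from collections import defaultdict

runs = [
    {"architecture": "ResNet", "accuracy": 0.91},
    {"model_type": "resnet", "accuracy": 0.89},  # older schema
    {"model_type": "ViT", "accuracy": 0.93},
]

def normalize_architecture(run: dict) -> str:
    arch = run.get("architecture") or run.get("model_type", "unknown")
    return arch.lower()

groups: dict[str, list[float]] = defaultdict(list)
for run in runs:
    groups[normalize_architecture(run)].append(run["accuracy"])

mean_accuracy = {k: sum(v) / len(v) for k, v in groups.items()}
print(mean_accuracy)  # {'resnet': 0.9, 'vit': 0.93}
```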