hydraflow 0.1.2__tar.gz → 0.1.5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow-0.1.5/PKG-INFO +111 -0
- hydraflow-0.1.5/README.md +82 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/pyproject.toml +2 -2
- {hydraflow-0.1.2 → hydraflow-0.1.5}/src/hydraflow/__init__.py +9 -1
- hydraflow-0.1.5/src/hydraflow/config.py +54 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/src/hydraflow/context.py +65 -0
- hydraflow-0.1.5/src/hydraflow/mlflow.py +49 -0
- hydraflow-0.1.5/src/hydraflow/runs.py +512 -0
- hydraflow-0.1.5/src/hydraflow/util.py +24 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_config.py +1 -2
- hydraflow-0.1.2/PKG-INFO +0 -45
- hydraflow-0.1.2/README.md +0 -16
- hydraflow-0.1.2/src/hydraflow/config.py +0 -30
- hydraflow-0.1.2/src/hydraflow/mlflow.py +0 -20
- hydraflow-0.1.2/src/hydraflow/runs.py +0 -217
- hydraflow-0.1.2/src/hydraflow/util.py +0 -11
- {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/.gitattributes +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/.gitignore +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/LICENSE +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/log_run.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/watch.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_log_run.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_runs.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_version.py +0 -0
- {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_watch.py +0 -0
hydraflow-0.1.5/PKG-INFO
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.1.5
|
4
|
+
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
|
+
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
+
License-Expression: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Documentation
|
17
|
+
Classifier: Topic :: Software Development :: Documentation
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: hydra-core>1.3
|
20
|
+
Requires-Dist: mlflow>2.15
|
21
|
+
Requires-Dist: setuptools
|
22
|
+
Requires-Dist: watchdog
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
# Hydraflow
|
31
|
+
|
32
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
+
[![Python Version][python-v-image]][python-v-link]
|
34
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
+
|
37
|
+
<!-- Badges -->
|
38
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
46
|
+
|
47
|
+
## Overview
|
48
|
+
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
50
|
+
|
51
|
+
## Key Features
|
52
|
+
|
53
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
54
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
55
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
56
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
You can install Hydraflow via pip:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
pip install hydraflow
|
64
|
+
```
|
65
|
+
|
66
|
+
## Getting Started
|
67
|
+
|
68
|
+
Here is a simple example to get you started with Hydraflow:
|
69
|
+
|
70
|
+
```python
|
71
|
+
import hydra
|
72
|
+
import hydraflow
|
73
|
+
import mlflow
|
74
|
+
from dataclasses import dataclass
|
75
|
+
from hydra.core.config_store import ConfigStore
|
76
|
+
from pathlib import Path
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class MySQLConfig:
|
80
|
+
host: str = "localhost"
|
81
|
+
port: int = 3306
|
82
|
+
|
83
|
+
cs = ConfigStore.instance()
|
84
|
+
cs.store(name="config", node=MySQLConfig)
|
85
|
+
|
86
|
+
@hydra.main(version_base=None, config_name="config")
|
87
|
+
def my_app(cfg: MySQLConfig) -> None:
|
88
|
+
# Set experiment by Hydra job name.
|
89
|
+
hydraflow.set_experiment()
|
90
|
+
|
91
|
+
# Automatically log params using Hydra config.
|
92
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
93
|
+
# Your app code below.
|
94
|
+
|
95
|
+
# `info.output_dir` is the Hydra output directory.
|
96
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
97
|
+
|
98
|
+
with hydraflow.watch(callback):
|
99
|
+
# Watch files in the MLflow artifact directory.
|
100
|
+
# You can update metrics or log other artifacts
|
101
|
+
# according to the watched files in your callback
|
102
|
+
# function.
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Your callback function here.
|
106
|
+
def callback(file: Path) -> None:
|
107
|
+
pass
|
108
|
+
|
109
|
+
if __name__ == "__main__":
|
110
|
+
my_app()
|
111
|
+
```
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Hydraflow
|
2
|
+
|
3
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
4
|
+
[![Python Version][python-v-image]][python-v-link]
|
5
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
6
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
7
|
+
|
8
|
+
<!-- Badges -->
|
9
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
10
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
11
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
12
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
13
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
14
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
15
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
16
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
17
|
+
|
18
|
+
## Overview
|
19
|
+
|
20
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
21
|
+
|
22
|
+
## Key Features
|
23
|
+
|
24
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
25
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
26
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
27
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
You can install Hydraflow via pip:
|
32
|
+
|
33
|
+
```bash
|
34
|
+
pip install hydraflow
|
35
|
+
```
|
36
|
+
|
37
|
+
## Getting Started
|
38
|
+
|
39
|
+
Here is a simple example to get you started with Hydraflow:
|
40
|
+
|
41
|
+
```python
|
42
|
+
import hydra
|
43
|
+
import hydraflow
|
44
|
+
import mlflow
|
45
|
+
from dataclasses import dataclass
|
46
|
+
from hydra.core.config_store import ConfigStore
|
47
|
+
from pathlib import Path
|
48
|
+
|
49
|
+
@dataclass
|
50
|
+
class MySQLConfig:
|
51
|
+
host: str = "localhost"
|
52
|
+
port: int = 3306
|
53
|
+
|
54
|
+
cs = ConfigStore.instance()
|
55
|
+
cs.store(name="config", node=MySQLConfig)
|
56
|
+
|
57
|
+
@hydra.main(version_base=None, config_name="config")
|
58
|
+
def my_app(cfg: MySQLConfig) -> None:
|
59
|
+
# Set experiment by Hydra job name.
|
60
|
+
hydraflow.set_experiment()
|
61
|
+
|
62
|
+
# Automatically log params using Hydra config.
|
63
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
64
|
+
# Your app code below.
|
65
|
+
|
66
|
+
# `info.output_dir` is the Hydra output directory.
|
67
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
68
|
+
|
69
|
+
with hydraflow.watch(callback):
|
70
|
+
# Watch files in the MLflow artifact directory.
|
71
|
+
# You can update metrics or log other artifacts
|
72
|
+
# according to the watched files in your callback
|
73
|
+
# function.
|
74
|
+
pass
|
75
|
+
|
76
|
+
# Your callback function here.
|
77
|
+
def callback(file: Path) -> None:
|
78
|
+
pass
|
79
|
+
|
80
|
+
if __name__ == "__main__":
|
81
|
+
my_app()
|
82
|
+
```
|
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "hydraflow"
|
7
|
-
version = "0.1.2"
|
8
|
-
description = "Hydra
|
7
|
+
version = "0.1.5"
|
8
|
+
description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
|
9
9
|
readme = "README.md"
|
10
10
|
license = "MIT"
|
11
11
|
authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
|
@@ -1,6 +1,9 @@
|
|
1
1
|
from .context import Info, chdir_artifact, log_run, watch
|
2
2
|
from .mlflow import set_experiment
|
3
3
|
from .runs import (
|
4
|
+
Run,
|
5
|
+
Runs,
|
6
|
+
drop_unique_params,
|
4
7
|
filter_runs,
|
5
8
|
get_artifact_dir,
|
6
9
|
get_artifact_path,
|
@@ -9,19 +12,24 @@ from .runs import (
|
|
9
12
|
get_param_names,
|
10
13
|
get_run,
|
11
14
|
get_run_id,
|
15
|
+
load_config,
|
12
16
|
)
|
13
17
|
|
14
18
|
__all__ = [
|
15
19
|
"Info",
|
20
|
+
"Run",
|
21
|
+
"Runs",
|
16
22
|
"chdir_artifact",
|
23
|
+
"drop_unique_params",
|
17
24
|
"filter_runs",
|
18
25
|
"get_artifact_dir",
|
19
26
|
"get_artifact_path",
|
20
27
|
"get_artifact_uri",
|
21
|
-
"get_run",
|
22
28
|
"get_param_dict",
|
23
29
|
"get_param_names",
|
30
|
+
"get_run",
|
24
31
|
"get_run_id",
|
32
|
+
"load_config",
|
25
33
|
"log_run",
|
26
34
|
"set_experiment",
|
27
35
|
"watch",
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for working with configuration
|
3
|
+
objects using the OmegaConf library.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
|
10
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
from collections.abc import Iterator
|
14
|
+
from typing import Any
|
15
|
+
|
16
|
+
|
17
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
18
|
+
"""
|
19
|
+
Iterate over the parameters in the given configuration object.
|
20
|
+
|
21
|
+
This function recursively traverses the configuration object and yields
|
22
|
+
key-value pairs representing the parameters.
|
23
|
+
|
24
|
+
Args:
|
25
|
+
config (object): The configuration object to iterate over.
|
26
|
+
prefix (str, optional): The prefix to prepend to the parameter keys.
|
27
|
+
Defaults to "".
|
28
|
+
|
29
|
+
Yields:
|
30
|
+
Key-value pairs representing the parameters.
|
31
|
+
"""
|
32
|
+
if not isinstance(config, (DictConfig, ListConfig)):
|
33
|
+
config = OmegaConf.create(config) # type: ignore
|
34
|
+
|
35
|
+
if isinstance(config, DictConfig):
|
36
|
+
for key, value in config.items():
|
37
|
+
if isinstance(value, ListConfig) and not any(
|
38
|
+
isinstance(v, (DictConfig, ListConfig)) for v in value
|
39
|
+
):
|
40
|
+
yield f"{prefix}{key}", value
|
41
|
+
|
42
|
+
elif isinstance(value, (DictConfig, ListConfig)):
|
43
|
+
yield from iter_params(value, f"{prefix}{key}.")
|
44
|
+
|
45
|
+
else:
|
46
|
+
yield f"{prefix}{key}", value
|
47
|
+
|
48
|
+
elif isinstance(config, ListConfig):
|
49
|
+
for index, value in enumerate(config):
|
50
|
+
if isinstance(value, (DictConfig, ListConfig)):
|
51
|
+
yield from iter_params(value, f"{prefix}{index}.")
|
52
|
+
|
53
|
+
else:
|
54
|
+
yield f"{prefix}{index}", value
|
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides context managers to log parameters and manage the MLflow
|
3
|
+
run context.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import os
|
@@ -35,6 +40,28 @@ def log_run(
|
|
35
40
|
*,
|
36
41
|
synchronous: bool | None = None,
|
37
42
|
) -> Iterator[Info]:
|
43
|
+
"""
|
44
|
+
Log the parameters from the given configuration object and manage the MLflow
|
45
|
+
run context.
|
46
|
+
|
47
|
+
This context manager logs the parameters from the provided configuration object
|
48
|
+
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
49
|
+
are logged and the run is properly closed.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
config: The configuration object to log the parameters from.
|
53
|
+
synchronous: Whether to log the parameters synchronously.
|
54
|
+
Defaults to None.
|
55
|
+
|
56
|
+
Yields:
|
57
|
+
Info: An `Info` object containing the output directory and artifact directory
|
58
|
+
paths.
|
59
|
+
|
60
|
+
Example:
|
61
|
+
with log_run(config) as info:
|
62
|
+
# Perform operations within the MLflow run context
|
63
|
+
pass
|
64
|
+
"""
|
38
65
|
log_params(config, synchronous=synchronous)
|
39
66
|
|
40
67
|
hc = HydraConfig.get()
|
@@ -61,6 +88,32 @@ def log_run(
|
|
61
88
|
|
62
89
|
@contextmanager
|
63
90
|
def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
|
91
|
+
"""
|
92
|
+
Watch the given directory for changes and call the provided function
|
93
|
+
when a change is detected.
|
94
|
+
|
95
|
+
This context manager sets up a file system watcher on the specified directory.
|
96
|
+
When a file modification is detected, the provided function is called with
|
97
|
+
the path of the modified file. The watcher runs for the specified timeout
|
98
|
+
period or until the context is exited.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
func (Callable[[Path], None]): The function to call when a change is
|
102
|
+
detected. It should accept a single argument of type `Path`,
|
103
|
+
which is the path of the modified file.
|
104
|
+
dir (Path | str, optional): The directory to watch. If not specified,
|
105
|
+
the current MLflow artifact URI is used. Defaults to "".
|
106
|
+
timeout (int, optional): The timeout period in seconds for the watcher
|
107
|
+
to run after the context is exited. Defaults to 60.
|
108
|
+
|
109
|
+
Yields:
|
110
|
+
None: This context manager does not return any value.
|
111
|
+
|
112
|
+
Example:
|
113
|
+
with watch(log_artifact, "/path/to/dir"):
|
114
|
+
# Perform operations while watching the directory for changes
|
115
|
+
pass
|
116
|
+
"""
|
64
117
|
if not dir:
|
65
118
|
uri = mlflow.get_artifact_uri()
|
66
119
|
dir = uri_to_path(uri)
|
@@ -100,6 +153,18 @@ def chdir_artifact(
|
|
100
153
|
run: Run | Series | str,
|
101
154
|
artifact_path: str | None = None,
|
102
155
|
) -> Iterator[Path]:
|
156
|
+
"""
|
157
|
+
Change the current working directory to the artifact directory of the
|
158
|
+
given run.
|
159
|
+
|
160
|
+
This context manager changes the current working directory to the artifact
|
161
|
+
directory of the given run. It ensures that the directory is changed back
|
162
|
+
to the original directory after the context is exited.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
run: The run to get the artifact directory from.
|
166
|
+
artifact_path: The artifact path.
|
167
|
+
"""
|
103
168
|
curdir = Path.cwd()
|
104
169
|
|
105
170
|
artifact_dir = get_artifact_path(run, artifact_path)
|
@@ -0,0 +1,49 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra
|
3
|
+
configuration objects and set up experiments using MLflow.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import mlflow
|
9
|
+
from hydra.core.hydra_config import HydraConfig
|
10
|
+
|
11
|
+
from hydraflow.config import iter_params
|
12
|
+
|
13
|
+
|
14
|
+
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
|
15
|
+
"""
|
16
|
+
Set the experiment name and, optionally, the tracking URI.
|
17
|
+
|
18
|
+
This function sets the experiment name by combining the given prefix,
|
19
|
+
the job name from HydraConfig, and the given suffix. Optionally, it can
|
20
|
+
also set the tracking URI.
|
21
|
+
|
22
|
+
Args:
|
23
|
+
prefix: The prefix to prepend to the experiment name.
|
24
|
+
suffix: The suffix to append to the experiment name.
|
25
|
+
uri: The tracking URI to use.
|
26
|
+
"""
|
27
|
+
if uri:
|
28
|
+
mlflow.set_tracking_uri(uri)
|
29
|
+
|
30
|
+
hc = HydraConfig.get()
|
31
|
+
name = f"{prefix}{hc.job.name}{suffix}"
|
32
|
+
mlflow.set_experiment(name)
|
33
|
+
|
34
|
+
|
35
|
+
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
36
|
+
"""
|
37
|
+
Log the parameters from the given configuration object.
|
38
|
+
|
39
|
+
This function logs the parameters from the provided configuration object
|
40
|
+
using MLflow. It iterates over the parameters and logs them using the
|
41
|
+
`mlflow.log_param` method.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
config: The configuration object to log the parameters from.
|
45
|
+
synchronous: Whether to log the parameters synchronously.
|
46
|
+
Defaults to None.
|
47
|
+
"""
|
48
|
+
for key, value in iter_params(config):
|
49
|
+
mlflow.log_param(key, value, synchronous=synchronous)
|