hydraflow 0.1.4__tar.gz → 0.1.5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow-0.1.5/PKG-INFO +111 -0
- hydraflow-0.1.5/README.md +82 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/pyproject.toml +2 -2
- hydraflow-0.1.5/src/hydraflow/config.py +54 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/src/hydraflow/context.py +65 -0
- hydraflow-0.1.5/src/hydraflow/mlflow.py +49 -0
- hydraflow-0.1.5/src/hydraflow/runs.py +512 -0
- hydraflow-0.1.5/src/hydraflow/util.py +24 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_config.py +1 -2
- hydraflow-0.1.4/PKG-INFO +0 -45
- hydraflow-0.1.4/README.md +0 -16
- hydraflow-0.1.4/src/hydraflow/config.py +0 -30
- hydraflow-0.1.4/src/hydraflow/mlflow.py +0 -20
- hydraflow-0.1.4/src/hydraflow/runs.py +0 -217
- hydraflow-0.1.4/src/hydraflow/util.py +0 -11
- {hydraflow-0.1.4 → hydraflow-0.1.5}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/.gitattributes +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/.gitignore +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/LICENSE +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/src/hydraflow/__init__.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/scripts/log_run.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/scripts/watch.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_log_run.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_runs.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_version.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.1.5}/tests/test_watch.py +0 -0
hydraflow-0.1.5/PKG-INFO
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.1.5
|
4
|
+
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
|
+
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
+
License-Expression: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Documentation
|
17
|
+
Classifier: Topic :: Software Development :: Documentation
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: hydra-core>1.3
|
20
|
+
Requires-Dist: mlflow>2.15
|
21
|
+
Requires-Dist: setuptools
|
22
|
+
Requires-Dist: watchdog
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
# Hydraflow
|
31
|
+
|
32
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
+
[![Python Version][python-v-image]][python-v-link]
|
34
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
+
|
37
|
+
<!-- Badges -->
|
38
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
46
|
+
|
47
|
+
## Overview
|
48
|
+
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
50
|
+
|
51
|
+
## Key Features
|
52
|
+
|
53
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
54
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
55
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
56
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
You can install Hydraflow via pip:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
pip install hydraflow
|
64
|
+
```
|
65
|
+
|
66
|
+
## Getting Started
|
67
|
+
|
68
|
+
Here is a simple example to get you started with Hydraflow:
|
69
|
+
|
70
|
+
```python
|
71
|
+
import hydra
|
72
|
+
import hydraflow
|
73
|
+
import mlflow
|
74
|
+
from dataclasses import dataclass
|
75
|
+
from hydra.core.config_store import ConfigStore
|
76
|
+
from pathlib import Path
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class MySQLConfig:
|
80
|
+
host: str = "localhost"
|
81
|
+
port: int = 3306
|
82
|
+
|
83
|
+
cs = ConfigStore.instance()
|
84
|
+
cs.store(name="config", node=MySQLConfig)
|
85
|
+
|
86
|
+
@hydra.main(version_base=None, config_name="config")
|
87
|
+
def my_app(cfg: MySQLConfig) -> None:
|
88
|
+
# Set experiment by Hydra job name.
|
89
|
+
hydraflow.set_experiment()
|
90
|
+
|
91
|
+
# Automatically log params using Hydra config.
|
92
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
93
|
+
# Your app code below.
|
94
|
+
|
95
|
+
# `info.output_dir` is the Hydra output directory.
|
96
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
97
|
+
|
98
|
+
with hydraflow.watch(callback):
|
99
|
+
# Watch files in the MLflow artifact directory.
|
100
|
+
# You can update metrics or log other artifacts
|
101
|
+
# according to the watched files in your callback
|
102
|
+
# function.
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Your callback function here.
|
106
|
+
def callback(file: Path) -> None:
|
107
|
+
pass
|
108
|
+
|
109
|
+
if __name__ == "__main__":
|
110
|
+
my_app()
|
111
|
+
```
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Hydraflow
|
2
|
+
|
3
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
4
|
+
[![Python Version][python-v-image]][python-v-link]
|
5
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
6
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
7
|
+
|
8
|
+
<!-- Badges -->
|
9
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
10
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
11
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
12
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
13
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
14
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
15
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
16
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
17
|
+
|
18
|
+
## Overview
|
19
|
+
|
20
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
21
|
+
|
22
|
+
## Key Features
|
23
|
+
|
24
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
25
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
26
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
27
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
You can install Hydraflow via pip:
|
32
|
+
|
33
|
+
```bash
|
34
|
+
pip install hydraflow
|
35
|
+
```
|
36
|
+
|
37
|
+
## Getting Started
|
38
|
+
|
39
|
+
Here is a simple example to get you started with Hydraflow:
|
40
|
+
|
41
|
+
```python
|
42
|
+
import hydra
|
43
|
+
import hydraflow
|
44
|
+
import mlflow
|
45
|
+
from dataclasses import dataclass
|
46
|
+
from hydra.core.config_store import ConfigStore
|
47
|
+
from pathlib import Path
|
48
|
+
|
49
|
+
@dataclass
|
50
|
+
class MySQLConfig:
|
51
|
+
host: str = "localhost"
|
52
|
+
port: int = 3306
|
53
|
+
|
54
|
+
cs = ConfigStore.instance()
|
55
|
+
cs.store(name="config", node=MySQLConfig)
|
56
|
+
|
57
|
+
@hydra.main(version_base=None, config_name="config")
|
58
|
+
def my_app(cfg: MySQLConfig) -> None:
|
59
|
+
# Set experiment by Hydra job name.
|
60
|
+
hydraflow.set_experiment()
|
61
|
+
|
62
|
+
# Automatically log params using Hydra config.
|
63
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
64
|
+
# Your app code below.
|
65
|
+
|
66
|
+
# `info.output_dir` is the Hydra output directory.
|
67
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
68
|
+
|
69
|
+
with hydraflow.watch(callback):
|
70
|
+
# Watch files in the MLflow artifact directory.
|
71
|
+
# You can update metrics or log other artifacts
|
72
|
+
# according to the watched files in your callback
|
73
|
+
# function.
|
74
|
+
pass
|
75
|
+
|
76
|
+
# Your callback function here.
|
77
|
+
def callback(file: Path) -> None:
|
78
|
+
pass
|
79
|
+
|
80
|
+
if __name__ == "__main__":
|
81
|
+
my_app()
|
82
|
+
```
|
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "hydraflow"
|
7
|
-
version = "0.1.4"
|
8
|
-
description = "Hydra
|
7
|
+
version = "0.1.5"
|
8
|
+
description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
|
9
9
|
readme = "README.md"
|
10
10
|
license = "MIT"
|
11
11
|
authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
|
@@ -0,0 +1,54 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for working with configuration
|
3
|
+
objects using the OmegaConf library.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
|
10
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
from collections.abc import Iterator
|
14
|
+
from typing import Any
|
15
|
+
|
16
|
+
|
17
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
    """
    Yield flattened ``(key, value)`` pairs from a configuration object.

    Anything that is not already a `DictConfig` or `ListConfig` is first
    converted with `OmegaConf.create`. Nested containers are walked
    recursively and their keys are joined with ``"."``; list positions are
    used as keys for list elements.

    A list stored under a dictionary key is yielded as a single value when
    none of its elements is itself a `DictConfig` or `ListConfig`.

    Args:
        config (object): The configuration object to traverse.
        prefix (str, optional): Text prepended to every yielded key.
            Defaults to "".

    Yields:
        Tuples of the dotted parameter key and its value.
    """
    containers = (DictConfig, ListConfig)

    if not isinstance(config, containers):
        config = OmegaConf.create(config)  # type: ignore

    if isinstance(config, DictConfig):
        for key, value in config.items():
            name = f"{prefix}{key}"

            # Recurse into dictionaries and into lists that hold containers;
            # a list of plain values is reported as one parameter.
            nested = isinstance(value, DictConfig) or (
                isinstance(value, ListConfig)
                and any(isinstance(item, containers) for item in value)
            )

            if nested:
                yield from iter_params(value, f"{name}.")
            else:
                yield name, value

        return

    if isinstance(config, ListConfig):
        for position, item in enumerate(config):
            if isinstance(item, containers):
                yield from iter_params(item, f"{prefix}{position}.")
            else:
                yield f"{prefix}{position}", item
|
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides context managers to log parameters and manage the MLflow
|
3
|
+
run context.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import os
|
@@ -35,6 +40,28 @@ def log_run(
|
|
35
40
|
*,
|
36
41
|
synchronous: bool | None = None,
|
37
42
|
) -> Iterator[Info]:
|
43
|
+
"""
|
44
|
+
Log the parameters from the given configuration object and manage the MLflow
|
45
|
+
run context.
|
46
|
+
|
47
|
+
This context manager logs the parameters from the provided configuration object
|
48
|
+
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
49
|
+
are logged and the run is properly closed.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
config: The configuration object to log the parameters from.
|
53
|
+
synchronous: Whether to log the parameters synchronously.
|
54
|
+
Defaults to None.
|
55
|
+
|
56
|
+
Yields:
|
57
|
+
Info: An `Info` object containing the output directory and artifact directory
|
58
|
+
paths.
|
59
|
+
|
60
|
+
Example:
|
61
|
+
with log_run(config) as info:
|
62
|
+
# Perform operations within the MLflow run context
|
63
|
+
pass
|
64
|
+
"""
|
38
65
|
log_params(config, synchronous=synchronous)
|
39
66
|
|
40
67
|
hc = HydraConfig.get()
|
@@ -61,6 +88,32 @@ def log_run(
|
|
61
88
|
|
62
89
|
@contextmanager
|
63
90
|
def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
|
91
|
+
"""
|
92
|
+
Watch the given directory for changes and call the provided function
|
93
|
+
when a change is detected.
|
94
|
+
|
95
|
+
This context manager sets up a file system watcher on the specified directory.
|
96
|
+
When a file modification is detected, the provided function is called with
|
97
|
+
the path of the modified file. The watcher runs for the specified timeout
|
98
|
+
period or until the context is exited.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
func (Callable[[Path], None]): The function to call when a change is
|
102
|
+
detected. It should accept a single argument of type `Path`,
|
103
|
+
which is the path of the modified file.
|
104
|
+
dir (Path | str, optional): The directory to watch. If not specified,
|
105
|
+
the current MLflow artifact URI is used. Defaults to "".
|
106
|
+
timeout (int, optional): The timeout period in seconds for the watcher
|
107
|
+
to run after the context is exited. Defaults to 60.
|
108
|
+
|
109
|
+
Yields:
|
110
|
+
None: This context manager does not return any value.
|
111
|
+
|
112
|
+
Example:
|
113
|
+
with watch(log_artifact, "/path/to/dir"):
|
114
|
+
# Perform operations while watching the directory for changes
|
115
|
+
pass
|
116
|
+
"""
|
64
117
|
if not dir:
|
65
118
|
uri = mlflow.get_artifact_uri()
|
66
119
|
dir = uri_to_path(uri)
|
@@ -100,6 +153,18 @@ def chdir_artifact(
|
|
100
153
|
run: Run | Series | str,
|
101
154
|
artifact_path: str | None = None,
|
102
155
|
) -> Iterator[Path]:
|
156
|
+
"""
|
157
|
+
Change the current working directory to the artifact directory of the
|
158
|
+
given run.
|
159
|
+
|
160
|
+
This context manager changes the current working directory to the artifact
|
161
|
+
directory of the given run. It ensures that the directory is changed back
|
162
|
+
to the original directory after the context is exited.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
run: The run to get the artifact directory from.
|
166
|
+
artifact_path: The artifact path.
|
167
|
+
"""
|
103
168
|
curdir = Path.cwd()
|
104
169
|
|
105
170
|
artifact_dir = get_artifact_path(run, artifact_path)
|
@@ -0,0 +1,49 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra
|
3
|
+
configuration objects and set up experiments using MLflow.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import mlflow
|
9
|
+
from hydra.core.hydra_config import HydraConfig
|
10
|
+
|
11
|
+
from hydraflow.config import iter_params
|
12
|
+
|
13
|
+
|
14
|
+
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
    """
    Select the MLflow experiment named after the current Hydra job.

    The experiment name is ``prefix + <Hydra job name> + suffix``, where the
    job name is read from `HydraConfig`. When `uri` is truthy it is first
    passed to `mlflow.set_tracking_uri`.

    Args:
        prefix: Text placed before the Hydra job name.
        suffix: Text placed after the Hydra job name.
        uri: Optional tracking URI to set before choosing the experiment.
    """
    if uri:
        mlflow.set_tracking_uri(uri)

    job_name = HydraConfig.get().job.name
    mlflow.set_experiment(f"{prefix}{job_name}{suffix}")
|
33
|
+
|
34
|
+
|
35
|
+
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """
    Log every parameter of a configuration object to MLflow.

    The configuration is flattened with `iter_params` and each resulting
    key-value pair is sent to `mlflow.log_param`.

    Args:
        config: The configuration object whose parameters are logged.
        synchronous: Forwarded unchanged to `mlflow.log_param`.
            Defaults to None.
    """
    for name, param in iter_params(config):
        mlflow.log_param(name, param, synchronous=synchronous)
|
@@ -0,0 +1,512 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
+
It includes classes and functions to filter runs, retrieve run information, and
|
4
|
+
log artifacts and configurations.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import annotations
|
8
|
+
|
9
|
+
from dataclasses import dataclass
|
10
|
+
from functools import cache
|
11
|
+
from pathlib import Path
|
12
|
+
from typing import TYPE_CHECKING, Any
|
13
|
+
|
14
|
+
import mlflow
|
15
|
+
import numpy as np
|
16
|
+
from mlflow.entities.run import Run as Run_
|
17
|
+
from mlflow.tracking import artifact_utils
|
18
|
+
from omegaconf import DictConfig, OmegaConf
|
19
|
+
from pandas import DataFrame, Series
|
20
|
+
|
21
|
+
from hydraflow.config import iter_params
|
22
|
+
from hydraflow.util import uri_to_path
|
23
|
+
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from typing import Any
|
26
|
+
|
27
|
+
|
28
|
+
@dataclass
class Runs:
    """
    A collection of MLflow runs.

    The runs are held either as a list of run objects or as a DataFrame.
    The methods are thin wrappers that forward to the module-level helper
    functions of the same purpose.
    """

    # The wrapped runs: a list of run objects or a DataFrame of runs.
    runs: list[Run_] | DataFrame

    def __repr__(self) -> str:
        count = len(self)
        return f"{type(self).__name__}({count})"

    def __len__(self) -> int:
        return len(self.runs)

    def filter(self, config: object) -> Runs:
        """
        Return the runs whose parameters match the given configuration.

        The work is delegated to `filter_runs`; the result is wrapped in a
        new `Runs` object.

        Args:
            config (object): Key-value pairs describing the parameter
                values the runs must have.

        Returns:
            Runs: A new `Runs` object containing the filtered runs.
        """
        matched = filter_runs(self.runs, config)
        return Runs(matched)

    def get(self, config: object) -> Run:
        """
        Return the single run that matches the given configuration.

        The work is delegated to `get_run`; the result is wrapped in a
        `Run` object.

        Args:
            config (object): The configuration object identifying the run.

        Returns:
            Run: The run that matches the provided configuration.

        Raises:
            ValueError: If the number of filtered runs is not exactly one.
        """
        found = get_run(self.runs, config)
        return Run(found)

    def drop_unique_params(self) -> Runs:
        """
        Return a new `Runs` built from `drop_unique_params(self.runs)`.

        Only available when the runs are stored in a DataFrame.

        Returns:
            Runs: A new `Runs` object wrapping the reduced DataFrame.

        Raises:
            NotImplementedError: If the runs are not in a DataFrame format.
        """
        if not isinstance(self.runs, DataFrame):
            raise NotImplementedError

        return Runs(drop_unique_params(self.runs))

    def get_param_names(self) -> list[str]:
        """
        Return the parameter names found in the runs.

        Only available when the runs are stored in a DataFrame; the names
        are computed by the module-level `get_param_names`.

        Returns:
            list[str]: A list of parameter names.

        Raises:
            NotImplementedError: If the runs are not in a DataFrame format.
        """
        if not isinstance(self.runs, DataFrame):
            raise NotImplementedError

        return get_param_names(self.runs)

    def get_param_dict(self) -> dict[str, list[str]]:
        """
        Return the parameter names together with their distinct values.

        Only available when the runs are stored in a DataFrame; the mapping
        is computed by the module-level `get_param_dict`.

        Returns:
            dict[str, list[str]]: A dictionary of parameter names and their
                corresponding values.

        Raises:
            NotImplementedError: If the runs are not in a DataFrame format.
        """
        if not isinstance(self.runs, DataFrame):
            raise NotImplementedError

        return get_param_dict(self.runs)
|
143
|
+
|
144
|
+
|
145
|
+
def search_runs(*args, **kwargs) -> Runs:
    """
    Run `mlflow.search_runs` and wrap its result in a `Runs` object.

    All arguments are forwarded unchanged.

    Args:
        *args: Positional arguments to pass to `mlflow.search_runs`.
        **kwargs: Keyword arguments to pass to `mlflow.search_runs`.

    Returns:
        Runs: A `Runs` object containing the search results.
    """
    return Runs(mlflow.search_runs(*args, **kwargs))
|
162
|
+
|
163
|
+
|
164
|
+
def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
    """
    Keep only the runs whose parameters match the given configuration.

    A list of runs is handled by `_filter_runs_list`; anything else is
    treated as a DataFrame and handled by `_filter_runs_dataframe`. The
    result has the same container kind as the input.

    Args:
        runs: The runs to filter.
        config: The configuration object holding the parameter values
            to match.

    Returns:
        A filtered list of runs or a filtered DataFrame.
    """
    selector = _filter_runs_list if isinstance(runs, list) else _filter_runs_dataframe
    return selector(runs, config)
|
185
|
+
|
186
|
+
|
187
|
+
def _is_equal(run: Run_, key: str, value: Any) -> bool:
    """
    Check whether the parameter `key` of `run` equals `value`.

    The parameter is read from `run.data.params`. When the key is absent,
    `value` itself is used as the stored parameter, so a missing parameter
    counts as a match. Otherwise the stored parameter is converted to the
    type of `value` before comparing.

    Args:
        run: The run whose parameters are inspected.
        key: The parameter name.
        value: The expected parameter value.

    Returns:
        bool: True if the parameter matches `value`, False otherwise.
    """
    param = run.data.params.get(key, value)

    if param is None:
        return False

    if isinstance(param, str):
        # NOTE(review): assumes the stored text is what `str(value)` produced
        # when the parameter was logged -- confirm against the MLflow backend.
        if value is None:
            # `type(None)(param)` would raise TypeError.
            return param == "None"

        if isinstance(value, bool):
            # `bool("False")` is True, so the generic conversion below cannot
            # tell the two boolean spellings apart.
            return param.lower() == str(value).lower()

    return type(value)(param) == value
|
194
|
+
|
195
|
+
|
196
|
+
def _filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
    """
    Filter a list of runs by the parameters of a configuration object.

    The configuration is flattened with `iter_params`; for each key-value
    pair in turn, only the runs accepted by `_is_equal` are kept.

    Args:
        runs: The runs to filter.
        config: The configuration object holding the expected values.

    Returns:
        The runs that satisfy every parameter of `config`.
    """
    selected = runs

    for name, expected in iter_params(config):
        selected = [
            candidate for candidate in selected if _is_equal(candidate, name, expected)
        ]

    return selected
|
201
|
+
|
202
|
+
|
203
|
+
def _filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
    """
    Filter a DataFrame of runs by the parameters of a configuration object.

    The configuration is flattened with `iter_params`. For every key the
    column ``params.<key>`` is compared with the expected value; a key
    without a matching column is ignored. A row is kept only when, for
    every compared column, its entry is present and equal to the expected
    value.

    Args:
        runs: The DataFrame containing the runs.
        config: The configuration object holding the expected values.

    Returns:
        DataFrame: The rows of `runs` that satisfy every compared parameter.
    """
    index = np.ones(len(runs), dtype=bool)

    for key, value in iter_params(config):
        name = f"params.{key}"

        if name not in runs:
            continue

        series = runs[name]
        is_value = series.notna()

        if value is None or isinstance(value, bool):
            # `fillna(None)` raises and `astype(bool)` turns every non-empty
            # string (including "False") into True, so compare the text form.
            # NOTE(review): assumes the column holds what `str(value)`
            # produced when the parameter was logged -- confirm.
            text = series.astype(str)
            if value is None:
                matches = text == "None"
            else:
                matches = text.str.lower() == str(value).lower()
        else:
            param = series.fillna(value).astype(type(value))
            matches = param == value

        # Combine positionally; `index` is a plain boolean array.
        index &= (is_value & matches).to_numpy()

    return runs[index]
|
216
|
+
|
217
|
+
|
218
|
+
def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
    """
    Return the single run that matches the given configuration.

    The runs are filtered with `filter_runs`; exactly one run must remain.

    Args:
        runs: The runs to filter.
        config: The configuration object identifying the run.

    Returns:
        The matching run: a list element when `runs` is a list, otherwise
        the first row of the filtered DataFrame.

    Raises:
        ValueError: If the number of filtered runs is not exactly one.
    """
    matched = filter_runs(runs, config)
    count = len(matched)

    if count != 1:
        msg = f"number of filtered runs is not 1: got {count}"
        raise ValueError(msg)

    if isinstance(matched, list):
        return matched[0]

    return matched.iloc[0]
|
241
|
+
|
242
|
+
|
243
|
+
def drop_unique_params(runs: DataFrame) -> DataFrame:
    """
    Drop the parameter columns that do not vary across the runs.

    A column whose name starts with ``"params."`` is kept only when it
    holds more than one distinct value. Columns without that prefix are
    always kept.

    Args:
        runs: The DataFrame containing the runs.

    Returns:
        DataFrame: A new DataFrame restricted to the kept columns.
    """
    # One boolean per column, in column order, used as a positional mask.
    keep = [
        (not column.startswith("params.")) or len(runs[column].unique()) > 1
        for column in runs.columns
    ]
    return runs.iloc[:, keep]
|
263
|
+
|
264
|
+
|
265
|
+
def get_param_names(runs: DataFrame) -> list[str]:
    """
    Return the parameter names found in the columns of the runs.

    Every column whose name starts with ``"params."`` contributes the text
    after that prefix. Other columns, and a column named exactly
    ``"params."``, are ignored.

    Args:
        runs: The DataFrame containing the runs.

    Returns:
        list[str]: The parameter names in column order.
    """
    prefix = "params."
    stripped = (
        column.removeprefix(prefix)
        for column in runs.columns
        if column.startswith(prefix)
    )
    return [name for name in stripped if name]
|
288
|
+
|
289
|
+
|
290
|
+
def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
    """
    Map every parameter name to the distinct values it takes in the runs.

    The names come from `get_param_names`; the values are the unique
    entries of the corresponding ``params.<name>`` column.

    Args:
        runs: The DataFrame containing the runs.

    Returns:
        dict[str, list[str]]: A dictionary of parameter names and
            their corresponding values.
    """
    return {
        name: list(runs[f"params.{name}"].unique())
        for name in get_param_names(runs)
    }
|
310
|
+
|
311
|
+
|
312
|
+
@dataclass
class Run:
    """
    A single MLflow run.

    The run is held as a run object, a Series, or a run ID string. The
    members are thin wrappers around the module-level helper functions.
    """

    # The wrapped run: a run object, a Series, or a run ID string.
    run: Run_ | Series | str

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.run_id!r})"

    @property
    def run_id(self) -> str:
        """
        Get the run ID via `get_run_id`.

        Returns:
            str: The run ID.
        """
        identifier = get_run_id(self.run)
        return identifier

    def artifact_uri(self, artifact_path: str | None = None) -> str:
        """
        Get the artifact URI via `get_artifact_uri`.

        Args:
            artifact_path (str | None): The artifact path.

        Returns:
            str: The artifact URI.
        """
        uri = get_artifact_uri(self.run, artifact_path)
        return uri

    @property
    def artifact_dir(self) -> Path:
        """
        Get the artifact directory via `get_artifact_dir`.

        Returns:
            Path: The artifact directory.
        """
        directory = get_artifact_dir(self.run)
        return directory

    def artifact_path(self, artifact_path: str | None = None) -> Path:
        """
        Get the artifact path via `get_artifact_path`.

        Args:
            artifact_path: The artifact path.

        Returns:
            Path: The artifact path.
        """
        location = get_artifact_path(self.run, artifact_path)
        return location

    @property
    def config(self) -> DictConfig:
        """
        Get the configuration via `load_config`.

        Returns:
            DictConfig: The configuration.
        """
        loaded = load_config(self.run)
        return loaded

    def log_hydra_output_dir(self) -> None:
        """
        Call the module-level `log_hydra_output_dir` for this run.

        Returns:
            None
        """
        log_hydra_output_dir(self.run)
|
389
|
+
|
390
|
+
|
391
|
+
def get_run_id(run: Run_ | Series | str) -> str:
    """
    Resolve the run ID from any supported run representation.

    Args:
        run: Either the run ID itself (a string), a ``Run_`` object, or
            another object exposing a ``run_id`` attribute (annotated as
            a ``Series``).

    Returns:
        str: The run ID.
    """
    if not isinstance(run, str):
        # ``Run_`` keeps the ID under ``info``; anything else is read via
        # its ``run_id`` attribute.
        return run.info.run_id if isinstance(run, Run_) else run.run_id

    # A string is already the run ID.
    return run
|
408
|
+
|
409
|
+
|
410
|
+
def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
    """
    Look up the artifact URI of a run.

    The run is first reduced to its run ID with ``get_run_id``; the ID and
    ``artifact_path`` are then passed to ``artifact_utils.get_artifact_uri``.

    Args:
        run: The run object (or run ID string).
        artifact_path: Optional artifact path, forwarded unchanged.

    Returns:
        str: The artifact URI.
    """
    return artifact_utils.get_artifact_uri(get_run_id(run), artifact_path)
|
423
|
+
|
424
|
+
|
425
|
+
def get_artifact_dir(run: Run_ | Series | str) -> Path:
    """
    Return the artifact directory of a run as a path.

    The run's artifact URI (from ``get_artifact_uri``) is converted with
    ``uri_to_path``.

    Args:
        run: The run object (or run ID string).

    Returns:
        Path: The artifact directory.
    """
    return uri_to_path(get_artifact_uri(run))
|
437
|
+
|
438
|
+
|
439
|
+
def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
    """
    Return the path of an artifact inside a run's artifact directory.

    Args:
        run: The run object (or run ID string).
        artifact_path: Path relative to the artifact directory. When it is
            ``None`` or empty, the artifact directory itself is returned.

    Returns:
        Path: The artifact path.
    """
    base_dir = get_artifact_dir(run)

    if artifact_path:
        return base_dir / artifact_path

    return base_dir
|
452
|
+
|
453
|
+
|
454
|
+
def load_config(run: Run_ | Series | str) -> DictConfig:
    """
    Load the configuration of a run.

    The run is reduced to its run ID with ``get_run_id`` and the actual
    loading is done by ``_load_config``.

    Args:
        run: The run object (or run ID string).

    Returns:
        DictConfig: The configuration.
    """
    return _load_config(get_run_id(run))
|
466
|
+
|
467
|
+
|
468
|
+
@cache
def _load_config(run_id: str) -> DictConfig:
    """
    Download and load ``.hydra/config.yaml`` for the given run ID.

    Results are memoized per ``run_id`` by ``functools.cache``.

    Args:
        run_id: The run ID.

    Returns:
        DictConfig: The loaded configuration, or an empty ``DictConfig``
        when the download raises ``OSError``.
    """
    try:
        config_file = mlflow.artifacts.download_artifacts(
            run_id=run_id,
            artifact_path=".hydra/config.yaml",
        )
    except OSError:
        # The download failed with OSError: fall back to an empty config.
        return DictConfig({})
    else:
        # Loading stays outside the ``try`` so its own errors propagate.
        return OmegaConf.load(config_file)  # type: ignore
|
479
|
+
|
480
|
+
|
481
|
+
def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
    """
    Get the Hydra output directory.

    Reads ``.hydra/hydra.yaml`` from the run's artifact directory and
    returns the ``hydra.runtime.output_dir`` value stored in it.

    Args:
        run: The run object (or run ID string).

    Returns:
        Path: The Hydra output directory.

    Raises:
        FileNotFoundError: If ``.hydra/hydra.yaml`` does not exist in the
            run's artifact directory.
    """
    path = get_artifact_dir(run) / ".hydra/hydra.yaml"

    if not path.exists():
        # Name the missing file so the failure can be diagnosed.
        msg = f"Hydra config file not found: {path}"
        raise FileNotFoundError(msg)

    hc = OmegaConf.load(path)
    return Path(hc.hydra.runtime.output_dir)
|
498
|
+
|
499
|
+
|
500
|
+
def log_hydra_output_dir(run: Run_ | Series | str) -> None:
    """
    Log the Hydra output directory.

    Resolves the directory with ``get_hydra_output_dir`` and passes it to
    ``mlflow.log_artifacts`` for the run.

    Args:
        run: The run object (or run ID string).

    Returns:
        None

    Raises:
        FileNotFoundError: Propagated from ``get_hydra_output_dir`` when
            the run has no ``.hydra/hydra.yaml`` artifact.
    """
    output_dir = get_hydra_output_dir(run)
    # Use the shared helper so every accepted type (str, Run_, Series)
    # works; ``run.info.run_id`` is only valid for Run_ objects.
    run_id = get_run_id(run)
    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
|
@@ -0,0 +1,24 @@
|
|
1
|
+
import platform
from pathlib import Path
from urllib.parse import unquote, urlparse
|
4
|
+
|
5
|
+
|
6
|
+
def uri_to_path(uri: str) -> Path:
    """
    Convert a URI to a path.

    This function parses the given URI and converts its path component to
    a local file system path. For ``file:`` URIs, percent-escapes such as
    ``%20`` are decoded so the result names the real file; input without a
    ``file`` scheme is left undecoded. On Windows, if the path starts with
    a forward slash, it is removed to ensure the path is correctly
    formatted.

    Args:
        uri (str): The URI to convert.

    Returns:
        Path: The path corresponding to the URI.
    """
    parsed = urlparse(uri)
    path = parsed.path

    if parsed.scheme == "file":
        # file: URIs percent-encode characters such as spaces.
        path = unquote(path)

    if platform.system() == "Windows" and path.startswith("/"):
        path = path[1:]

    return Path(path)
|
@@ -59,5 +59,4 @@ def test_iter_params_from_config(cfg):
|
|
59
59
|
assert next(it) == ("size.y", 2)
|
60
60
|
assert next(it) == ("db.name", "name")
|
61
61
|
assert next(it) == ("db.port", 100)
|
62
|
-
assert next(it) == ("store.items.0", "a")
|
63
|
-
assert next(it) == ("store.items.1", "b")
|
62
|
+
assert next(it) == ("store.items", ["a", "b"])
|
hydraflow-0.1.4/PKG-INFO
DELETED
@@ -1,45 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.3
|
2
|
-
Name: hydraflow
|
3
|
-
Version: 0.1.4
|
4
|
-
Summary: Hydra with MLflow
|
5
|
-
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
-
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
-
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
-
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
-
License-Expression: MIT
|
10
|
-
License-File: LICENSE
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
12
|
-
Classifier: Programming Language :: Python
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
16
|
-
Classifier: Topic :: Documentation
|
17
|
-
Classifier: Topic :: Software Development :: Documentation
|
18
|
-
Requires-Python: >=3.10
|
19
|
-
Requires-Dist: hydra-core>1.3
|
20
|
-
Requires-Dist: mlflow>2.15
|
21
|
-
Requires-Dist: setuptools
|
22
|
-
Requires-Dist: watchdog
|
23
|
-
Provides-Extra: dev
|
24
|
-
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
-
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
-
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
-
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
-
Description-Content-Type: text/markdown
|
29
|
-
|
30
|
-
# hydraflow
|
31
|
-
|
32
|
-
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
-
[![Python Version][python-v-image]][python-v-link]
|
34
|
-
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
-
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
-
|
37
|
-
<!-- Badges -->
|
38
|
-
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
-
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
-
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
-
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
-
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
hydraflow-0.1.4/README.md
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# hydraflow
|
2
|
-
|
3
|
-
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
4
|
-
[![Python Version][python-v-image]][python-v-link]
|
5
|
-
[![Build Status][GHAction-image]][GHAction-link]
|
6
|
-
[![Coverage Status][codecov-image]][codecov-link]
|
7
|
-
|
8
|
-
<!-- Badges -->
|
9
|
-
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
10
|
-
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
11
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
12
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
13
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
14
|
-
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
15
|
-
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
16
|
-
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
@@ -1,30 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from typing import TYPE_CHECKING
|
4
|
-
|
5
|
-
from omegaconf import DictConfig, ListConfig, OmegaConf
|
6
|
-
|
7
|
-
if TYPE_CHECKING:
|
8
|
-
from collections.abc import Iterator
|
9
|
-
from typing import Any
|
10
|
-
|
11
|
-
|
12
|
-
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
13
|
-
if not isinstance(config, DictConfig | ListConfig):
|
14
|
-
config = OmegaConf.create(config) # type: ignore
|
15
|
-
|
16
|
-
if isinstance(config, DictConfig):
|
17
|
-
for key, value in config.items():
|
18
|
-
if isinstance(value, (DictConfig, ListConfig)):
|
19
|
-
yield from iter_params(value, f"{prefix}{key}.")
|
20
|
-
|
21
|
-
else:
|
22
|
-
yield f"{prefix}{key}", value
|
23
|
-
|
24
|
-
elif isinstance(config, ListConfig):
|
25
|
-
for index, value in enumerate(config):
|
26
|
-
if isinstance(value, (DictConfig, ListConfig)):
|
27
|
-
yield from iter_params(value, f"{prefix}{index}.")
|
28
|
-
|
29
|
-
else:
|
30
|
-
yield f"{prefix}{index}", value
|
@@ -1,20 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import mlflow
|
4
|
-
from hydra.core.hydra_config import HydraConfig
|
5
|
-
|
6
|
-
from hydraflow.config import iter_params
|
7
|
-
|
8
|
-
|
9
|
-
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
|
10
|
-
if uri:
|
11
|
-
mlflow.set_tracking_uri(uri)
|
12
|
-
|
13
|
-
hc = HydraConfig.get()
|
14
|
-
name = f"{prefix}{hc.job.name}{suffix}"
|
15
|
-
mlflow.set_experiment(name)
|
16
|
-
|
17
|
-
|
18
|
-
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
19
|
-
for key, value in iter_params(config):
|
20
|
-
mlflow.log_param(key, value, synchronous=synchronous)
|
@@ -1,217 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
from dataclasses import dataclass
|
4
|
-
from functools import cache
|
5
|
-
from pathlib import Path
|
6
|
-
from typing import TYPE_CHECKING, Any
|
7
|
-
|
8
|
-
import mlflow
|
9
|
-
import numpy as np
|
10
|
-
from mlflow.entities.run import Run as Run_
|
11
|
-
from mlflow.tracking import artifact_utils
|
12
|
-
from omegaconf import DictConfig, OmegaConf
|
13
|
-
from pandas import DataFrame, Series
|
14
|
-
|
15
|
-
from hydraflow.config import iter_params
|
16
|
-
from hydraflow.util import uri_to_path
|
17
|
-
|
18
|
-
if TYPE_CHECKING:
|
19
|
-
from typing import Any
|
20
|
-
|
21
|
-
|
22
|
-
@dataclass
|
23
|
-
class Runs:
|
24
|
-
runs: list[Run_] | DataFrame
|
25
|
-
|
26
|
-
def __repr__(self) -> str:
|
27
|
-
return f"{self.__class__.__name__}({len(self)})"
|
28
|
-
|
29
|
-
def __len__(self) -> int:
|
30
|
-
return len(self.runs)
|
31
|
-
|
32
|
-
def filter(self, config: object) -> Runs:
|
33
|
-
return Runs(filter_runs(self.runs, config))
|
34
|
-
|
35
|
-
def get(self, config: object) -> Run:
|
36
|
-
return Run(get_run(self.runs, config))
|
37
|
-
|
38
|
-
def drop_unique_params(self) -> Runs:
|
39
|
-
if isinstance(self.runs, DataFrame):
|
40
|
-
return Runs(drop_unique_params(self.runs))
|
41
|
-
|
42
|
-
raise NotImplementedError
|
43
|
-
|
44
|
-
def get_param_names(self) -> list[str]:
|
45
|
-
if isinstance(self.runs, DataFrame):
|
46
|
-
return get_param_names(self.runs)
|
47
|
-
|
48
|
-
raise NotImplementedError
|
49
|
-
|
50
|
-
def get_param_dict(self) -> dict[str, list[str]]:
|
51
|
-
if isinstance(self.runs, DataFrame):
|
52
|
-
return get_param_dict(self.runs)
|
53
|
-
|
54
|
-
raise NotImplementedError
|
55
|
-
|
56
|
-
|
57
|
-
def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
|
58
|
-
if isinstance(runs, list):
|
59
|
-
return filter_runs_list(runs, config)
|
60
|
-
|
61
|
-
return filter_runs_dataframe(runs, config)
|
62
|
-
|
63
|
-
|
64
|
-
def _is_equal(run: Run_, key: str, value: Any) -> bool:
|
65
|
-
param = run.data.params.get(key, value)
|
66
|
-
|
67
|
-
if param is None:
|
68
|
-
return False
|
69
|
-
|
70
|
-
return type(value)(param) == value
|
71
|
-
|
72
|
-
|
73
|
-
def filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
|
74
|
-
for key, value in iter_params(config):
|
75
|
-
runs = [run for run in runs if _is_equal(run, key, value)]
|
76
|
-
|
77
|
-
return runs
|
78
|
-
|
79
|
-
|
80
|
-
def filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
|
81
|
-
index = np.ones(len(runs), dtype=bool)
|
82
|
-
|
83
|
-
for key, value in iter_params(config):
|
84
|
-
name = f"params.{key}"
|
85
|
-
|
86
|
-
if name in runs:
|
87
|
-
series = runs[name]
|
88
|
-
is_value = -series.isna()
|
89
|
-
param = series.fillna(value).astype(type(value))
|
90
|
-
index &= is_value & (param == value)
|
91
|
-
|
92
|
-
return runs[index]
|
93
|
-
|
94
|
-
|
95
|
-
def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
|
96
|
-
runs = filter_runs(runs, config)
|
97
|
-
|
98
|
-
if len(runs) == 1:
|
99
|
-
return runs[0] if isinstance(runs, list) else runs.iloc[0]
|
100
|
-
|
101
|
-
msg = f"number of filtered runs is not 1: got {len(runs)}"
|
102
|
-
raise ValueError(msg)
|
103
|
-
|
104
|
-
|
105
|
-
def drop_unique_params(runs: DataFrame) -> DataFrame:
|
106
|
-
def select(column: str) -> bool:
|
107
|
-
return not column.startswith("params.") or len(runs[column].unique()) > 1
|
108
|
-
|
109
|
-
columns = [select(column) for column in runs.columns]
|
110
|
-
return runs.iloc[:, columns]
|
111
|
-
|
112
|
-
|
113
|
-
def get_param_names(runs: DataFrame) -> list[str]:
|
114
|
-
def get_name(column: str) -> str:
|
115
|
-
if column.startswith("params."):
|
116
|
-
return column.split(".", maxsplit=1)[-1]
|
117
|
-
|
118
|
-
return ""
|
119
|
-
|
120
|
-
columns = [get_name(column) for column in runs.columns]
|
121
|
-
return [column for column in columns if column]
|
122
|
-
|
123
|
-
|
124
|
-
def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
|
125
|
-
params = {}
|
126
|
-
for name in get_param_names(runs):
|
127
|
-
params[name] = list(runs[f"params.{name}"].unique())
|
128
|
-
|
129
|
-
return params
|
130
|
-
|
131
|
-
|
132
|
-
@dataclass
|
133
|
-
class Run:
|
134
|
-
run: Run_ | Series | str
|
135
|
-
|
136
|
-
def __repr__(self) -> str:
|
137
|
-
return f"{self.__class__.__name__}({self.run_id!r})"
|
138
|
-
|
139
|
-
@property
|
140
|
-
def run_id(self) -> str:
|
141
|
-
return get_run_id(self.run)
|
142
|
-
|
143
|
-
def artifact_uri(self, artifact_path: str | None = None) -> str:
|
144
|
-
return get_artifact_uri(self.run, artifact_path)
|
145
|
-
|
146
|
-
@property
|
147
|
-
def artifact_dir(self) -> Path:
|
148
|
-
return get_artifact_dir(self.run)
|
149
|
-
|
150
|
-
def artifact_path(self, artifact_path: str | None = None) -> Path:
|
151
|
-
return get_artifact_path(self.run, artifact_path)
|
152
|
-
|
153
|
-
@property
|
154
|
-
def config(self) -> DictConfig:
|
155
|
-
return load_config(self.run)
|
156
|
-
|
157
|
-
def log_hydra_output_dir(self) -> None:
|
158
|
-
log_hydra_output_dir(self.run)
|
159
|
-
|
160
|
-
|
161
|
-
def get_run_id(run: Run_ | Series | str) -> str:
|
162
|
-
if isinstance(run, str):
|
163
|
-
return run
|
164
|
-
|
165
|
-
if isinstance(run, Run_):
|
166
|
-
return run.info.run_id
|
167
|
-
|
168
|
-
return run.run_id
|
169
|
-
|
170
|
-
|
171
|
-
def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
|
172
|
-
run_id = get_run_id(run)
|
173
|
-
return artifact_utils.get_artifact_uri(run_id, artifact_path)
|
174
|
-
|
175
|
-
|
176
|
-
def get_artifact_dir(run: Run_ | Series | str) -> Path:
|
177
|
-
uri = get_artifact_uri(run)
|
178
|
-
return uri_to_path(uri)
|
179
|
-
|
180
|
-
|
181
|
-
def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
|
182
|
-
artifact_dir = get_artifact_dir(run)
|
183
|
-
return artifact_dir / artifact_path if artifact_path else artifact_dir
|
184
|
-
|
185
|
-
|
186
|
-
def load_config(run: Run_ | Series | str) -> DictConfig:
|
187
|
-
run_id = get_run_id(run)
|
188
|
-
return _load_config(run_id)
|
189
|
-
|
190
|
-
|
191
|
-
@cache
|
192
|
-
def _load_config(run_id: str) -> DictConfig:
|
193
|
-
try:
|
194
|
-
path = mlflow.artifacts.download_artifacts(
|
195
|
-
run_id=run_id,
|
196
|
-
artifact_path=".hydra/config.yaml",
|
197
|
-
)
|
198
|
-
except OSError:
|
199
|
-
return DictConfig({})
|
200
|
-
|
201
|
-
return OmegaConf.load(path) # type: ignore
|
202
|
-
|
203
|
-
|
204
|
-
def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
|
205
|
-
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
206
|
-
|
207
|
-
if path.exists():
|
208
|
-
hc = OmegaConf.load(path)
|
209
|
-
return Path(hc.hydra.runtime.output_dir)
|
210
|
-
|
211
|
-
raise FileNotFoundError
|
212
|
-
|
213
|
-
|
214
|
-
def log_hydra_output_dir(run: Run_ | Series | str) -> None:
|
215
|
-
output_dir = get_hydra_output_dir(run)
|
216
|
-
run_id = run if isinstance(run, str) else run.info.run_id
|
217
|
-
mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|