hydraflow 0.1.4__tar.gz → 0.2.0__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow-0.2.0/PKG-INFO +111 -0
- hydraflow-0.2.0/README.md +82 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/pyproject.toml +2 -2
- {hydraflow-0.1.4 → hydraflow-0.2.0}/src/hydraflow/__init__.py +0 -10
- hydraflow-0.2.0/src/hydraflow/config.py +55 -0
- hydraflow-0.2.0/src/hydraflow/context.py +188 -0
- hydraflow-0.2.0/src/hydraflow/mlflow.py +72 -0
- hydraflow-0.2.0/src/hydraflow/runs.py +422 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/log_run.py +2 -2
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_config.py +1 -2
- hydraflow-0.2.0/tests/test_context.py +36 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_log_run.py +26 -8
- hydraflow-0.2.0/tests/test_mlflow.py +35 -0
- hydraflow-0.2.0/tests/test_runs.py +277 -0
- hydraflow-0.1.4/PKG-INFO +0 -45
- hydraflow-0.1.4/README.md +0 -16
- hydraflow-0.1.4/src/hydraflow/config.py +0 -30
- hydraflow-0.1.4/src/hydraflow/context.py +0 -111
- hydraflow-0.1.4/src/hydraflow/mlflow.py +0 -20
- hydraflow-0.1.4/src/hydraflow/runs.py +0 -217
- hydraflow-0.1.4/src/hydraflow/util.py +0 -11
- hydraflow-0.1.4/tests/test_runs.py +0 -260
- {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/.gitattributes +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/.gitignore +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/LICENSE +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/watch.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_version.py +0 -0
- {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_watch.py +0 -0
hydraflow-0.2.0/PKG-INFO
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.2.0
|
4
|
+
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
|
+
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
+
License-Expression: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Documentation
|
17
|
+
Classifier: Topic :: Software Development :: Documentation
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: hydra-core>1.3
|
20
|
+
Requires-Dist: mlflow>2.15
|
21
|
+
Requires-Dist: setuptools
|
22
|
+
Requires-Dist: watchdog
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
# Hydraflow
|
31
|
+
|
32
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
+
[![Python Version][python-v-image]][python-v-link]
|
34
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
+
|
37
|
+
<!-- Badges -->
|
38
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
46
|
+
|
47
|
+
## Overview
|
48
|
+
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
50
|
+
|
51
|
+
## Key Features
|
52
|
+
|
53
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
54
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
55
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
56
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
You can install Hydraflow via pip:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
pip install hydraflow
|
64
|
+
```
|
65
|
+
|
66
|
+
## Getting Started
|
67
|
+
|
68
|
+
Here is a simple example to get you started with Hydraflow:
|
69
|
+
|
70
|
+
```python
|
71
|
+
import hydra
|
72
|
+
import hydraflow
|
73
|
+
import mlflow
|
74
|
+
from dataclasses import dataclass
|
75
|
+
from hydra.core.config_store import ConfigStore
|
76
|
+
from pathlib import Path
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class MySQLConfig:
|
80
|
+
host: str = "localhost"
|
81
|
+
port: int = 3306
|
82
|
+
|
83
|
+
cs = ConfigStore.instance()
|
84
|
+
cs.store(name="config", node=MySQLConfig)
|
85
|
+
|
86
|
+
@hydra.main(version_base=None, config_name="config")
|
87
|
+
def my_app(cfg: MySQLConfig) -> None:
|
88
|
+
# Set experiment by Hydra job name.
|
89
|
+
hydraflow.set_experiment()
|
90
|
+
|
91
|
+
# Automatically log params using Hydra config.
|
92
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
93
|
+
# Your app code below.
|
94
|
+
|
95
|
+
# `info.output_dir` is the Hydra output directory.
|
96
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
97
|
+
|
98
|
+
with hydraflow.watch(callback):
|
99
|
+
# Watch files in the MLflow artifact directory.
|
100
|
+
# You can update metrics or log other artifacts
|
101
|
+
# according to the watched files in your callback
|
102
|
+
# function.
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Your callback function here.
|
106
|
+
def callback(file: Path) -> None:
|
107
|
+
pass
|
108
|
+
|
109
|
+
if __name__ == "__main__":
|
110
|
+
my_app()
|
111
|
+
```
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Hydraflow
|
2
|
+
|
3
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
4
|
+
[![Python Version][python-v-image]][python-v-link]
|
5
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
6
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
7
|
+
|
8
|
+
<!-- Badges -->
|
9
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
10
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
11
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
12
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
13
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
14
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
15
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
16
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
17
|
+
|
18
|
+
## Overview
|
19
|
+
|
20
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
21
|
+
|
22
|
+
## Key Features
|
23
|
+
|
24
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
25
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
26
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
27
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
You can install Hydraflow via pip:
|
32
|
+
|
33
|
+
```bash
|
34
|
+
pip install hydraflow
|
35
|
+
```
|
36
|
+
|
37
|
+
## Getting Started
|
38
|
+
|
39
|
+
Here is a simple example to get you started with Hydraflow:
|
40
|
+
|
41
|
+
```python
|
42
|
+
import hydra
|
43
|
+
import hydraflow
|
44
|
+
import mlflow
|
45
|
+
from dataclasses import dataclass
|
46
|
+
from hydra.core.config_store import ConfigStore
|
47
|
+
from pathlib import Path
|
48
|
+
|
49
|
+
@dataclass
|
50
|
+
class MySQLConfig:
|
51
|
+
host: str = "localhost"
|
52
|
+
port: int = 3306
|
53
|
+
|
54
|
+
cs = ConfigStore.instance()
|
55
|
+
cs.store(name="config", node=MySQLConfig)
|
56
|
+
|
57
|
+
@hydra.main(version_base=None, config_name="config")
|
58
|
+
def my_app(cfg: MySQLConfig) -> None:
|
59
|
+
# Set experiment by Hydra job name.
|
60
|
+
hydraflow.set_experiment()
|
61
|
+
|
62
|
+
# Automatically log params using Hydra config.
|
63
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
64
|
+
# Your app code below.
|
65
|
+
|
66
|
+
# `info.output_dir` is the Hydra output directory.
|
67
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
68
|
+
|
69
|
+
with hydraflow.watch(callback):
|
70
|
+
# Watch files in the MLflow artifact directory.
|
71
|
+
# You can update metrics or log other artifacts
|
72
|
+
# according to the watched files in your callback
|
73
|
+
# function.
|
74
|
+
pass
|
75
|
+
|
76
|
+
# Your callback function here.
|
77
|
+
def callback(file: Path) -> None:
|
78
|
+
pass
|
79
|
+
|
80
|
+
if __name__ == "__main__":
|
81
|
+
my_app()
|
82
|
+
```
|
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "hydraflow"
|
7
|
-
version = "0.1.4"
|
8
|
-
description = "Hydra
|
7
|
+
version = "0.2.0"
|
8
|
+
description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
|
9
9
|
readme = "README.md"
|
10
10
|
license = "MIT"
|
11
11
|
authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
|
@@ -3,15 +3,10 @@ from .mlflow import set_experiment
|
|
3
3
|
from .runs import (
|
4
4
|
Run,
|
5
5
|
Runs,
|
6
|
-
drop_unique_params,
|
7
6
|
filter_runs,
|
8
|
-
get_artifact_dir,
|
9
|
-
get_artifact_path,
|
10
|
-
get_artifact_uri,
|
11
7
|
get_param_dict,
|
12
8
|
get_param_names,
|
13
9
|
get_run,
|
14
|
-
get_run_id,
|
15
10
|
load_config,
|
16
11
|
)
|
17
12
|
|
@@ -20,15 +15,10 @@ __all__ = [
|
|
20
15
|
"Run",
|
21
16
|
"Runs",
|
22
17
|
"chdir_artifact",
|
23
|
-
"drop_unique_params",
|
24
18
|
"filter_runs",
|
25
|
-
"get_artifact_dir",
|
26
|
-
"get_artifact_path",
|
27
|
-
"get_artifact_uri",
|
28
19
|
"get_param_dict",
|
29
20
|
"get_param_names",
|
30
21
|
"get_run",
|
31
|
-
"get_run_id",
|
32
22
|
"load_config",
|
33
23
|
"log_run",
|
34
24
|
"set_experiment",
|
@@ -0,0 +1,55 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for working with configuration
|
3
|
+
objects using the OmegaConf library.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
|
10
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
from collections.abc import Iterator
|
14
|
+
from typing import Any
|
15
|
+
|
16
|
+
|
17
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
    """
    Flatten a configuration object into ``(key, value)`` pairs.

    Anything that is not already a DictConfig or ListConfig is first
    converted with ``OmegaConf.create``. Keys of nested containers are
    joined with ``"."`` and every key starts with ``prefix``.

    Args:
        config: The configuration object to iterate over. This can be a
            dictionary, list, DictConfig, or ListConfig.
        prefix: Text prepended to every generated key.
            Defaults to an empty string.

    Yields:
        Key-value pairs. Inside a mapping, a list whose elements are all
        non-container values is yielded as a single value under its key;
        any other nested container is descended into.
    """
    containers = (DictConfig, ListConfig)

    if isinstance(config, containers):
        node = config
    else:
        node = OmegaConf.create(config)  # type: ignore

    if isinstance(node, DictConfig):
        for name, item in node.items():
            full_key = f"{prefix}{name}"

            # Plain values are leaves.
            if not isinstance(item, containers):
                yield full_key, item
                continue

            # A list without nested containers is kept as one parameter.
            is_flat_list = isinstance(item, ListConfig) and not any(
                isinstance(element, containers) for element in item
            )
            if is_flat_list:
                yield full_key, item
            else:
                yield from iter_params(item, f"{full_key}.")
        return

    if isinstance(node, ListConfig):
        for position, item in enumerate(node):
            indexed_key = f"{prefix}{position}"
            if isinstance(item, containers):
                yield from iter_params(item, f"{indexed_key}.")
            else:
                yield indexed_key, item
|
@@ -0,0 +1,188 @@
|
|
1
|
+
"""
|
2
|
+
This module provides context managers to log parameters and manage the MLflow
|
3
|
+
run context.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import logging
|
9
|
+
import os
|
10
|
+
import time
|
11
|
+
from contextlib import contextmanager
|
12
|
+
from dataclasses import dataclass
|
13
|
+
from pathlib import Path
|
14
|
+
from typing import TYPE_CHECKING
|
15
|
+
|
16
|
+
import mlflow
|
17
|
+
from hydra.core.hydra_config import HydraConfig
|
18
|
+
from watchdog.events import FileModifiedEvent, FileSystemEventHandler
|
19
|
+
from watchdog.observers import Observer
|
20
|
+
|
21
|
+
from hydraflow.mlflow import get_artifact_dir, log_params
|
22
|
+
|
23
|
+
if TYPE_CHECKING:
|
24
|
+
from collections.abc import Callable, Iterator
|
25
|
+
|
26
|
+
from mlflow.entities.run import Run
|
27
|
+
|
28
|
+
log = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
@dataclass
class Info:
    """
    Directory paths handed to the body of a `log_run` context.

    `log_run` builds one instance per call and yields it to the caller.
    """

    # Hydra output directory (taken from `hc.runtime.output_dir` in `log_run`).
    output_dir: Path
    # MLflow artifact directory (the value returned by `get_artifact_dir()`).
    artifact_dir: Path
|
35
|
+
|
36
|
+
|
37
|
+
@contextmanager
def log_run(
    config: object,
    *,
    synchronous: bool | None = None,
) -> Iterator[Info]:
    """
    Log the parameters from the given configuration object and mirror the
    Hydra output directory into the MLflow artifacts.

    On entry the parameters of ``config`` are logged and the Hydra config
    sub-directory is logged as artifacts. While the body of the ``with``
    block runs, the Hydra output directory is watched and every modified
    file is logged as an artifact. On exit (normal or not) the whole output
    directory is logged once more.

    Args:
        config: The configuration object to log the parameters from.
        synchronous: Whether to log the parameters synchronously.
            Defaults to None.

    Yields:
        Info: An `Info` object containing the output directory and artifact
        directory paths.

    Raises:
        Exception: Any exception raised inside the context is logged with
            its traceback and re-raised unchanged.

    Example:
        with log_run(config) as info:
            # Perform operations within the MLflow run context
            pass
    """
    log_params(config, synchronous=synchronous)

    hc = HydraConfig.get()
    output_dir = Path(hc.runtime.output_dir)
    info = Info(output_dir, get_artifact_dir())

    # Save '.hydra' config directory first.
    output_subdir = output_dir / (hc.output_subdir or "")
    mlflow.log_artifacts(output_subdir.as_posix(), hc.output_subdir)

    def log_artifact(path: Path) -> None:
        # NOTE(review): if `path` is absolute, joining with `output_dir`
        # leaves it unchanged; confirm what the watcher passes in.
        local_path = (output_dir / path).as_posix()
        mlflow.log_artifact(local_path)

    try:
        with watch(log_artifact, output_dir):
            yield info

    except Exception as e:
        # `log.exception` keeps the same message but also records the
        # traceback, which a plain `log.error` would drop.
        log.exception("Error during log_run: %s", e)
        raise

    finally:
        # Save output_dir including '.hydra' config directory.
        mlflow.log_artifacts(output_dir.as_posix())
|
90
|
+
|
91
|
+
|
92
|
+
@contextmanager
def watch(
    func: Callable[[Path], None],
    dir: Path | str = "",
    timeout: int = 60,
) -> Iterator[None]:
    """
    Watch the given directory for changes and call the provided function
    when a change is detected.

    This context manager schedules a recursive file system observer on the
    directory. When the context is exited, it waits until the observer's
    event queue is empty or ``timeout`` seconds have passed, then stops
    and joins the observer.

    Args:
        func: The function to call when a change is
            detected. It should accept a single argument of type `Path`,
            which is the path of the modified file.
        dir: The directory to watch. If empty, the directory returned by
            `get_artifact_dir()` is used. Defaults to "".
        timeout: Maximum time in seconds to wait for pending events
            after the context is exited. Defaults to 60.

    Yields:
        None

    Raises:
        Exception: Any exception raised inside the context is logged with
            its traceback and re-raised unchanged.

    Example:
        with watch(log_artifact, "/path/to/dir"):
            # Perform operations while watching the directory for changes
            pass
    """
    dir = dir or get_artifact_dir()

    handler = Handler(func)
    observer = Observer()
    observer.schedule(handler, dir, recursive=True)
    observer.start()

    try:
        yield

    except Exception as e:
        # Same message as before, now with the traceback attached.
        log.exception("Error during watch: %s", e)
        raise

    finally:
        # Let pending events drain, bounded by real elapsed time rather than
        # by a running sum of nominal sleep intervals.
        deadline = time.monotonic() + timeout
        while not observer.event_queue.empty() and time.monotonic() < deadline:
            time.sleep(0.2)

        observer.stop()
        observer.join()
|
148
|
+
|
149
|
+
|
150
|
+
class Handler(FileSystemEventHandler):
    """File system event handler that passes modified files to a callback."""

    def __init__(self, func: Callable[[Path], None]) -> None:
        # Callback invoked with the path of each modified file.
        self.func = func

    def on_modified(self, event: FileModifiedEvent) -> None:
        """Call the stored callback when the event's source path is a file."""
        modified = Path(event.src_path)
        if not modified.is_file():
            # Skip anything that is not an existing regular file.
            return
        self.func(modified)
|
158
|
+
|
159
|
+
|
160
|
+
@contextmanager
def chdir_artifact(
    run: Run,
    artifact_path: str | None = None,
) -> Iterator[Path]:
    """
    Temporarily make a run's downloaded artifacts the working directory.

    The artifacts of ``run`` are fetched with
    ``mlflow.artifacts.download_artifacts`` and the process changes into
    the returned location. The previous working directory is restored when
    the context exits, including when the body raises.

    Args:
        run: The run to get the artifact directory from.
        artifact_path: The artifact path passed to the download call.

    Yields:
        The downloaded location as a `Path`.
    """
    previous_dir = Path.cwd()
    run_id = run.info.run_id
    local_dir = mlflow.artifacts.download_artifacts(
        run_id=run_id,
        artifact_path=artifact_path,
    )

    os.chdir(local_dir)
    try:
        yield Path(local_dir)
    finally:
        # Always return to where the caller started.
        os.chdir(previous_dir)
|
@@ -0,0 +1,72 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra
|
3
|
+
configuration objects and set up experiments using MLflow.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from pathlib import Path
|
9
|
+
|
10
|
+
import mlflow
|
11
|
+
from hydra.core.hydra_config import HydraConfig
|
12
|
+
|
13
|
+
from hydraflow.config import iter_params
|
14
|
+
|
15
|
+
|
16
|
+
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
    """
    Select the MLflow experiment named after the current Hydra job.

    The experiment name is ``prefix`` + the job name from HydraConfig +
    ``suffix``. If ``uri`` is truthy, it is set as the MLflow tracking URI
    before the experiment is selected.

    Args:
        prefix: The prefix to prepend to the experiment name.
        suffix: The suffix to append to the experiment name.
        uri: The tracking URI to use.
    """
    if uri:
        mlflow.set_tracking_uri(uri)

    job_name = HydraConfig.get().job.name
    mlflow.set_experiment(f"{prefix}{job_name}{suffix}")
|
35
|
+
|
36
|
+
|
37
|
+
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """
    Log every parameter of the given configuration object to MLflow.

    The configuration is flattened with `iter_params`, and each resulting
    key-value pair is sent to `mlflow.log_param`.

    Args:
        config: The configuration object to log the parameters from.
        synchronous: Forwarded to `mlflow.log_param` for every parameter.
            Defaults to None.
    """
    flattened = iter_params(config)
    for name, param in flattened:
        mlflow.log_param(name, param, synchronous=synchronous)
|
52
|
+
|
53
|
+
|
54
|
+
def get_artifact_dir(artifact_path: str | None = None) -> Path:
    """
    Return the local directory for the given artifact path.

    The artifact URI is looked up with `mlflow.get_artifact_uri` and passed
    to `mlflow.artifacts.download_artifacts`; the location returned by the
    download call is wrapped in a `Path`.

    Args:
        artifact_path: The artifact path for which to get the directory.
            Defaults to None.

    Returns:
        The local path returned by the download call.
    """
    artifact_uri = mlflow.get_artifact_uri(artifact_path)
    local_dir = mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri)
    return Path(local_dir)
|