hydraflow 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +28 -0
- hydraflow/config.py +30 -0
- hydraflow/context.py +110 -0
- hydraflow/mlflow.py +17 -0
- hydraflow/run.py +172 -0
- hydraflow/util.py +11 -0
- hydraflow-0.1.0.dist-info/METADATA +29 -0
- hydraflow-0.1.0.dist-info/RECORD +10 -0
- hydraflow-0.1.0.dist-info/WHEEL +4 -0
- hydraflow-0.1.0.dist-info/licenses/LICENSE +21 -0
hydraflow/__init__.py
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
from .context import Info, chdir_artifact, log_run, watch
|
2
|
+
from .mlflow import set_experiment
|
3
|
+
from .run import (
|
4
|
+
filter_by_config,
|
5
|
+
get_artifact_dir,
|
6
|
+
get_artifact_path,
|
7
|
+
get_artifact_uri,
|
8
|
+
get_by_config,
|
9
|
+
get_param_dict,
|
10
|
+
get_param_names,
|
11
|
+
get_run_id,
|
12
|
+
)
|
13
|
+
|
14
|
+
__all__ = [
|
15
|
+
"Info",
|
16
|
+
"chdir_artifact",
|
17
|
+
"filter_by_config",
|
18
|
+
"get_artifact_dir",
|
19
|
+
"get_artifact_path",
|
20
|
+
"get_artifact_uri",
|
21
|
+
"get_by_config",
|
22
|
+
"get_param_dict",
|
23
|
+
"get_param_names",
|
24
|
+
"get_run_id",
|
25
|
+
"log_run",
|
26
|
+
"set_experiment",
|
27
|
+
"watch",
|
28
|
+
]
|
hydraflow/config.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from collections.abc import Iterator
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
|
12
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
    """Yield ``(dotted_name, value)`` pairs for every leaf of a config.

    Anything that is not already a ``DictConfig`` or ``ListConfig`` is first
    passed through ``OmegaConf.create``. Nested containers are walked
    recursively; mapping keys and list indices are joined with ``.`` to
    build each name.

    Args:
        config: Config object to flatten.
        prefix: Text prepended to every generated name. The recursive calls
            use it to carry the path of the parent container.

    Yields:
        Tuples of the dotted parameter name and the leaf value.
    """
    containers = (DictConfig, ListConfig)
    if not isinstance(config, containers):
        config = OmegaConf.create(config)  # type: ignore

    # Mappings are walked by key, sequences by position.
    if isinstance(config, DictConfig):
        entries = config.items()
    elif isinstance(config, ListConfig):
        entries = enumerate(config)
    else:
        return

    for name, value in entries:
        if isinstance(value, containers):
            yield from iter_params(value, f"{prefix}{name}.")
        else:
            yield f"{prefix}{name}", value
|
hydraflow/context.py
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import os
|
4
|
+
import time
|
5
|
+
from contextlib import contextmanager
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import TYPE_CHECKING
|
9
|
+
|
10
|
+
import mlflow
|
11
|
+
from hydra.core.hydra_config import HydraConfig
|
12
|
+
from watchdog.events import FileModifiedEvent, FileSystemEventHandler
|
13
|
+
from watchdog.observers import Observer
|
14
|
+
|
15
|
+
from hydraflow.mlflow import log_params
|
16
|
+
from hydraflow.run import get_artifact_path
|
17
|
+
from hydraflow.util import uri_to_path
|
18
|
+
|
19
|
+
if TYPE_CHECKING:
|
20
|
+
from collections.abc import Callable, Iterator
|
21
|
+
|
22
|
+
from mlflow.entities.run import Run
|
23
|
+
from pandas import Series
|
24
|
+
|
25
|
+
|
26
|
+
@dataclass
class Info:
    """Pair of directories that ``log_run`` yields to its ``with`` body."""

    # Hydra's runtime output directory for the current job.
    output_dir: Path
    # Path obtained by passing the MLflow artifact URI through uri_to_path.
    artifact_dir: Path
|
30
|
+
|
31
|
+
|
32
|
+
@contextmanager
def log_run(
    config: object,
    *,
    synchronous: bool | None = None,
) -> Iterator[Info]:
    """Log a Hydra job's parameters and output files to MLflow.

    On entry the config parameters are logged and Hydra's config
    sub-directory is uploaded as artifacts. On exit, whether or not the
    body raised, the whole Hydra output directory is uploaded.

    Args:
        config: Config object whose parameters are logged via ``log_params``.
        synchronous: Forwarded unchanged to ``log_params``.

    Yields:
        An ``Info`` holding the Hydra output directory and the artifact
        directory of the current MLflow run.
    """
    log_params(config, synchronous=synchronous)

    hydra_config = HydraConfig.get()
    output_dir = Path(hydra_config.runtime.output_dir)
    artifact_dir = uri_to_path(mlflow.get_artifact_uri())
    info = Info(output_dir, artifact_dir)

    # Save '.hydra' config directory first.
    subdir = hydra_config.output_subdir
    config_dir = output_dir / (subdir or "")
    mlflow.log_artifacts(config_dir.as_posix(), subdir)

    try:
        yield info
    finally:
        # Save output_dir including '.hydra' config directory.
        mlflow.log_artifacts(output_dir.as_posix())
|
55
|
+
|
56
|
+
|
57
|
+
@contextmanager
def watch(
    func: Callable[[Path], None],
    dir: Path | str = "",
    timeout: int = 600,
) -> Iterator[None]:
    """Call ``func`` for files modified under ``dir`` while the body runs.

    A watchdog ``Observer`` is started on entry. On exit the function waits
    until the observer's event queue is empty, or until ``timeout`` seconds
    have passed, and then stops and joins the observer.

    Args:
        func: Callback invoked with the path of each modified file.
        dir: Directory to watch recursively. When empty, the artifact
            directory of the current MLflow run is used.
        timeout: Maximum number of seconds to wait on exit for pending
            events to be consumed.

    Yields:
        None.
    """
    if not dir:
        uri = mlflow.get_artifact_uri()
        dir = uri_to_path(uri)

    handler = Handler(func)
    observer = Observer()
    observer.schedule(handler, dir, recursive=True)
    observer.start()

    try:
        yield
    finally:
        # Measure the wait against a monotonic clock: summing nominal sleep
        # durations under-counts when sleeps overrun, letting the real wait
        # exceed `timeout`.
        deadline = time.monotonic() + timeout
        while not observer.event_queue.empty():
            if time.monotonic() >= deadline:
                break
            time.sleep(0.2)

        observer.stop()
        observer.join()
|
85
|
+
|
86
|
+
|
87
|
+
class Handler(FileSystemEventHandler):
    """Watchdog event handler that forwards modified files to a callback."""

    def __init__(self, func: Callable[[Path], None]) -> None:
        """Store the callback to invoke for each modified file."""
        super().__init__()
        self.func = func

    def on_modified(self, event: FileModifiedEvent) -> None:
        """Invoke the callback when the modified path is an existing file."""
        # NOTE(review): watchdog appears to allow bytes for src_path; fsdecode
        # accepts both str and bytes, whereas Path(bytes) raises TypeError.
        file = Path(os.fsdecode(event.src_path))
        # Modifications reported for directories are ignored.
        if file.is_file():
            self.func(file)
|
95
|
+
|
96
|
+
|
97
|
+
@contextmanager
def chdir_artifact(
    run: Run | Series | str,
    artifact_path: str | None = None,
) -> Iterator[Path]:
    """Temporarily change the working directory to a run's artifact path.

    Args:
        run: Run object, run row, or run ID, as accepted by
            ``get_artifact_path``.
        artifact_path: Optional path relative to the artifact directory.

    Yields:
        The directory that was made current.
    """
    previous = Path.cwd()
    target = get_artifact_path(run, artifact_path)

    os.chdir(target)
    try:
        yield target
    finally:
        # Restore the original working directory even if the body raised.
        os.chdir(previous)
|
hydraflow/mlflow.py
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import mlflow
|
4
|
+
from hydra.core.hydra_config import HydraConfig
|
5
|
+
|
6
|
+
from hydraflow.config import iter_params
|
7
|
+
|
8
|
+
|
9
|
+
def set_experiment() -> None:
    """Select the MLflow experiment named after the current Hydra job.

    The tracking URI is set to the empty string before the experiment is
    selected.
    """
    hydra_config = HydraConfig.get()
    mlflow.set_tracking_uri("")
    mlflow.set_experiment(hydra_config.job.name)
|
13
|
+
|
14
|
+
|
15
|
+
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """Log every flattened parameter of ``config`` to MLflow.

    Args:
        config: Config object flattened with ``iter_params``.
        synchronous: Forwarded to each ``mlflow.log_param`` call.
    """
    for name, param in iter_params(config):
        mlflow.log_param(name, param, synchronous=synchronous)
|
hydraflow/run.py
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import TYPE_CHECKING, Any, overload
|
5
|
+
|
6
|
+
import mlflow
|
7
|
+
import numpy as np
|
8
|
+
from mlflow.entities.run import Run
|
9
|
+
from mlflow.tracking import artifact_utils
|
10
|
+
from omegaconf import DictConfig, OmegaConf
|
11
|
+
|
12
|
+
from hydraflow.config import iter_params
|
13
|
+
from hydraflow.util import uri_to_path
|
14
|
+
|
15
|
+
if TYPE_CHECKING:
|
16
|
+
from typing import Any
|
17
|
+
|
18
|
+
from pandas import DataFrame, Series
|
19
|
+
|
20
|
+
|
21
|
+
@overload
def filter_by_config(runs: list[Run], config: object) -> list[Run]: ...


@overload
def filter_by_config(runs: DataFrame, config: object) -> DataFrame: ...


def filter_by_config(runs: list[Run] | DataFrame, config: object):
    """Keep only the runs whose parameters match ``config``.

    Lists are handled by ``filter_by_config_list``; anything else is passed
    to ``filter_by_config_dataframe``.

    Args:
        runs: Runs as a list of ``Run`` objects or as a ``DataFrame``.
        config: Config object holding the parameter values to match.

    Returns:
        The filtered runs, in the same container type as the input.
    """
    if isinstance(runs, list):
        select = filter_by_config_list
    else:
        select = filter_by_config_dataframe

    return select(runs, config)
|
34
|
+
|
35
|
+
|
36
|
+
def _is_equal(run: Run, key: str, value: Any) -> bool:
|
37
|
+
param = run.data.params.get(key, value)
|
38
|
+
if param is None:
|
39
|
+
return False
|
40
|
+
|
41
|
+
return type(value)(param) == value
|
42
|
+
|
43
|
+
|
44
|
+
def filter_by_config_list(runs: list[Run], config: object) -> list[Run]:
    """Filter a list of runs by the parameters in ``config``.

    The list is narrowed once per flattened parameter, keeping the runs
    for which ``_is_equal`` is true.

    Args:
        runs: Candidate runs.
        config: Config object holding the parameter values to match.

    Returns:
        The runs that match every parameter.
    """
    selected = runs
    for name, expected in iter_params(config):
        selected = [
            candidate
            for candidate in selected
            if _is_equal(candidate, name, expected)
        ]

    return selected
|
49
|
+
|
50
|
+
|
51
|
+
def _series_equals_value(series: Series, value: Any) -> np.ndarray:
    """Return a boolean array marking the entries of ``series`` equal to ``value``."""
    # astype(bool) turns every non-empty string (including "False") into
    # True, and None cannot be used as a fill value, so both cases are
    # compared through their textual form.
    if value is None or isinstance(value, bool):
        return (series.astype(str) == str(value)).to_numpy()

    try:
        converted = series.fillna(value).astype(type(value))
    except (TypeError, ValueError):
        # Stored values that cannot be converted are compared as text.
        return (series.astype(str) == str(value)).to_numpy()

    return (converted == value).to_numpy()


def filter_by_config_dataframe(runs: DataFrame, config: object) -> DataFrame:
    """Filter a runs ``DataFrame`` by the parameters in ``config``.

    For every flattened parameter that has a ``params.<name>`` column, rows
    are kept only when the column is not null and equals the config value.
    Parameters without a matching column do not restrict the result.

    Args:
        runs: Runs table with parameter columns named ``params.<name>``.
        config: Config object holding the parameter values to match.

    Returns:
        The rows of ``runs`` that match every parameter.
    """
    mask = np.ones(len(runs), dtype=bool)

    for key, value in iter_params(config):
        name = f"params.{key}"
        if name not in runs:
            continue

        series = runs[name]
        present = series.notna().to_numpy()
        mask &= present & _series_equals_value(series, value)

    return runs[mask]
|
63
|
+
|
64
|
+
|
65
|
+
@overload
def get_by_config(runs: list[Run], config: object) -> Run: ...


@overload
def get_by_config(runs: DataFrame, config: object) -> Series: ...


def get_by_config(runs: list[Run] | DataFrame, config: object):
    """Return the single run whose parameters match ``config``.

    Args:
        runs: Runs as a list of ``Run`` objects or as a ``DataFrame``.
        config: Config object holding the parameter values to match.

    Returns:
        The matching list element, or the matching row for a ``DataFrame``.

    Raises:
        ValueError: If the number of matching runs is not exactly one.
    """
    matched = filter_by_config(runs, config)

    if len(matched) != 1:
        msg = f"filtered runs has not length of 1.: {len(matched)}"
        raise ValueError(msg)

    if isinstance(matched, list):
        return matched[0]
    return matched.iloc[0]
|
81
|
+
|
82
|
+
|
83
|
+
def drop_unique_params(runs: DataFrame) -> DataFrame:
    """Drop the parameter columns that hold at most one distinct value.

    Columns whose name does not start with ``params.`` are always kept.

    Args:
        runs: Runs table.

    Returns:
        A frame containing only the retained columns.
    """
    keep = [
        not (name.startswith("params.") and len(runs[name].unique()) <= 1)
        for name in runs.columns
    ]
    return runs.iloc[:, keep]
|
89
|
+
|
90
|
+
|
91
|
+
def get_param_names(runs: DataFrame) -> list[str]:
    """List the parameter names found in the columns of ``runs``.

    Args:
        runs: Runs table with parameter columns named ``params.<name>``.

    Returns:
        The column names with the ``params.`` prefix removed, in column
        order. Other columns and empty names are skipped.
    """
    prefix = "params."
    stripped = (
        column.removeprefix(prefix)
        for column in runs.columns
        if column.startswith(prefix)
    )
    return [name for name in stripped if name]
|
100
|
+
|
101
|
+
|
102
|
+
def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
    """Map each parameter name to the distinct values in its column.

    Args:
        runs: Runs table with parameter columns named ``params.<name>``.

    Returns:
        A dict keyed by parameter name, as given by ``get_param_names``,
        whose values are the unique entries of the matching column.
    """
    return {
        name: list(runs[f"params.{name}"].unique())
        for name in get_param_names(runs)
    }
|
108
|
+
|
109
|
+
|
110
|
+
def get_run_id(run: Run | Series | str) -> str:
    """Return the run ID for a run given in any supported form.

    Args:
        run: A run ID string, a ``Run`` object, or an object exposing a
            ``run_id`` attribute (such as a row of a runs table).

    Returns:
        The run ID.
    """
    if isinstance(run, str):
        # Already an ID.
        return run

    if isinstance(run, Run):
        return run.info.run_id

    return run.run_id
|
116
|
+
|
117
|
+
|
118
|
+
def get_artifact_uri(run: Run | Series | str, artifact_path: str | None = None) -> str:
    """Return the artifact URI of a run, optionally extended by a sub-path.

    Args:
        run: A run ID string, a ``Run`` object, or an object exposing an
            ``artifact_uri`` attribute.
        artifact_path: Optional path appended to the URI after a ``/``.

    Returns:
        The artifact URI.
    """
    if isinstance(run, str):
        # Only the ID is known, so MLflow is asked for the URI.
        base = artifact_utils.get_artifact_uri(run_id=run)
    elif isinstance(run, Run):
        base = run.info.artifact_uri
    else:
        base = run.artifact_uri

    if not artifact_path:
        return base  # type: ignore

    return f"{base}/{artifact_path}"
|
130
|
+
|
131
|
+
|
132
|
+
def get_artifact_dir(run: Run | Series | str) -> Path:
    """Return a run's artifact URI converted to a path by ``uri_to_path``.

    Args:
        run: Run object, run row, or run ID.

    Returns:
        The artifact directory as a ``Path``.
    """
    return uri_to_path(get_artifact_uri(run))
|
135
|
+
|
136
|
+
|
137
|
+
def get_artifact_path(
    run: Run | Series | str,
    artifact_path: str | None = None,
) -> Path:
    """Return a path inside a run's artifact directory.

    Args:
        run: Run object, run row, or run ID.
        artifact_path: Optional path relative to the artifact directory.

    Returns:
        The artifact directory itself when ``artifact_path`` is empty or
        ``None``, otherwise the directory joined with ``artifact_path``.
    """
    base = get_artifact_dir(run)
    if not artifact_path:
        return base

    return base / artifact_path
|
143
|
+
|
144
|
+
|
145
|
+
def load_config(run: Run | Series | str, output_subdir: str = ".hydra") -> DictConfig:
    """Load the ``config.yaml`` stored among a run's artifacts.

    Args:
        run: Run object, run row, or run ID.
        output_subdir: Artifact sub-directory that contains ``config.yaml``.

    Returns:
        The loaded config, or an empty ``DictConfig`` when downloading the
        artifact raises ``OSError``.
    """
    run_id = get_run_id(run)
    config_file = f"{output_subdir}/config.yaml"

    try:
        local_path = mlflow.artifacts.download_artifacts(
            run_id=run_id,
            artifact_path=config_file,
        )
    except OSError:
        return DictConfig({})

    return OmegaConf.load(local_path)  # type: ignore
|
157
|
+
|
158
|
+
|
159
|
+
def get_hydra_output_dir(run: Run | Series | str) -> Path:
    """Return the Hydra output directory recorded in a run's artifacts.

    The value is read from ``hydra.runtime.output_dir`` in the
    ``.hydra/hydra.yaml`` file under the run's artifact directory.

    Args:
        run: Run object, run row, or run ID.

    Returns:
        The recorded output directory.

    Raises:
        FileNotFoundError: If ``.hydra/hydra.yaml`` does not exist in the
            artifact directory.
    """
    path = get_artifact_dir(run) / ".hydra/hydra.yaml"

    if not path.exists():
        # Name the missing file so the failure can be diagnosed.
        msg = f"Hydra config file not found: {path}"
        raise FileNotFoundError(msg)

    hc = OmegaConf.load(path)
    return Path(hc.hydra.runtime.output_dir)
|
167
|
+
|
168
|
+
|
169
|
+
def log_hydra_output_dir(run: Run | Series | str) -> None:
    """Upload a run's recorded Hydra output directory as its artifacts.

    Args:
        run: Run object, run row, or run ID.

    Raises:
        FileNotFoundError: Propagated from ``get_hydra_output_dir`` when
            the run has no ``.hydra/hydra.yaml`` artifact.
    """
    output_dir = get_hydra_output_dir(run)
    # get_run_id handles all three accepted forms; reading `run.info`
    # directly fails for a runs-table row, which only exposes `run_id`.
    run_id = get_run_id(run)
    mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
|
hydraflow/util.py
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Hydra with MLflow
|
5
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
7
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
8
|
+
License-Expression: MIT
|
9
|
+
License-File: LICENSE
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
11
|
+
Classifier: Programming Language :: Python
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
15
|
+
Classifier: Topic :: Documentation
|
16
|
+
Classifier: Topic :: Software Development :: Documentation
|
17
|
+
Requires-Python: >=3.10
|
18
|
+
Requires-Dist: hydra-core
|
19
|
+
Requires-Dist: mlflow
|
20
|
+
Requires-Dist: watchdog
|
21
|
+
Provides-Extra: dev
|
22
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
23
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
24
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
26
|
+
Requires-Dist: setuptools; extra == 'dev'
|
27
|
+
Description-Content-Type: text/markdown
|
28
|
+
|
29
|
+
# hydraflow
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=9RaHPTloOOJYPUKKfPuK_wxKDr_J9A3rJ_gr-bLABD0,559
|
2
|
+
hydraflow/config.py,sha256=b3Plh_lmq94loZNw9QP2asd6thCLyTzzYSutH0cONXA,964
|
3
|
+
hydraflow/context.py,sha256=zBmbZWNLxUF2IDDPregPnR_sh3utmFwFJaneSsBsLDM,2558
|
4
|
+
hydraflow/mlflow.py,sha256=yDZ_oB1IZdCNNqHm_0LxdZ1Nld28IkW8Xl7NMhWLApE,453
|
5
|
+
hydraflow/run.py,sha256=XTAD_fd-ivvZ4tbjQLHrf6u5eAGRrrhqvExiZQcFnX8,4591
|
6
|
+
hydraflow/util.py,sha256=HTymDLqa2UzCw3kNjqHDaAZNdRMnrEAWhCJ7_ZD7ffA,264
|
7
|
+
hydraflow-0.1.0.dist-info/METADATA,sha256=WuryvAC_8MrC-UerPqbvcWxgBn9ABrnysQ0aRYimw3A,1021
|
8
|
+
hydraflow-0.1.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
9
|
+
hydraflow-0.1.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
10
|
+
hydraflow-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 Daizu
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|