hydraflow 0.14.3__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -13
- hydraflow/core/context.py +12 -32
- hydraflow/core/io.py +36 -115
- hydraflow/core/main.py +3 -3
- hydraflow/core/run.py +341 -0
- hydraflow/core/run_collection.py +525 -0
- hydraflow/core/run_info.py +84 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/METADATA +12 -10
- hydraflow-0.15.0.dist-info/RECORD +21 -0
- hydraflow/core/config.py +0 -122
- hydraflow/core/mlflow.py +0 -174
- hydraflow/core/param.py +0 -165
- hydraflow/entities/__init__.py +0 -0
- hydraflow/entities/run_collection.py +0 -583
- hydraflow/entities/run_data.py +0 -61
- hydraflow/entities/run_info.py +0 -36
- hydraflow-0.14.3.dist-info/RECORD +0 -25
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -3,35 +3,25 @@
|
|
3
3
|
from hydraflow.core.context import chdir_artifact, log_run, start_run
|
4
4
|
from hydraflow.core.io import (
|
5
5
|
get_artifact_dir,
|
6
|
-
get_artifact_path,
|
7
|
-
get_hydra_output_dir,
|
8
6
|
iter_artifact_paths,
|
9
7
|
iter_artifacts_dirs,
|
10
8
|
iter_experiment_dirs,
|
11
9
|
iter_run_dirs,
|
12
|
-
load_config,
|
13
|
-
remove_run,
|
14
10
|
)
|
15
11
|
from hydraflow.core.main import main
|
16
|
-
from hydraflow.core.mlflow import list_run_ids, list_run_paths, list_runs
|
17
|
-
from hydraflow.entities.run_collection import RunCollection
|
12
|
+
from hydraflow.core.run import Run
|
13
|
+
from hydraflow.core.run_collection import RunCollection
|
18
14
|
|
19
15
|
__all__ = [
|
16
|
+
"Run",
|
20
17
|
"RunCollection",
|
21
18
|
"chdir_artifact",
|
22
19
|
"get_artifact_dir",
|
23
|
-
"get_artifact_path",
|
24
|
-
"get_hydra_output_dir",
|
25
20
|
"iter_artifact_paths",
|
26
21
|
"iter_artifacts_dirs",
|
27
22
|
"iter_experiment_dirs",
|
28
23
|
"iter_run_dirs",
|
29
|
-
"list_run_ids",
|
30
|
-
"list_run_paths",
|
31
|
-
"list_runs",
|
32
|
-
"load_config",
|
33
24
|
"log_run",
|
34
25
|
"main",
|
35
|
-
"remove_run",
|
36
26
|
"start_run",
|
37
27
|
]
|
hydraflow/core/context.py
CHANGED
@@ -10,9 +10,7 @@ from typing import TYPE_CHECKING
|
|
10
10
|
|
11
11
|
from hydra.core.hydra_config import HydraConfig
|
12
12
|
|
13
|
-
from
|
14
|
-
|
15
|
-
from .mlflow import log_params, log_text
|
13
|
+
from .io import get_artifact_dir, log_text
|
16
14
|
|
17
15
|
if TYPE_CHECKING:
|
18
16
|
from collections.abc import Iterator
|
@@ -23,38 +21,22 @@ log = logging.getLogger(__name__)
|
|
23
21
|
|
24
22
|
|
25
23
|
@contextmanager
|
26
|
-
def log_run(
|
27
|
-
|
28
|
-
*,
|
29
|
-
synchronous: bool | None = None,
|
30
|
-
) -> Iterator[None]:
|
31
|
-
"""Log the parameters from the given configuration object.
|
24
|
+
def log_run(run: Run) -> Iterator[None]:
|
25
|
+
"""Log the parameters from the given configuration instance.
|
32
26
|
|
33
|
-
This context manager logs the parameters from the provided configuration
|
27
|
+
This context manager logs the parameters from the provided configuration instance
|
34
28
|
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
35
29
|
are logged and the run is properly closed.
|
36
30
|
|
37
31
|
Args:
|
38
|
-
|
39
|
-
synchronous (bool | None): Whether to log the parameters synchronously.
|
40
|
-
Defaults to None.
|
32
|
+
run (Run): The run instance.
|
41
33
|
|
42
34
|
Yields:
|
43
35
|
None
|
44
36
|
|
45
|
-
Example:
|
46
|
-
```python
|
47
|
-
with log_run(config):
|
48
|
-
# Perform operations within the MLflow run context
|
49
|
-
pass
|
50
|
-
```
|
51
|
-
|
52
37
|
"""
|
53
38
|
import mlflow
|
54
39
|
|
55
|
-
if config:
|
56
|
-
log_params(config, synchronous=synchronous)
|
57
|
-
|
58
40
|
hc = HydraConfig.get()
|
59
41
|
hydra_dir = Path(hc.runtime.output_dir)
|
60
42
|
|
@@ -71,12 +53,11 @@ def log_run(
|
|
71
53
|
raise
|
72
54
|
|
73
55
|
finally:
|
74
|
-
log_text(hydra_dir)
|
56
|
+
log_text(run, hydra_dir)
|
75
57
|
|
76
58
|
|
77
59
|
@contextmanager
|
78
60
|
def start_run(
|
79
|
-
config: object,
|
80
61
|
*,
|
81
62
|
chdir: bool = False,
|
82
63
|
run_id: str | None = None,
|
@@ -87,15 +68,14 @@ def start_run(
|
|
87
68
|
tags: dict[str, str] | None = None,
|
88
69
|
description: str | None = None,
|
89
70
|
log_system_metrics: bool | None = None,
|
90
|
-
synchronous: bool | None = None,
|
91
71
|
) -> Iterator[Run]:
|
92
|
-
"""Start an MLflow run and log parameters using the provided configuration
|
72
|
+
"""Start an MLflow run and log parameters using the provided configuration instance.
|
93
73
|
|
94
74
|
This context manager starts an MLflow run and logs parameters using the specified
|
95
|
-
configuration
|
75
|
+
configuration instance. It ensures that the run is properly closed after completion.
|
96
76
|
|
97
77
|
Args:
|
98
|
-
config (object): The configuration
|
78
|
+
config (object): The configuration instance to log parameters from.
|
99
79
|
chdir (bool): Whether to change the current working directory to the
|
100
80
|
artifact directory of the current run. Defaults to False.
|
101
81
|
run_id (str | None): The existing run ID. Defaults to None.
|
@@ -111,7 +91,7 @@ def start_run(
|
|
111
91
|
Defaults to None.
|
112
92
|
|
113
93
|
Yields:
|
114
|
-
Run: An MLflow Run
|
94
|
+
Run: An MLflow Run instance representing the started run.
|
115
95
|
|
116
96
|
"""
|
117
97
|
import mlflow
|
@@ -127,7 +107,7 @@ def start_run(
|
|
127
107
|
description=description,
|
128
108
|
log_system_metrics=log_system_metrics,
|
129
109
|
) as run,
|
130
|
-
log_run(
|
110
|
+
log_run(run),
|
131
111
|
):
|
132
112
|
if chdir:
|
133
113
|
with chdir_artifact(run):
|
@@ -137,7 +117,7 @@ def start_run(
|
|
137
117
|
|
138
118
|
|
139
119
|
@contextmanager
|
140
|
-
def chdir_artifact(run: Run | None = None) -> Iterator[Path]:
|
120
|
+
def chdir_artifact(run: Run) -> Iterator[Path]:
|
141
121
|
"""Change the current working directory to the artifact directory of the given run.
|
142
122
|
|
143
123
|
This context manager changes the current working directory to the artifact
|
hydraflow/core/io.py
CHANGED
@@ -3,17 +3,13 @@
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
5
|
import fnmatch
|
6
|
-
import shutil
|
7
6
|
import urllib.parse
|
8
7
|
import urllib.request
|
9
8
|
from pathlib import Path
|
10
9
|
from typing import TYPE_CHECKING
|
11
10
|
|
12
|
-
from hydra.core.hydra_config import HydraConfig
|
13
|
-
from omegaconf import DictConfig, ListConfig, OmegaConf
|
14
|
-
|
15
11
|
if TYPE_CHECKING:
|
16
|
-
from collections.abc import Callable,
|
12
|
+
from collections.abc import Callable, Iterator
|
17
13
|
|
18
14
|
from mlflow.entities import Run
|
19
15
|
|
@@ -27,24 +23,19 @@ def file_uri_to_path(uri: str) -> Path:
|
|
27
23
|
return Path(urllib.request.url2pathname(path)) # for Windows
|
28
24
|
|
29
25
|
|
30
|
-
def get_artifact_dir(run: Run | None = None) -> Path:
|
26
|
+
def get_artifact_dir(run: Run) -> Path:
|
31
27
|
"""Retrieve the artifact directory for the given run.
|
32
28
|
|
33
29
|
This function uses MLflow to get the artifact directory for the given run.
|
34
30
|
|
35
31
|
Args:
|
36
|
-
run (Run | None): The run object. Defaults to None.
|
32
|
+
run (Run | None): The run instance. Defaults to None.
|
37
33
|
|
38
34
|
Returns:
|
39
35
|
The local path to the directory where the artifacts are downloaded.
|
40
36
|
|
41
37
|
"""
|
42
|
-
|
43
|
-
|
44
|
-
if run is None:
|
45
|
-
uri = mlflow.get_artifact_uri()
|
46
|
-
else:
|
47
|
-
uri = run.info.artifact_uri
|
38
|
+
uri = run.info.artifact_uri
|
48
39
|
|
49
40
|
if not isinstance(uri, str):
|
50
41
|
raise NotImplementedError
|
@@ -52,105 +43,35 @@ def get_artifact_dir(run: Run | None = None) -> Path:
|
|
52
43
|
return file_uri_to_path(uri)
|
53
44
|
|
54
45
|
|
55
|
-
def
|
56
|
-
"""
|
57
|
-
|
58
|
-
This function uses MLflow to get the artifact path for the given run and path.
|
59
|
-
|
60
|
-
Args:
|
61
|
-
run (Run | None): The run object. Defaults to None.
|
62
|
-
path (str): The path to the artifact.
|
63
|
-
|
64
|
-
Returns:
|
65
|
-
The local path to the artifact.
|
66
|
-
|
67
|
-
"""
|
68
|
-
return get_artifact_dir(run) / path
|
69
|
-
|
70
|
-
|
71
|
-
def get_hydra_output_dir(run: Run | None = None) -> Path:
|
72
|
-
"""Retrieve the Hydra output directory for the given run.
|
73
|
-
|
74
|
-
This function returns the Hydra output directory. If no run is provided,
|
75
|
-
it retrieves the output directory from the current Hydra configuration.
|
76
|
-
If a run is provided, it retrieves the artifact path for the run, loads
|
77
|
-
the Hydra configuration from the downloaded artifacts, and returns the
|
78
|
-
output directory specified in that configuration.
|
79
|
-
|
80
|
-
Args:
|
81
|
-
run (Run | None): The run object. Defaults to None.
|
82
|
-
|
83
|
-
Returns:
|
84
|
-
Path: The path to the Hydra output directory.
|
85
|
-
|
86
|
-
Raises:
|
87
|
-
FileNotFoundError: If the Hydra configuration file is not found
|
88
|
-
in the artifacts.
|
89
|
-
|
90
|
-
"""
|
91
|
-
if run is None:
|
92
|
-
hc = HydraConfig.get()
|
93
|
-
return Path(hc.runtime.output_dir)
|
94
|
-
|
95
|
-
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
96
|
-
|
97
|
-
if path.exists():
|
98
|
-
hc = OmegaConf.load(path)
|
99
|
-
return Path(hc.hydra.runtime.output_dir)
|
100
|
-
|
101
|
-
raise FileNotFoundError
|
102
|
-
|
46
|
+
def log_text(run: Run, from_dir: Path, pattern: str = "*.log") -> None:
|
47
|
+
"""Log text files in the given directory as artifacts.
|
103
48
|
|
104
|
-
|
105
|
-
"""Load the configuration for a given run.
|
106
|
-
|
107
|
-
This function loads the configuration for the provided Run instance
|
108
|
-
by downloading the configuration file from the MLflow artifacts and
|
109
|
-
loading it using OmegaConf. It returns an empty config if
|
110
|
-
`.hydra/config.yaml` is not found in the run's artifact directory.
|
49
|
+
Append the text files to the existing text file in the artifact directory.
|
111
50
|
|
112
51
|
Args:
|
113
|
-
run (Run): The
|
114
|
-
|
115
|
-
|
116
|
-
The loaded configuration as a DictConfig object. Returns an empty
|
117
|
-
DictConfig if the configuration file is not found.
|
52
|
+
run (Run): The run instance.
|
53
|
+
from_dir (Path): The directory to find the logs in.
|
54
|
+
pattern (str): The pattern to match the logs.
|
118
55
|
|
119
56
|
"""
|
120
|
-
|
121
|
-
return OmegaConf.load(path) # type: ignore
|
122
|
-
|
123
|
-
|
124
|
-
def load_overrides(run: Run) -> ListConfig:
|
125
|
-
"""Load the overrides for a given run.
|
126
|
-
|
127
|
-
This function loads the overrides for the provided Run instance
|
128
|
-
by downloading the overrides file from the MLflow artifacts and
|
129
|
-
loading it using OmegaConf. It returns an empty config if
|
130
|
-
`.hydra/overrides.yaml` is not found in the run's artifact directory.
|
131
|
-
|
132
|
-
Args:
|
133
|
-
run (Run): The Run instance for which to load the configuration.
|
134
|
-
|
135
|
-
Returns:
|
136
|
-
The loaded configuration as a DictConfig object. Returns an empty
|
137
|
-
DictConfig if the configuration file is not found.
|
138
|
-
|
139
|
-
"""
|
140
|
-
path = get_artifact_dir(run) / ".hydra/overrides.yaml"
|
141
|
-
return sorted(OmegaConf.load(path)) # type: ignore
|
57
|
+
import mlflow
|
142
58
|
|
59
|
+
artifact_dir = get_artifact_dir(run)
|
143
60
|
|
144
|
-
|
145
|
-
|
146
|
-
|
61
|
+
for file in from_dir.glob(pattern):
|
62
|
+
if not file.is_file():
|
63
|
+
continue
|
147
64
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
65
|
+
file_artifact = artifact_dir / file.name
|
66
|
+
if file_artifact.exists():
|
67
|
+
text = file_artifact.read_text()
|
68
|
+
if not text.endswith("\n"):
|
69
|
+
text += "\n"
|
70
|
+
else:
|
71
|
+
text = ""
|
152
72
|
|
153
|
-
|
73
|
+
text += file.read_text()
|
74
|
+
mlflow.log_text(text, file.name)
|
154
75
|
|
155
76
|
|
156
77
|
def get_experiment_name(path: Path) -> str | None:
|
@@ -187,43 +108,43 @@ def predicate_experiment_dir(
|
|
187
108
|
|
188
109
|
|
189
110
|
def iter_experiment_dirs(
|
190
|
-
|
111
|
+
tracking_dir: str | Path,
|
191
112
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
192
113
|
) -> Iterator[Path]:
|
193
|
-
"""Iterate over the experiment directories in the
|
114
|
+
"""Iterate over the experiment directories in the tracking directory."""
|
194
115
|
if isinstance(experiment_names, str):
|
195
116
|
experiment_names = [experiment_names]
|
196
117
|
|
197
|
-
for path in Path(
|
118
|
+
for path in Path(tracking_dir).iterdir():
|
198
119
|
if predicate_experiment_dir(path, experiment_names):
|
199
120
|
yield path
|
200
121
|
|
201
122
|
|
202
123
|
def iter_run_dirs(
|
203
|
-
|
124
|
+
tracking_dir: str | Path,
|
204
125
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
205
126
|
) -> Iterator[Path]:
|
206
|
-
"""Iterate over the run directories in the
|
207
|
-
for experiment_dir in iter_experiment_dirs(
|
127
|
+
"""Iterate over the run directories in the tracking directory."""
|
128
|
+
for experiment_dir in iter_experiment_dirs(tracking_dir, experiment_names):
|
208
129
|
for path in experiment_dir.iterdir():
|
209
130
|
if path.is_dir() and (path / "artifacts").exists():
|
210
131
|
yield path
|
211
132
|
|
212
133
|
|
213
134
|
def iter_artifacts_dirs(
|
214
|
-
|
135
|
+
tracking_dir: str | Path,
|
215
136
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
216
137
|
) -> Iterator[Path]:
|
217
|
-
"""Iterate over the artifacts directories in the
|
218
|
-
for path in iter_run_dirs(
|
138
|
+
"""Iterate over the artifacts directories in the tracking directory."""
|
139
|
+
for path in iter_run_dirs(tracking_dir, experiment_names):
|
219
140
|
yield path / "artifacts"
|
220
141
|
|
221
142
|
|
222
143
|
def iter_artifact_paths(
|
223
|
-
|
144
|
+
tracking_dir: str | Path,
|
224
145
|
artifact_path: str | Path,
|
225
146
|
experiment_names: str | list[str] | Callable[[str], bool] | None = None,
|
226
147
|
) -> Iterator[Path]:
|
227
|
-
"""Iterate over the artifact paths in the
|
228
|
-
for path in iter_artifacts_dirs(
|
148
|
+
"""Iterate over the artifact paths in the tracking directory."""
|
149
|
+
for path in iter_artifacts_dirs(tracking_dir, experiment_names):
|
229
150
|
yield path / artifact_path
|
hydraflow/core/main.py
CHANGED
@@ -111,7 +111,7 @@ def main(
|
|
111
111
|
if run.info.status == finished:
|
112
112
|
return
|
113
113
|
|
114
|
-
with start_run(
|
114
|
+
with start_run(run_id=run_id, chdir=chdir) as run:
|
115
115
|
app(run, config)
|
116
116
|
|
117
117
|
return inner_decorator
|
@@ -126,7 +126,7 @@ def get_run_id(uri: str, config: Any, overrides: list[str] | None) -> str | None
|
|
126
126
|
|
127
127
|
Args:
|
128
128
|
uri (str): The URI of the experiment.
|
129
|
-
config (object): The configuration object.
|
129
|
+
config (object): The configuration instance.
|
130
130
|
overrides (list[str] | None): The task overrides.
|
131
131
|
|
132
132
|
Returns:
|
@@ -146,7 +146,7 @@ def equals(run_dir: Path, config: Any, overrides: list[str] | None) -> bool:
|
|
146
146
|
|
147
147
|
Args:
|
148
148
|
run_dir (Path): The run directory.
|
149
|
-
config (object): The configuration object.
|
149
|
+
config (object): The configuration instance.
|
150
150
|
overrides (list[str] | None): The task overrides.
|
151
151
|
|
152
152
|
Returns:
|