hydraflow 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow/__init__.py +3 -10
- hydraflow/config.py +10 -27
- hydraflow/context.py +6 -49
- hydraflow/main.py +128 -20
- hydraflow/mlflow.py +93 -151
- hydraflow/param.py +2 -2
- hydraflow/run_collection.py +10 -156
- hydraflow/run_data.py +4 -2
- hydraflow/utils.py +19 -28
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/METADATA +3 -3
- hydraflow-0.8.0.dist-info/RECORD +17 -0
- hydraflow-0.7.4.dist-info/RECORD +0 -17
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,17 +1,14 @@
|
|
1
1
|
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
2
|
|
3
|
-
from hydraflow.config import select_config, select_overrides
|
4
3
|
from hydraflow.context import chdir_artifact, log_run, start_run
|
5
4
|
from hydraflow.main import main
|
6
|
-
from hydraflow.mlflow import list_runs, search_runs, set_experiment
|
5
|
+
from hydraflow.mlflow import list_run_ids, list_run_paths, list_runs
|
7
6
|
from hydraflow.run_collection import RunCollection
|
8
7
|
from hydraflow.utils import (
|
9
8
|
get_artifact_dir,
|
10
9
|
get_artifact_path,
|
11
10
|
get_hydra_output_dir,
|
12
|
-
get_overrides,
|
13
11
|
load_config,
|
14
|
-
load_overrides,
|
15
12
|
remove_run,
|
16
13
|
)
|
17
14
|
|
@@ -21,16 +18,12 @@ __all__ = [
|
|
21
18
|
"get_artifact_dir",
|
22
19
|
"get_artifact_path",
|
23
20
|
"get_hydra_output_dir",
|
24
|
-
"get_overrides",
|
21
|
+
"list_run_ids",
|
22
|
+
"list_run_paths",
|
25
23
|
"list_runs",
|
26
24
|
"load_config",
|
27
|
-
"load_overrides",
|
28
25
|
"log_run",
|
29
26
|
"main",
|
30
27
|
"remove_run",
|
31
|
-
"search_runs",
|
32
|
-
"select_config",
|
33
|
-
"select_overrides",
|
34
|
-
"set_experiment",
|
35
28
|
"start_run",
|
36
29
|
]
|
hydraflow/config.py
CHANGED
@@ -6,35 +6,19 @@ from typing import TYPE_CHECKING
|
|
6
6
|
|
7
7
|
from omegaconf import DictConfig, ListConfig, OmegaConf
|
8
8
|
|
9
|
-
from hydraflow.utils import get_overrides
|
10
|
-
|
11
9
|
if TYPE_CHECKING:
|
12
10
|
from collections.abc import Iterator
|
13
11
|
from typing import Any
|
14
12
|
|
15
13
|
|
16
|
-
def
|
17
|
-
"""Iterate over parameters and collect them into a dictionary.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
config (object): The configuration object to iterate over.
|
21
|
-
prefix (str): The prefix to prepend to the parameter keys.
|
22
|
-
|
23
|
-
Returns:
|
24
|
-
dict[str, Any]: A dictionary of collected parameters.
|
25
|
-
|
26
|
-
"""
|
27
|
-
return dict(iter_params(config))
|
28
|
-
|
29
|
-
|
30
|
-
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
14
|
+
def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
31
15
|
"""Recursively iterate over the parameters in the given configuration object.
|
32
16
|
|
33
17
|
This function traverses the configuration object and yields key-value pairs
|
34
18
|
representing the parameters. The keys are prefixed with the provided prefix.
|
35
19
|
|
36
20
|
Args:
|
37
|
-
config (object): The configuration object to iterate over. This can be a
|
21
|
+
config (Any): The configuration object to iterate over. This can be a
|
38
22
|
dictionary, list, DictConfig, or ListConfig.
|
39
23
|
prefix (str): The prefix to prepend to the parameter keys.
|
40
24
|
Defaults to an empty string.
|
@@ -50,7 +34,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
50
34
|
config = _from_dotlist(config)
|
51
35
|
|
52
36
|
if not isinstance(config, DictConfig | ListConfig):
|
53
|
-
config = OmegaConf.create(config)
|
37
|
+
config = OmegaConf.create(config)
|
54
38
|
|
55
39
|
yield from _iter_params(config, prefix)
|
56
40
|
|
@@ -65,7 +49,7 @@ def _from_dotlist(config: list[str]) -> dict[str, str]:
|
|
65
49
|
return result
|
66
50
|
|
67
51
|
|
68
|
-
def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
52
|
+
def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
69
53
|
if isinstance(config, DictConfig):
|
70
54
|
for key, value in config.items():
|
71
55
|
if _is_param(value):
|
@@ -83,12 +67,12 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
83
67
|
yield from _iter_params(value, f"{prefix}{index}.")
|
84
68
|
|
85
69
|
|
86
|
-
def _is_param(value: object) -> bool:
|
70
|
+
def _is_param(value: Any) -> bool:
|
87
71
|
"""Check if the given value is a parameter."""
|
88
72
|
if isinstance(value, DictConfig):
|
89
73
|
return False
|
90
74
|
|
91
|
-
if isinstance(value, ListConfig):
|
75
|
+
if isinstance(value, ListConfig):
|
92
76
|
if any(isinstance(v, DictConfig | ListConfig) for v in value):
|
93
77
|
return False
|
94
78
|
|
@@ -103,14 +87,14 @@ def _convert(value: Any) -> Any:
|
|
103
87
|
return value
|
104
88
|
|
105
89
|
|
106
|
-
def select_config(config: object, names: list[str]) -> dict[str, Any]:
|
90
|
+
def select_config(config: Any, names: list[str]) -> dict[str, Any]:
|
107
91
|
"""Select the given parameters from the configuration object.
|
108
92
|
|
109
93
|
This function selects the given parameters from the configuration object
|
110
94
|
and returns a new configuration object containing only the selected parameters.
|
111
95
|
|
112
96
|
Args:
|
113
|
-
config (object): The configuration object to select parameters from.
|
97
|
+
config (Any): The configuration object to select parameters from.
|
114
98
|
names (list[str]): The names of the parameters to select.
|
115
99
|
|
116
100
|
Returns:
|
@@ -120,7 +104,7 @@ def select_config(config: object, names: list[str]) -> dict[str, Any]:
|
|
120
104
|
if not isinstance(config, DictConfig):
|
121
105
|
config = OmegaConf.structured(config)
|
122
106
|
|
123
|
-
return {name: _get(config, name) for name in names}
|
107
|
+
return {name: _get(config, name) for name in names}
|
124
108
|
|
125
109
|
|
126
110
|
def _get(config: DictConfig, name: str) -> Any:
|
@@ -132,8 +116,7 @@ def _get(config: DictConfig, name: str) -> Any:
|
|
132
116
|
return _get(config.get(prefix), name)
|
133
117
|
|
134
118
|
|
135
|
-
def select_overrides(config: object) -> dict[str, Any]:
|
119
|
+
def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
|
136
120
|
"""Select the given overrides from the configuration object."""
|
137
|
-
overrides = get_overrides()
|
138
121
|
names = [override.split("=")[0].strip() for override in overrides]
|
139
122
|
return select_config(config, names)
|
hydraflow/context.py
CHANGED
@@ -12,7 +12,7 @@ import mlflow
|
|
12
12
|
import mlflow.artifacts
|
13
13
|
from hydra.core.hydra_config import HydraConfig
|
14
14
|
|
15
|
-
from hydraflow.mlflow import log_params
|
15
|
+
from hydraflow.mlflow import log_params, log_text
|
16
16
|
from hydraflow.utils import get_artifact_dir
|
17
17
|
|
18
18
|
if TYPE_CHECKING:
|
@@ -55,11 +55,11 @@ def log_run(
|
|
55
55
|
log_params(config, synchronous=synchronous)
|
56
56
|
|
57
57
|
hc = HydraConfig.get()
|
58
|
-
|
58
|
+
hydra_dir = Path(hc.runtime.output_dir)
|
59
59
|
|
60
60
|
# Save '.hydra' config directory.
|
61
|
-
|
62
|
-
mlflow.log_artifacts(
|
61
|
+
hydra_subdir = hydra_dir / (hc.output_subdir or "")
|
62
|
+
mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
|
63
63
|
|
64
64
|
try:
|
65
65
|
yield
|
@@ -70,43 +70,14 @@ def log_run(
|
|
70
70
|
raise
|
71
71
|
|
72
72
|
finally:
|
73
|
-
log_text(
|
74
|
-
|
75
|
-
|
76
|
-
def log_text(directory: Path, pattern: str = "*.log") -> None:
|
77
|
-
"""Log text files in the given directory as artifacts.
|
78
|
-
|
79
|
-
Append the text files to the existing text file in the artifact directory.
|
80
|
-
|
81
|
-
Args:
|
82
|
-
directory (Path): The directory to find the logs in.
|
83
|
-
pattern (str): The pattern to match the logs.
|
84
|
-
|
85
|
-
"""
|
86
|
-
artifact_dir = get_artifact_dir()
|
87
|
-
|
88
|
-
for file in directory.glob(pattern):
|
89
|
-
if not file.is_file():
|
90
|
-
continue
|
91
|
-
|
92
|
-
file_artifact = artifact_dir / file.name
|
93
|
-
if file_artifact.exists():
|
94
|
-
text = file_artifact.read_text()
|
95
|
-
if not text.endswith("\n"):
|
96
|
-
text += "\n"
|
97
|
-
else:
|
98
|
-
text = ""
|
99
|
-
|
100
|
-
text += file.read_text()
|
101
|
-
mlflow.log_text(text, file.name)
|
73
|
+
log_text(hydra_dir)
|
102
74
|
|
103
75
|
|
104
76
|
@contextmanager
|
105
|
-
def start_run( # noqa: PLR0913
|
77
|
+
def start_run(
|
106
78
|
config: object,
|
107
79
|
*,
|
108
80
|
chdir: bool = False,
|
109
|
-
run: Run | None = None,
|
110
81
|
run_id: str | None = None,
|
111
82
|
experiment_id: str | None = None,
|
112
83
|
run_name: str | None = None,
|
@@ -126,7 +97,6 @@ def start_run( # noqa: PLR0913
|
|
126
97
|
config (object): The configuration object to log parameters from.
|
127
98
|
chdir (bool): Whether to change the current working directory to the
|
128
99
|
artifact directory of the current run. Defaults to False.
|
129
|
-
run (Run | None): The existing run. Defaults to None.
|
130
100
|
run_id (str | None): The existing run ID. Defaults to None.
|
131
101
|
experiment_id (str | None): The experiment ID. Defaults to None.
|
132
102
|
run_name (str | None): The name of the run. Defaults to None.
|
@@ -142,20 +112,7 @@ def start_run( # noqa: PLR0913
|
|
142
112
|
Yields:
|
143
113
|
Run: An MLflow Run object representing the started run.
|
144
114
|
|
145
|
-
Example:
|
146
|
-
with start_run(config) as run:
|
147
|
-
# Perform operations within the MLflow run context
|
148
|
-
pass
|
149
|
-
|
150
|
-
See Also:
|
151
|
-
- `mlflow.start_run`: The MLflow function to start a run directly.
|
152
|
-
- `log_run`: A context manager to log parameters and manage the MLflow
|
153
|
-
run context.
|
154
|
-
|
155
115
|
"""
|
156
|
-
if run:
|
157
|
-
run_id = run.info.run_id
|
158
|
-
|
159
116
|
with (
|
160
117
|
mlflow.start_run(
|
161
118
|
run_id=run_id,
|
hydraflow/main.py
CHANGED
@@ -1,54 +1,162 @@
|
|
1
|
-
"""
|
1
|
+
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
|
+
|
3
|
+
This module provides decorators and utilities to seamlessly combine Hydra's
|
4
|
+
configuration management with MLflow's experiment tracking capabilities. It
|
5
|
+
enables automatic run deduplication, configuration storage, and experiment
|
6
|
+
management.
|
7
|
+
|
8
|
+
The main functionality is provided through the `main` decorator, which can be
|
9
|
+
used to wrap experiment entry points. This decorator handles:
|
10
|
+
- Configuration management via Hydra
|
11
|
+
- Experiment tracking via MLflow
|
12
|
+
- Run deduplication based on configurations
|
13
|
+
- Working directory management
|
14
|
+
- Automatic configuration storage
|
15
|
+
|
16
|
+
Example:
|
17
|
+
```python
|
18
|
+
from dataclasses import dataclass
|
19
|
+
from mlflow.entities import Run
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class Config:
|
23
|
+
learning_rate: float
|
24
|
+
batch_size: int
|
25
|
+
|
26
|
+
@main(Config)
|
27
|
+
def train(run: Run, config: Config):
|
28
|
+
# Your training code here
|
29
|
+
pass
|
30
|
+
```
|
31
|
+
|
32
|
+
"""
|
2
33
|
|
3
34
|
from __future__ import annotations
|
4
35
|
|
5
36
|
from functools import wraps
|
6
|
-
from typing import TYPE_CHECKING,
|
37
|
+
from typing import TYPE_CHECKING, TypeVar
|
7
38
|
|
8
39
|
import hydra
|
40
|
+
import mlflow
|
9
41
|
from hydra.core.config_store import ConfigStore
|
42
|
+
from hydra.core.hydra_config import HydraConfig
|
10
43
|
from mlflow.entities import RunStatus
|
44
|
+
from omegaconf import OmegaConf
|
11
45
|
|
12
46
|
import hydraflow
|
47
|
+
from hydraflow.utils import file_uri_to_path
|
13
48
|
|
14
49
|
if TYPE_CHECKING:
|
15
50
|
from collections.abc import Callable
|
51
|
+
from pathlib import Path
|
16
52
|
|
17
53
|
from mlflow.entities import Run
|
18
54
|
|
19
55
|
FINISHED = RunStatus.to_string(RunStatus.FINISHED)
|
20
56
|
|
57
|
+
T = TypeVar("T")
|
58
|
+
|
21
59
|
|
22
60
|
def main(
|
23
|
-
node:
|
61
|
+
node: T | type[T],
|
24
62
|
config_name: str = "config",
|
25
63
|
*,
|
26
64
|
chdir: bool = False,
|
27
65
|
force_new_run: bool = False,
|
28
|
-
|
66
|
+
match_overrides: bool = False,
|
67
|
+
rerun_finished: bool = False,
|
29
68
|
):
|
30
|
-
"""
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
69
|
+
"""Decorator for configuring and running MLflow experiments with Hydra.
|
70
|
+
|
71
|
+
This decorator combines Hydra configuration management with MLflow experiment
|
72
|
+
tracking. It automatically handles run deduplication and configuration storage.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
node: Configuration node class or instance defining the structure of the
|
76
|
+
configuration.
|
77
|
+
config_name: Name of the configuration. Defaults to "config".
|
78
|
+
chdir: If True, changes working directory to the artifact directory
|
79
|
+
of the run. Defaults to False.
|
80
|
+
force_new_run: If True, always creates a new MLflow run instead of
|
81
|
+
reusing existing ones. Defaults to False.
|
82
|
+
match_overrides: If True, matches runs based on Hydra CLI overrides
|
83
|
+
instead of full config. Defaults to False.
|
84
|
+
rerun_finished: If True, allows rerunning completed runs. Defaults to
|
85
|
+
False.
|
86
|
+
|
87
|
+
"""
|
88
|
+
|
89
|
+
def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
|
90
|
+
ConfigStore.instance().store(config_name, node)
|
91
|
+
|
92
|
+
@hydra.main(config_name=config_name, version_base=None)
|
35
93
|
@wraps(app)
|
36
|
-
|
37
|
-
|
38
|
-
|
94
|
+
def inner_decorator(config: T) -> None:
|
95
|
+
hc = HydraConfig.get()
|
96
|
+
experiment = mlflow.set_experiment(hc.job.name)
|
39
97
|
|
40
98
|
if force_new_run:
|
41
|
-
|
99
|
+
run_id = None
|
42
100
|
else:
|
43
|
-
|
44
|
-
|
101
|
+
uri = experiment.artifact_location
|
102
|
+
overrides = hc.overrides.task if match_overrides else None
|
103
|
+
run_id = get_run_id(uri, config, overrides)
|
45
104
|
|
46
|
-
if
|
47
|
-
|
105
|
+
if run_id and not rerun_finished:
|
106
|
+
run = mlflow.get_run(run_id)
|
107
|
+
if run.info.status == FINISHED:
|
108
|
+
return
|
48
109
|
|
49
|
-
with hydraflow.start_run(
|
50
|
-
app(run,
|
110
|
+
with hydraflow.start_run(config, run_id=run_id, chdir=chdir) as run:
|
111
|
+
app(run, config)
|
51
112
|
|
52
|
-
return
|
113
|
+
return inner_decorator
|
53
114
|
|
54
115
|
return decorator
|
116
|
+
|
117
|
+
|
118
|
+
def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | None:
|
119
|
+
"""Try to get the run ID for the given configuration.
|
120
|
+
|
121
|
+
If the run is not found, the function will return None.
|
122
|
+
|
123
|
+
Args:
|
124
|
+
uri (str): The URI of the experiment.
|
125
|
+
config (object): The configuration object.
|
126
|
+
overrides (list[str] | None): The task overrides.
|
127
|
+
|
128
|
+
Returns:
|
129
|
+
The run ID for the given configuration or overrides. Returns None if
|
130
|
+
no run ID is found.
|
131
|
+
|
132
|
+
"""
|
133
|
+
for run_dir in file_uri_to_path(uri).iterdir():
|
134
|
+
if run_dir.is_dir() and equals(run_dir, config, overrides):
|
135
|
+
return run_dir.name
|
136
|
+
|
137
|
+
return None
|
138
|
+
|
139
|
+
|
140
|
+
def equals(run_dir: Path, config: object, overrides: list[str] | None) -> bool:
|
141
|
+
"""Check if the run directory matches the given configuration or overrides.
|
142
|
+
|
143
|
+
Args:
|
144
|
+
run_dir (Path): The run directory.
|
145
|
+
config (object): The configuration object.
|
146
|
+
overrides (list[str] | None): The task overrides.
|
147
|
+
|
148
|
+
Returns:
|
149
|
+
True if the run directory matches the given configuration or overrides,
|
150
|
+
False otherwise.
|
151
|
+
|
152
|
+
"""
|
153
|
+
if overrides is None:
|
154
|
+
path = run_dir / "artifacts/.hydra/config.yaml"
|
155
|
+
else:
|
156
|
+
path = run_dir / "artifacts/.hydra/overrides.yaml"
|
157
|
+
config = overrides
|
158
|
+
|
159
|
+
if not path.exists():
|
160
|
+
return False
|
161
|
+
|
162
|
+
return OmegaConf.load(path) == config
|
hydraflow/mlflow.py
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
-
"""
|
1
|
+
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
2
|
|
3
3
|
This module provides functions to log parameters from Hydra configuration objects
|
4
4
|
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
5
|
configuration management with MLflow's experiment tracking capabilities.
|
6
|
-
|
7
|
-
Key Features:
|
8
|
-
- **Experiment Management**: Set experiment names and tracking URIs using Hydra
|
9
|
-
configuration details.
|
10
|
-
- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
|
11
|
-
supporting both synchronous and asynchronous logging.
|
12
|
-
- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
|
13
|
-
multiple MLflow runs, providing methods to filter and retrieve runs based on
|
14
|
-
various criteria.
|
15
6
|
"""
|
16
7
|
|
17
8
|
from __future__ import annotations
|
@@ -21,149 +12,132 @@ from typing import TYPE_CHECKING
|
|
21
12
|
import joblib
|
22
13
|
import mlflow
|
23
14
|
import mlflow.artifacts
|
24
|
-
from hydra.core.hydra_config import HydraConfig
|
25
|
-
from mlflow.entities import ViewType
|
26
|
-
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
|
27
15
|
|
28
16
|
from hydraflow.config import iter_params
|
29
17
|
from hydraflow.run_collection import RunCollection
|
30
|
-
from hydraflow.utils import get_artifact_dir
|
18
|
+
from hydraflow.utils import file_uri_to_path, get_artifact_dir
|
31
19
|
|
32
20
|
if TYPE_CHECKING:
|
33
21
|
from pathlib import Path
|
34
|
-
|
35
|
-
from mlflow.entities.experiment import Experiment
|
22
|
+
from typing import Any
|
36
23
|
|
37
24
|
|
38
|
-
def
|
39
|
-
|
40
|
-
suffix: str = "",
|
41
|
-
uri: str | Path | None = None,
|
42
|
-
name: str | None = None,
|
43
|
-
) -> Experiment:
|
44
|
-
"""Set the experiment name and tracking URI optionally.
|
25
|
+
def log_params(config: Any, *, synchronous: bool | None = None) -> None:
|
26
|
+
"""Log the parameters from the given configuration object.
|
45
27
|
|
46
|
-
This
|
47
|
-
|
48
|
-
|
28
|
+
This method logs the parameters from the provided configuration object
|
29
|
+
using MLflow. It iterates over the parameters and logs them using the
|
30
|
+
`mlflow.log_param` method.
|
49
31
|
|
50
32
|
Args:
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
name (str | None): The name of the experiment. Defaults to None.
|
33
|
+
config (Any): The configuration object to log the parameters from.
|
34
|
+
synchronous (bool | None): Whether to log the parameters synchronously.
|
35
|
+
Defaults to None.
|
55
36
|
|
56
|
-
|
57
|
-
|
58
|
-
|
37
|
+
"""
|
38
|
+
for key, value in iter_params(config):
|
39
|
+
mlflow.log_param(key, value, synchronous=synchronous)
|
40
|
+
|
41
|
+
|
42
|
+
def log_text(from_dir: Path, pattern: str = "*.log") -> None:
|
43
|
+
"""Log text files in the given directory as artifacts.
|
44
|
+
|
45
|
+
Append the text files to the existing text file in the artifact directory.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
from_dir (Path): The directory to find the logs in.
|
49
|
+
pattern (str): The pattern to match the logs.
|
59
50
|
|
60
51
|
"""
|
61
|
-
|
62
|
-
mlflow.set_tracking_uri(uri)
|
52
|
+
artifact_dir = get_artifact_dir()
|
63
53
|
|
64
|
-
|
65
|
-
|
54
|
+
for file in from_dir.glob(pattern):
|
55
|
+
if not file.is_file():
|
56
|
+
continue
|
66
57
|
|
67
|
-
|
68
|
-
|
69
|
-
|
58
|
+
file_artifact = artifact_dir / file.name
|
59
|
+
if file_artifact.exists():
|
60
|
+
text = file_artifact.read_text()
|
61
|
+
if not text.endswith("\n"):
|
62
|
+
text += "\n"
|
63
|
+
else:
|
64
|
+
text = ""
|
70
65
|
|
66
|
+
text += file.read_text()
|
67
|
+
mlflow.log_text(text, file.name)
|
71
68
|
|
72
|
-
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
73
|
-
"""Log the parameters from the given configuration object.
|
74
69
|
|
75
|
-
|
76
|
-
|
77
|
-
|
70
|
+
def list_run_paths(
|
71
|
+
experiment_names: str | list[str] | None = None,
|
72
|
+
*other: str,
|
73
|
+
) -> list[Path]:
|
74
|
+
"""List all run paths for the specified experiments.
|
75
|
+
|
76
|
+
This function retrieves all run paths for the given list of experiment names.
|
77
|
+
If no experiment names are provided (None), the function will search all runs
|
78
|
+
for all experiments except the "Default" experiment.
|
78
79
|
|
79
80
|
Args:
|
80
|
-
|
81
|
-
|
82
|
-
|
81
|
+
experiment_names (list[str] | None): List of experiment names to search
|
82
|
+
for runs. If None is provided, the function will search all runs
|
83
|
+
for all experiments except the "Default" experiment.
|
84
|
+
*other (str): The parts of the run directory to join.
|
85
|
+
|
86
|
+
Returns:
|
87
|
+
list[Path]: A list of run paths for the specified experiments.
|
83
88
|
|
84
89
|
"""
|
85
|
-
|
86
|
-
|
90
|
+
if isinstance(experiment_names, str):
|
91
|
+
experiment_names = [experiment_names]
|
92
|
+
|
93
|
+
elif experiment_names is None:
|
94
|
+
experiments = mlflow.search_experiments()
|
95
|
+
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
87
96
|
|
97
|
+
run_paths: list[Path] = []
|
88
98
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
filter_string: str = "",
|
93
|
-
run_view_type: int = ViewType.ACTIVE_ONLY,
|
94
|
-
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
95
|
-
order_by: list[str] | None = None,
|
96
|
-
search_all_experiments: bool = False,
|
97
|
-
experiment_names: list[str] | None = None,
|
98
|
-
) -> RunCollection:
|
99
|
-
"""Search for Runs that fit the specified criteria.
|
99
|
+
for name in experiment_names:
|
100
|
+
if experiment := mlflow.get_experiment_by_name(name):
|
101
|
+
uri = experiment.artifact_location
|
100
102
|
|
101
|
-
|
102
|
-
|
103
|
-
|
103
|
+
if isinstance(uri, str):
|
104
|
+
path = file_uri_to_path(uri)
|
105
|
+
run_paths.extend(p for p in path.iterdir() if p.is_dir())
|
104
106
|
|
105
|
-
|
106
|
-
|
107
|
+
if other:
|
108
|
+
return [p.joinpath(*other) for p in run_paths]
|
109
|
+
|
110
|
+
return run_paths
|
111
|
+
|
112
|
+
|
113
|
+
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
|
114
|
+
"""List all run IDs for the specified experiments.
|
115
|
+
|
116
|
+
This function retrieves all runs for the given list of experiment names.
|
117
|
+
If no experiment names are provided (None), the function will search all
|
118
|
+
runs for all experiments except the "Default" experiment.
|
107
119
|
|
108
120
|
Args:
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
113
|
-
``None`` will default to the active experiment if ``experiment_names``
|
114
|
-
is ``None`` or ``[]``.
|
115
|
-
filter_string (str): Filter query string, defaults to searching all
|
116
|
-
runs.
|
117
|
-
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
118
|
-
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
119
|
-
max_results (int): The maximum number of runs to put in the dataframe.
|
120
|
-
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
121
|
-
machine.
|
122
|
-
order_by (list[str] | None): List of columns to order by (e.g.,
|
123
|
-
"metrics.rmse"). The ``order_by`` column can contain an optional
|
124
|
-
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
125
|
-
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
126
|
-
``start_time DESC``, then ``run_id``.
|
127
|
-
search_all_experiments (bool): Boolean specifying whether all
|
128
|
-
experiments should be searched. Only honored if ``experiment_ids``
|
129
|
-
is ``[]`` or ``None``.
|
130
|
-
experiment_names (list[str] | None): List of experiment names. Search
|
131
|
-
can work with experiment IDs or experiment names, but not both in
|
132
|
-
the same call. Values other than ``None`` or ``[]`` will result in
|
133
|
-
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
134
|
-
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
135
|
-
default to the active experiment if ``experiment_ids`` is ``None``
|
136
|
-
or ``[]``.
|
121
|
+
experiment_names (list[str] | None): List of experiment names to search
|
122
|
+
for runs. If None is provided, the function will search all runs
|
123
|
+
for all experiments except the "Default" experiment.
|
137
124
|
|
138
125
|
Returns:
|
139
|
-
A
|
126
|
+
list[str]: A list of run IDs for the specified experiments.
|
140
127
|
|
141
128
|
"""
|
142
|
-
|
143
|
-
experiment_ids=experiment_ids,
|
144
|
-
filter_string=filter_string,
|
145
|
-
run_view_type=run_view_type,
|
146
|
-
max_results=max_results,
|
147
|
-
order_by=order_by,
|
148
|
-
output_format="list",
|
149
|
-
search_all_experiments=search_all_experiments,
|
150
|
-
experiment_names=experiment_names,
|
151
|
-
)
|
152
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
153
|
-
return RunCollection(runs) # type: ignore
|
129
|
+
return [run_path.stem for run_path in list_run_paths(experiment_names)]
|
154
130
|
|
155
131
|
|
156
132
|
def list_runs(
|
157
133
|
experiment_names: str | list[str] | None = None,
|
158
134
|
n_jobs: int = 0,
|
159
|
-
status: str | list[str] | int | list[int] | None = None,
|
160
135
|
) -> RunCollection:
|
161
136
|
"""List all runs for the specified experiments.
|
162
137
|
|
163
138
|
This function retrieves all runs for the given list of experiment names.
|
164
|
-
If no experiment names are provided (None),
|
165
|
-
for the
|
166
|
-
will search all runs for all experiments except the "Default" experiment.
|
139
|
+
If no experiment names are provided (None), the function will search all runs
|
140
|
+
for all experiments except the "Default" experiment.
|
167
141
|
The function returns the results as a `RunCollection` object.
|
168
142
|
|
169
143
|
Note:
|
@@ -171,55 +145,23 @@ def list_runs(
|
|
171
145
|
|
172
146
|
Args:
|
173
147
|
experiment_names (list[str] | None): List of experiment names to search
|
174
|
-
for runs. If None
|
175
|
-
|
176
|
-
|
177
|
-
n_jobs (int): The number of jobs to run in parallel. If 0, the function
|
178
|
-
will search runs sequentially.
|
179
|
-
status (str | list[str] | int | list[int] | None): The status of the runs
|
180
|
-
to filter.
|
148
|
+
for runs. If None is provided, the function will search all runs
|
149
|
+
for all experiments except the "Default" experiment.
|
150
|
+
n_jobs (int): The number of jobs to retrieve runs in parallel.
|
181
151
|
|
182
152
|
Returns:
|
183
153
|
RunCollection: A `RunCollection` instance containing the runs for the
|
184
154
|
specified experiments.
|
185
155
|
|
186
156
|
"""
|
187
|
-
|
188
|
-
if status is None:
|
189
|
-
return rc
|
190
|
-
|
191
|
-
return rc.filter(status=status)
|
192
|
-
|
193
|
-
|
194
|
-
def _list_runs(
|
195
|
-
experiment_names: str | list[str] | None = None,
|
196
|
-
n_jobs: int = 0,
|
197
|
-
) -> RunCollection:
|
198
|
-
if isinstance(experiment_names, str):
|
199
|
-
experiment_names = [experiment_names]
|
200
|
-
|
201
|
-
elif experiment_names == []:
|
202
|
-
experiments = mlflow.search_experiments()
|
203
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
157
|
+
run_ids = list_run_ids(experiment_names)
|
204
158
|
|
205
159
|
if n_jobs == 0:
|
206
|
-
|
207
|
-
|
208
|
-
if experiment_names is None:
|
209
|
-
experiment_id = _get_experiment_id()
|
210
|
-
experiment_names = [mlflow.get_experiment(experiment_id).name]
|
211
|
-
|
212
|
-
run_ids = []
|
213
|
-
|
214
|
-
for name in experiment_names:
|
215
|
-
if experiment := mlflow.get_experiment_by_name(name):
|
216
|
-
uri = experiment.artifact_location
|
160
|
+
runs = [mlflow.get_run(run_id) for run_id in run_ids]
|
217
161
|
|
218
|
-
|
219
|
-
|
220
|
-
|
162
|
+
else:
|
163
|
+
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
164
|
+
runs = joblib.Parallel(n_jobs, backend="threading")(it)
|
221
165
|
|
222
|
-
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
223
|
-
runs = joblib.Parallel(n_jobs, prefer="threads")(it)
|
224
166
|
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
225
167
|
return RunCollection(runs) # type: ignore
|
hydraflow/param.py
CHANGED
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
|
|
18
18
|
from mlflow.entities import Run
|
19
19
|
|
20
20
|
|
21
|
-
def match(param: str, value: Any) -> bool:
|
21
|
+
def match(param: str, value: Any) -> bool:
|
22
22
|
"""Check if the string matches the specified value.
|
23
23
|
|
24
24
|
Args:
|
@@ -68,7 +68,7 @@ def _match_list(param: str, value: list) -> bool | None:
|
|
68
68
|
|
69
69
|
|
70
70
|
def _match_tuple(param: str, value: tuple) -> bool | None:
|
71
|
-
if len(value) != 2:
|
71
|
+
if len(value) != 2:
|
72
72
|
return None
|
73
73
|
|
74
74
|
if any(param.startswith(x) for x in ["[", "(", "{"]):
|
hydraflow/run_collection.py
CHANGED
@@ -21,7 +21,7 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
from dataclasses import dataclass, field
|
23
23
|
from itertools import chain
|
24
|
-
from typing import TYPE_CHECKING, Any,
|
24
|
+
from typing import TYPE_CHECKING, Any, overload
|
25
25
|
|
26
26
|
from mlflow.entities import RunStatus
|
27
27
|
|
@@ -34,15 +34,9 @@ from hydraflow.utils import load_config
|
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from collections.abc import Callable, Iterator
|
37
|
-
from pathlib import Path
|
38
37
|
from typing import Any
|
39
38
|
|
40
39
|
from mlflow.entities.run import Run
|
41
|
-
from omegaconf import DictConfig
|
42
|
-
|
43
|
-
|
44
|
-
T = TypeVar("T")
|
45
|
-
P = ParamSpec("P")
|
46
40
|
|
47
41
|
|
48
42
|
@dataclass
|
@@ -124,11 +118,6 @@ class RunCollection:
|
|
124
118
|
runs = [run for run in self._runs if run not in other._runs] # noqa: SLF001
|
125
119
|
return self.__class__(runs)
|
126
120
|
|
127
|
-
@classmethod
|
128
|
-
def from_list(cls, runs: list[Run]) -> RunCollection:
|
129
|
-
"""Create a `RunCollection` instance from a list of MLflow `Run` instances."""
|
130
|
-
return cls(runs)
|
131
|
-
|
132
121
|
@property
|
133
122
|
def info(self) -> RunCollectionInfo:
|
134
123
|
"""An instance of `RunCollectionInfo`."""
|
@@ -139,26 +128,6 @@ class RunCollection:
|
|
139
128
|
"""An instance of `RunCollectionData`."""
|
140
129
|
return self._data
|
141
130
|
|
142
|
-
def take(self, n: int) -> RunCollection:
|
143
|
-
"""Take the first n runs from the collection.
|
144
|
-
|
145
|
-
If n is negative, the method returns the last n runs
|
146
|
-
from the collection.
|
147
|
-
|
148
|
-
Args:
|
149
|
-
n (int): The number of runs to take. If n is negative, the method
|
150
|
-
returns the last n runs from the collection.
|
151
|
-
|
152
|
-
Returns:
|
153
|
-
A new `RunCollection` instance containing the first n runs if n is
|
154
|
-
positive, or the last n runs if n is negative.
|
155
|
-
|
156
|
-
"""
|
157
|
-
if n < 0:
|
158
|
-
return self.__class__(self._runs[n:])
|
159
|
-
|
160
|
-
return self.__class__(self._runs[:n])
|
161
|
-
|
162
131
|
def one(self) -> Run:
|
163
132
|
"""Get the only `Run` instance in the collection.
|
164
133
|
|
@@ -238,8 +207,8 @@ class RunCollection:
|
|
238
207
|
self,
|
239
208
|
config: object | Callable[[Run], bool] | None = None,
|
240
209
|
*,
|
241
|
-
override: bool = False,
|
242
210
|
select: list[str] | None = None,
|
211
|
+
overrides: list[str] | None = None,
|
243
212
|
status: str | list[str] | int | list[int] | None = None,
|
244
213
|
**kwargs,
|
245
214
|
) -> RunCollection:
|
@@ -264,9 +233,9 @@ class RunCollection:
|
|
264
233
|
to filter the runs. This can be any object that provides key-value
|
265
234
|
pairs through the `iter_params` function, or a callable that
|
266
235
|
takes a `Run` object and returns a boolean value.
|
267
|
-
override (bool): If True, override the configuration object with the
|
268
|
-
provided key-value pairs.
|
269
236
|
select (list[str] | None): The list of parameters to select.
|
237
|
+
overrides (list[str] | None): The list of overrides to filter the
|
238
|
+
runs.
|
270
239
|
status (str | list[str] | int | list[int] | None): The status of the
|
271
240
|
runs to filter.
|
272
241
|
**kwargs: Additional key-value pairs to filter the runs.
|
@@ -279,8 +248,8 @@ class RunCollection:
|
|
279
248
|
filter_runs(
|
280
249
|
self._runs,
|
281
250
|
config,
|
282
|
-
override=override,
|
283
251
|
select=select,
|
252
|
+
overrides=overrides,
|
284
253
|
status=status,
|
285
254
|
**kwargs,
|
286
255
|
),
|
@@ -400,121 +369,6 @@ class RunCollection:
|
|
400
369
|
|
401
370
|
return params
|
402
371
|
|
403
|
-
def map(
|
404
|
-
self,
|
405
|
-
func: Callable[Concatenate[Run, P], T],
|
406
|
-
*args: P.args,
|
407
|
-
**kwargs: P.kwargs,
|
408
|
-
) -> Iterator[T]:
|
409
|
-
"""Return an iterator of results by applying a function to each run.
|
410
|
-
|
411
|
-
This method iterates over each run in the collection and applies the
|
412
|
-
provided function to it, along with any additional arguments and
|
413
|
-
keyword arguments.
|
414
|
-
|
415
|
-
Args:
|
416
|
-
func (Callable[[Run, P], T]): A function that takes a run and
|
417
|
-
additional arguments and returns a result.
|
418
|
-
*args: Additional arguments to pass to the function.
|
419
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
420
|
-
|
421
|
-
Yields:
|
422
|
-
Results obtained by applying the function to each run in the collection.
|
423
|
-
|
424
|
-
"""
|
425
|
-
return (func(run, *args, **kwargs) for run in self)
|
426
|
-
|
427
|
-
def map_id(
|
428
|
-
self,
|
429
|
-
func: Callable[Concatenate[str, P], T],
|
430
|
-
*args: P.args,
|
431
|
-
**kwargs: P.kwargs,
|
432
|
-
) -> Iterator[T]:
|
433
|
-
"""Return an iterator of results by applying a function to each run id.
|
434
|
-
|
435
|
-
Args:
|
436
|
-
func (Callable[[str, P], T]): A function that takes a run id and returns a
|
437
|
-
result.
|
438
|
-
*args: Additional arguments to pass to the function.
|
439
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
440
|
-
|
441
|
-
Yields:
|
442
|
-
Results obtained by applying the function to each run id in the
|
443
|
-
collection.
|
444
|
-
|
445
|
-
"""
|
446
|
-
return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
|
447
|
-
|
448
|
-
def map_config(
|
449
|
-
self,
|
450
|
-
func: Callable[Concatenate[DictConfig, P], T],
|
451
|
-
*args: P.args,
|
452
|
-
**kwargs: P.kwargs,
|
453
|
-
) -> Iterator[T]:
|
454
|
-
"""Return an iterator of results by applying a function to each run config.
|
455
|
-
|
456
|
-
Args:
|
457
|
-
func (Callable[[DictConfig, P], T]): A function that takes a run
|
458
|
-
configuration and returns a result.
|
459
|
-
*args: Additional arguments to pass to the function.
|
460
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
461
|
-
|
462
|
-
Yields:
|
463
|
-
Results obtained by applying the function to each run configuration
|
464
|
-
in the collection.
|
465
|
-
|
466
|
-
"""
|
467
|
-
return (func(load_config(run), *args, **kwargs) for run in self)
|
468
|
-
|
469
|
-
def map_uri(
|
470
|
-
self,
|
471
|
-
func: Callable[Concatenate[str | None, P], T],
|
472
|
-
*args: P.args,
|
473
|
-
**kwargs: P.kwargs,
|
474
|
-
) -> Iterator[T]:
|
475
|
-
"""Return an iterator of results by applying a function to each artifact URI.
|
476
|
-
|
477
|
-
Iterate over each run in the collection, retrieves the artifact URI, and
|
478
|
-
apply the provided function to it. If a run does not have an artifact
|
479
|
-
URI, None is passed to the function.
|
480
|
-
|
481
|
-
Args:
|
482
|
-
func (Callable[[str | None, P], T]): A function that takes an
|
483
|
-
artifact URI (string or None) and returns a result.
|
484
|
-
*args: Additional arguments to pass to the function.
|
485
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
486
|
-
|
487
|
-
Yields:
|
488
|
-
Results obtained by applying the function to each artifact URI in the
|
489
|
-
collection.
|
490
|
-
|
491
|
-
"""
|
492
|
-
return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
|
493
|
-
|
494
|
-
def map_dir(
|
495
|
-
self,
|
496
|
-
func: Callable[Concatenate[Path, P], T],
|
497
|
-
*args: P.args,
|
498
|
-
**kwargs: P.kwargs,
|
499
|
-
) -> Iterator[T]:
|
500
|
-
"""Return an iterator of results by applying a function to each artifact dir.
|
501
|
-
|
502
|
-
Iterate over each run in the collection, downloads the artifact
|
503
|
-
directory, and apply the provided function to the directory path.
|
504
|
-
|
505
|
-
Args:
|
506
|
-
func (Callable[[Path, P], T]): A function that takes an artifact directory
|
507
|
-
path (string) and returns a result.
|
508
|
-
*args: Additional arguments to pass to the function.
|
509
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
510
|
-
|
511
|
-
Yields:
|
512
|
-
Results obtained by applying the function to each artifact directory
|
513
|
-
in the collection.
|
514
|
-
|
515
|
-
"""
|
516
|
-
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir) # noqa: A001
|
517
|
-
|
518
372
|
def groupby(
|
519
373
|
self,
|
520
374
|
names: str | list[str],
|
@@ -631,8 +485,8 @@ def filter_runs(
|
|
631
485
|
runs: list[Run],
|
632
486
|
config: object | Callable[[Run], bool] | None = None,
|
633
487
|
*,
|
634
|
-
override: bool = False,
|
635
488
|
select: list[str] | None = None,
|
489
|
+
overrides: list[str] | None = None,
|
636
490
|
status: str | list[str] | int | list[int] | None = None,
|
637
491
|
**kwargs,
|
638
492
|
) -> list[Run]:
|
@@ -658,10 +512,10 @@ def filter_runs(
|
|
658
512
|
that provides key-value pairs through the `iter_params` function.
|
659
513
|
This can also be a callable that takes a `Run` object and returns
|
660
514
|
a boolean value. Defaults to None.
|
661
|
-
override (bool, optional): If True, filter the runs based on
|
662
|
-
the overrides. Defaults to False.
|
663
515
|
select (list[str] | None, optional): The list of parameters to select.
|
664
516
|
Defaults to None.
|
517
|
+
overrides (list[str] | None, optional): The list of overrides to filter the
|
518
|
+
runs. Defaults to None.
|
665
519
|
status (str | list[str] | RunStatus | list[RunStatus] | None, optional): The
|
666
520
|
status of the runs to filter. Defaults to None.
|
667
521
|
**kwargs: Additional key-value pairs to filter the runs.
|
@@ -674,8 +528,8 @@ def filter_runs(
|
|
674
528
|
runs = [run for run in runs if config(run)]
|
675
529
|
|
676
530
|
else:
|
677
|
-
if override:
|
678
|
-
config = select_overrides(config)
|
531
|
+
if overrides:
|
532
|
+
config = select_overrides(config, overrides)
|
679
533
|
elif select:
|
680
534
|
config = select_config(config, select)
|
681
535
|
|
hydraflow/run_data.py
CHANGED
@@ -6,7 +6,8 @@ from typing import TYPE_CHECKING
|
|
6
6
|
|
7
7
|
from pandas import DataFrame
|
8
8
|
|
9
|
-
from hydraflow.config import
|
9
|
+
from hydraflow.config import iter_params
|
10
|
+
from hydraflow.utils import load_config
|
10
11
|
|
11
12
|
if TYPE_CHECKING:
|
12
13
|
from collections.abc import Iterable
|
@@ -39,7 +40,8 @@ class RunCollectionData:
|
|
39
40
|
A DataFrame containing the runs' configurations.
|
40
41
|
|
41
42
|
"""
|
42
|
-
|
43
|
+
values = [dict(iter_params(load_config(r))) for r in self._runs]
|
44
|
+
return DataFrame(values)
|
43
45
|
|
44
46
|
|
45
47
|
def _to_dict(it: Iterable[dict[str, Any]]) -> dict[str, list[Any]]:
|
hydraflow/utils.py
CHANGED
@@ -12,46 +12,42 @@ import mlflow
|
|
12
12
|
import mlflow.artifacts
|
13
13
|
from hydra.core.hydra_config import HydraConfig
|
14
14
|
from mlflow.entities import Run
|
15
|
-
from omegaconf import DictConfig, OmegaConf
|
15
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from collections.abc import Iterable
|
19
19
|
|
20
20
|
|
21
|
-
def
|
21
|
+
def file_uri_to_path(uri: str) -> Path:
|
22
|
+
"""Convert a file URI to a local path."""
|
23
|
+
if not uri.startswith("file:"):
|
24
|
+
return Path(uri)
|
25
|
+
|
26
|
+
path = urllib.parse.urlparse(uri).path
|
27
|
+
return Path(urllib.request.url2pathname(path)) # for Windows
|
28
|
+
|
29
|
+
|
30
|
+
def get_artifact_dir(run: Run | None = None) -> Path:
|
22
31
|
"""Retrieve the artifact directory for the given run.
|
23
32
|
|
24
33
|
This function uses MLflow to get the artifact directory for the given run.
|
25
34
|
|
26
35
|
Args:
|
27
36
|
run (Run | None): The run object. Defaults to None.
|
28
|
-
uri (str | None): The URI of the artifact. Defaults to None.
|
29
37
|
|
30
38
|
Returns:
|
31
39
|
The local path to the directory where the artifacts are downloaded.
|
32
40
|
|
33
41
|
"""
|
34
|
-
if run is not None and uri is not None:
|
35
|
-
raise ValueError("Cannot provide both run and uri")
|
36
|
-
|
37
|
-
if run is None and uri is None:
|
42
|
+
if run is None:
|
38
43
|
uri = mlflow.get_artifact_uri()
|
39
|
-
|
44
|
+
else:
|
40
45
|
uri = run.info.artifact_uri
|
41
46
|
|
42
47
|
if not isinstance(uri, str):
|
43
48
|
raise NotImplementedError
|
44
49
|
|
45
|
-
|
46
|
-
return file_uri_to_path(uri)
|
47
|
-
|
48
|
-
return Path(uri)
|
49
|
-
|
50
|
-
|
51
|
-
def file_uri_to_path(uri: str) -> Path:
|
52
|
-
"""Convert a file URI to a local path."""
|
53
|
-
path = urllib.parse.urlparse(uri).path
|
54
|
-
return Path(urllib.request.url2pathname(path)) # for Windows
|
50
|
+
return file_uri_to_path(uri)
|
55
51
|
|
56
52
|
|
57
53
|
def get_artifact_path(run: Run | None, path: str) -> Path:
|
@@ -123,12 +119,7 @@ def load_config(run: Run) -> DictConfig:
|
|
123
119
|
return OmegaConf.load(path) # type: ignore
|
124
120
|
|
125
121
|
|
126
|
-
def get_overrides() -> list[str]:
|
127
|
-
"""Retrieve the overrides for the current run."""
|
128
|
-
return list(HydraConfig.get().overrides.task) # ListConifg -> list
|
129
|
-
|
130
|
-
|
131
|
-
def load_overrides(run: Run) -> list[str]:
|
122
|
+
def load_overrides(run: Run) -> ListConfig:
|
132
123
|
"""Load the overrides for a given run.
|
133
124
|
|
134
125
|
This function loads the overrides for the provided Run instance
|
@@ -137,15 +128,15 @@ def load_overrides(run: Run) -> list[str]:
|
|
137
128
|
`.hydra/overrides.yaml` is not found in the run's artifact directory.
|
138
129
|
|
139
130
|
Args:
|
140
|
-
run (Run): The Run instance for which to load the
|
131
|
+
run (Run): The Run instance for which to load the configuration.
|
141
132
|
|
142
133
|
Returns:
|
143
|
-
The loaded
|
144
|
-
if the
|
134
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
135
|
+
DictConfig if the configuration file is not found.
|
145
136
|
|
146
137
|
"""
|
147
138
|
path = get_artifact_dir(run) / ".hydra/overrides.yaml"
|
148
|
-
return
|
139
|
+
return OmegaConf.load(path) # type: ignore
|
149
140
|
|
150
141
|
|
151
142
|
def remove_run(run: Run | Iterable[Run]) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.7.4
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -55,7 +55,7 @@ Description-Content-Type: text/markdown
|
|
55
55
|
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
56
56
|
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
57
57
|
[python-v-link]: https://pypi.org/project/hydraflow
|
58
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.
|
58
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
59
59
|
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
60
60
|
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
61
61
|
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
@@ -108,7 +108,7 @@ class MySQLConfig:
|
|
108
108
|
cs = ConfigStore.instance()
|
109
109
|
cs.store(name="config", node=MySQLConfig)
|
110
110
|
|
111
|
-
@hydra.main(
|
111
|
+
@hydra.main(config_name="config", version_base=None)
|
112
112
|
def my_app(cfg: MySQLConfig) -> None:
|
113
113
|
# Set experiment by Hydra job name.
|
114
114
|
hydraflow.set_experiment()
|
@@ -0,0 +1,17 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=yp4LT1FDYPIduR6PqJNuSm9kztVCpL1P0zcPHWGvaJU,712
|
2
|
+
hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
|
3
|
+
hydraflow/config.py,sha256=SJzjgsO_kzB78_whJ3lmy7GlZvTvwZONH1BJBn8zCuI,3817
|
4
|
+
hydraflow/context.py,sha256=H5xeNbhMS23U-epsucprl5G3lbOR1aO9nDES4QGLWNk,4747
|
5
|
+
hydraflow/main.py,sha256=O5ETCMCg12zXoaYlZMHcM4IYAs6GVTkADrmEssrtjkk,4994
|
6
|
+
hydraflow/mlflow.py,sha256=pRRsBaBBH4cfzSko-8mmo5bV04GGklxoO0kORkInypM,5663
|
7
|
+
hydraflow/param.py,sha256=LHU9j9_7oA99igasoOyKofKClVr9FmGA3UABJ-KmyS0,4538
|
8
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
hydraflow/run_collection.py,sha256=rtH1cglSlK3QFg9hhifo9lzjDa9veHpoyYxEOmIEM84,19646
|
10
|
+
hydraflow/run_data.py,sha256=S2NNFtA1TleqpgeK4mIn1YY8YbWJFyhF7wXR5NWeYLk,1604
|
11
|
+
hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
|
12
|
+
hydraflow/utils.py,sha256=T4ESiepEcqR-FZlo_m7VTBEFMwalrqPI8eFKPagvv3Q,4402
|
13
|
+
hydraflow-0.8.0.dist-info/METADATA,sha256=J1ilgG7L4A8OvzgZSNycp0YgyHk5e8_gwTr9NN82Ejk,4767
|
14
|
+
hydraflow-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
+
hydraflow-0.8.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
16
|
+
hydraflow-0.8.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
17
|
+
hydraflow-0.8.0.dist-info/RECORD,,
|
hydraflow-0.7.4.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=rujOGabEPPhPfyqTHynem3unqIEQ1haTWWSMuu2LuoQ,898
|
2
|
-
hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
|
3
|
-
hydraflow/config.py,sha256=MNX9da5bPVDcjnpji7Cm9ndK6ura92pt361m4PRh6_E,4326
|
4
|
-
hydraflow/context.py,sha256=3xfKhMozkKFqtWeOp9Gie0A5o5URMta4US6iVD5TcLU,6002
|
5
|
-
hydraflow/main.py,sha256=hroncI_SNpNgEtdxLgzI397J5S2Amv7J0atnPxwBePM,1314
|
6
|
-
hydraflow/mlflow.py,sha256=imD3XL0RTlpnKrkyvO8FNy_Bv6hwSfLiOu1yJuL40ck,8773
|
7
|
-
hydraflow/param.py,sha256=yu1aMNXRLegXGDL-68vwIkfeDF9CaU784WZENGLwl7Q,4572
|
8
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
hydraflow/run_collection.py,sha256=YCWg5Dz1j49xB2LA75onq5wsAeQQbifXpG4yPUwRN4I,24776
|
10
|
-
hydraflow/run_data.py,sha256=dpyyfnuH9mCtIZeigMo1iFQo9bafMdEL4i4uI2l0UqY,1525
|
11
|
-
hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
|
12
|
-
hydraflow/utils.py,sha256=a9i5PEJn8Ssowv9dqHadAihZXlsqtVjHZ9MZvkPq1bY,4747
|
13
|
-
hydraflow-0.7.4.dist-info/METADATA,sha256=GTJi5z8TTIwPy6qpscw-t3Mb1V-GOR0iYU_IB-DB-UE,4766
|
14
|
-
hydraflow-0.7.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
-
hydraflow-0.7.4.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
16
|
-
hydraflow-0.7.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
17
|
-
hydraflow-0.7.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|