hydraflow 0.7.4__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -10
- hydraflow/config.py +10 -27
- hydraflow/context.py +6 -49
- hydraflow/main.py +128 -20
- hydraflow/mlflow.py +93 -151
- hydraflow/param.py +2 -2
- hydraflow/run_collection.py +10 -156
- hydraflow/run_data.py +4 -2
- hydraflow/utils.py +19 -28
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/METADATA +3 -3
- hydraflow-0.8.0.dist-info/RECORD +17 -0
- hydraflow-0.7.4.dist-info/RECORD +0 -17
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.7.4.dist-info → hydraflow-0.8.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,17 +1,14 @@
|
|
1
1
|
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
2
|
|
3
|
-
from hydraflow.config import select_config, select_overrides
|
4
3
|
from hydraflow.context import chdir_artifact, log_run, start_run
|
5
4
|
from hydraflow.main import main
|
6
|
-
from hydraflow.mlflow import list_runs, search_runs, set_experiment
|
5
|
+
from hydraflow.mlflow import list_run_ids, list_run_paths, list_runs
|
7
6
|
from hydraflow.run_collection import RunCollection
|
8
7
|
from hydraflow.utils import (
|
9
8
|
get_artifact_dir,
|
10
9
|
get_artifact_path,
|
11
10
|
get_hydra_output_dir,
|
12
|
-
get_overrides,
|
13
11
|
load_config,
|
14
|
-
load_overrides,
|
15
12
|
remove_run,
|
16
13
|
)
|
17
14
|
|
@@ -21,16 +18,12 @@ __all__ = [
|
|
21
18
|
"get_artifact_dir",
|
22
19
|
"get_artifact_path",
|
23
20
|
"get_hydra_output_dir",
|
24
|
-
"get_overrides",
|
21
|
+
"list_run_ids",
|
22
|
+
"list_run_paths",
|
25
23
|
"list_runs",
|
26
24
|
"load_config",
|
27
|
-
"load_overrides",
|
28
25
|
"log_run",
|
29
26
|
"main",
|
30
27
|
"remove_run",
|
31
|
-
"search_runs",
|
32
|
-
"select_config",
|
33
|
-
"select_overrides",
|
34
|
-
"set_experiment",
|
35
28
|
"start_run",
|
36
29
|
]
|
hydraflow/config.py
CHANGED
@@ -6,35 +6,19 @@ from typing import TYPE_CHECKING
|
|
6
6
|
|
7
7
|
from omegaconf import DictConfig, ListConfig, OmegaConf
|
8
8
|
|
9
|
-
from hydraflow.utils import get_overrides
|
10
|
-
|
11
9
|
if TYPE_CHECKING:
|
12
10
|
from collections.abc import Iterator
|
13
11
|
from typing import Any
|
14
12
|
|
15
13
|
|
16
|
-
def
|
17
|
-
"""Iterate over parameters and collect them into a dictionary.
|
18
|
-
|
19
|
-
Args:
|
20
|
-
config (object): The configuration object to iterate over.
|
21
|
-
prefix (str): The prefix to prepend to the parameter keys.
|
22
|
-
|
23
|
-
Returns:
|
24
|
-
dict[str, Any]: A dictionary of collected parameters.
|
25
|
-
|
26
|
-
"""
|
27
|
-
return dict(iter_params(config))
|
28
|
-
|
29
|
-
|
30
|
-
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
14
|
+
def iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
31
15
|
"""Recursively iterate over the parameters in the given configuration object.
|
32
16
|
|
33
17
|
This function traverses the configuration object and yields key-value pairs
|
34
18
|
representing the parameters. The keys are prefixed with the provided prefix.
|
35
19
|
|
36
20
|
Args:
|
37
|
-
config (object): The configuration object to iterate over. This can be a
|
21
|
+
config (Any): The configuration object to iterate over. This can be a
|
38
22
|
dictionary, list, DictConfig, or ListConfig.
|
39
23
|
prefix (str): The prefix to prepend to the parameter keys.
|
40
24
|
Defaults to an empty string.
|
@@ -50,7 +34,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
50
34
|
config = _from_dotlist(config)
|
51
35
|
|
52
36
|
if not isinstance(config, DictConfig | ListConfig):
|
53
|
-
config = OmegaConf.create(config)
|
37
|
+
config = OmegaConf.create(config)
|
54
38
|
|
55
39
|
yield from _iter_params(config, prefix)
|
56
40
|
|
@@ -65,7 +49,7 @@ def _from_dotlist(config: list[str]) -> dict[str, str]:
|
|
65
49
|
return result
|
66
50
|
|
67
51
|
|
68
|
-
def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
52
|
+
def _iter_params(config: Any, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
69
53
|
if isinstance(config, DictConfig):
|
70
54
|
for key, value in config.items():
|
71
55
|
if _is_param(value):
|
@@ -83,12 +67,12 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
83
67
|
yield from _iter_params(value, f"{prefix}{index}.")
|
84
68
|
|
85
69
|
|
86
|
-
def _is_param(value: object) -> bool:
|
70
|
+
def _is_param(value: Any) -> bool:
|
87
71
|
"""Check if the given value is a parameter."""
|
88
72
|
if isinstance(value, DictConfig):
|
89
73
|
return False
|
90
74
|
|
91
|
-
if isinstance(value, ListConfig):
|
75
|
+
if isinstance(value, ListConfig):
|
92
76
|
if any(isinstance(v, DictConfig | ListConfig) for v in value):
|
93
77
|
return False
|
94
78
|
|
@@ -103,14 +87,14 @@ def _convert(value: Any) -> Any:
|
|
103
87
|
return value
|
104
88
|
|
105
89
|
|
106
|
-
def select_config(config: object, names: list[str]) -> dict[str, Any]:
|
90
|
+
def select_config(config: Any, names: list[str]) -> dict[str, Any]:
|
107
91
|
"""Select the given parameters from the configuration object.
|
108
92
|
|
109
93
|
This function selects the given parameters from the configuration object
|
110
94
|
and returns a new configuration object containing only the selected parameters.
|
111
95
|
|
112
96
|
Args:
|
113
|
-
config (object): The configuration object to select parameters from.
|
97
|
+
config (Any): The configuration object to select parameters from.
|
114
98
|
names (list[str]): The names of the parameters to select.
|
115
99
|
|
116
100
|
Returns:
|
@@ -120,7 +104,7 @@ def select_config(config: object, names: list[str]) -> dict[str, Any]:
|
|
120
104
|
if not isinstance(config, DictConfig):
|
121
105
|
config = OmegaConf.structured(config)
|
122
106
|
|
123
|
-
return {name: _get(config, name) for name in names}
|
107
|
+
return {name: _get(config, name) for name in names}
|
124
108
|
|
125
109
|
|
126
110
|
def _get(config: DictConfig, name: str) -> Any:
|
@@ -132,8 +116,7 @@ def _get(config: DictConfig, name: str) -> Any:
|
|
132
116
|
return _get(config.get(prefix), name)
|
133
117
|
|
134
118
|
|
135
|
-
def select_overrides(config: object) -> dict[str, Any]:
|
119
|
+
def select_overrides(config: object, overrides: list[str]) -> dict[str, Any]:
|
136
120
|
"""Select the given overrides from the configuration object."""
|
137
|
-
overrides = get_overrides()
|
138
121
|
names = [override.split("=")[0].strip() for override in overrides]
|
139
122
|
return select_config(config, names)
|
hydraflow/context.py
CHANGED
@@ -12,7 +12,7 @@ import mlflow
|
|
12
12
|
import mlflow.artifacts
|
13
13
|
from hydra.core.hydra_config import HydraConfig
|
14
14
|
|
15
|
-
from hydraflow.mlflow import log_params
|
15
|
+
from hydraflow.mlflow import log_params, log_text
|
16
16
|
from hydraflow.utils import get_artifact_dir
|
17
17
|
|
18
18
|
if TYPE_CHECKING:
|
@@ -55,11 +55,11 @@ def log_run(
|
|
55
55
|
log_params(config, synchronous=synchronous)
|
56
56
|
|
57
57
|
hc = HydraConfig.get()
|
58
|
-
|
58
|
+
hydra_dir = Path(hc.runtime.output_dir)
|
59
59
|
|
60
60
|
# Save '.hydra' config directory.
|
61
|
-
|
62
|
-
mlflow.log_artifacts(
|
61
|
+
hydra_subdir = hydra_dir / (hc.output_subdir or "")
|
62
|
+
mlflow.log_artifacts(hydra_subdir.as_posix(), hc.output_subdir)
|
63
63
|
|
64
64
|
try:
|
65
65
|
yield
|
@@ -70,43 +70,14 @@ def log_run(
|
|
70
70
|
raise
|
71
71
|
|
72
72
|
finally:
|
73
|
-
log_text(
|
74
|
-
|
75
|
-
|
76
|
-
def log_text(directory: Path, pattern: str = "*.log") -> None:
|
77
|
-
"""Log text files in the given directory as artifacts.
|
78
|
-
|
79
|
-
Append the text files to the existing text file in the artifact directory.
|
80
|
-
|
81
|
-
Args:
|
82
|
-
directory (Path): The directory to find the logs in.
|
83
|
-
pattern (str): The pattern to match the logs.
|
84
|
-
|
85
|
-
"""
|
86
|
-
artifact_dir = get_artifact_dir()
|
87
|
-
|
88
|
-
for file in directory.glob(pattern):
|
89
|
-
if not file.is_file():
|
90
|
-
continue
|
91
|
-
|
92
|
-
file_artifact = artifact_dir / file.name
|
93
|
-
if file_artifact.exists():
|
94
|
-
text = file_artifact.read_text()
|
95
|
-
if not text.endswith("\n"):
|
96
|
-
text += "\n"
|
97
|
-
else:
|
98
|
-
text = ""
|
99
|
-
|
100
|
-
text += file.read_text()
|
101
|
-
mlflow.log_text(text, file.name)
|
73
|
+
log_text(hydra_dir)
|
102
74
|
|
103
75
|
|
104
76
|
@contextmanager
|
105
|
-
def start_run(  # noqa: PLR0913
|
77
|
+
def start_run(
|
106
78
|
config: object,
|
107
79
|
*,
|
108
80
|
chdir: bool = False,
|
109
|
-
run: Run | None = None,
|
110
81
|
run_id: str | None = None,
|
111
82
|
experiment_id: str | None = None,
|
112
83
|
run_name: str | None = None,
|
@@ -126,7 +97,6 @@ def start_run( # noqa: PLR0913
|
|
126
97
|
config (object): The configuration object to log parameters from.
|
127
98
|
chdir (bool): Whether to change the current working directory to the
|
128
99
|
artifact directory of the current run. Defaults to False.
|
129
|
-
run (Run | None): The existing run. Defaults to None.
|
130
100
|
run_id (str | None): The existing run ID. Defaults to None.
|
131
101
|
experiment_id (str | None): The experiment ID. Defaults to None.
|
132
102
|
run_name (str | None): The name of the run. Defaults to None.
|
@@ -142,20 +112,7 @@ def start_run( # noqa: PLR0913
|
|
142
112
|
Yields:
|
143
113
|
Run: An MLflow Run object representing the started run.
|
144
114
|
|
145
|
-
Example:
|
146
|
-
with start_run(config) as run:
|
147
|
-
# Perform operations within the MLflow run context
|
148
|
-
pass
|
149
|
-
|
150
|
-
See Also:
|
151
|
-
- `mlflow.start_run`: The MLflow function to start a run directly.
|
152
|
-
- `log_run`: A context manager to log parameters and manage the MLflow
|
153
|
-
run context.
|
154
|
-
|
155
115
|
"""
|
156
|
-
if run:
|
157
|
-
run_id = run.info.run_id
|
158
|
-
|
159
116
|
with (
|
160
117
|
mlflow.start_run(
|
161
118
|
run_id=run_id,
|
hydraflow/main.py
CHANGED
@@ -1,54 +1,162 @@
|
|
1
|
-
"""
|
1
|
+
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
|
+
|
3
|
+
This module provides decorators and utilities to seamlessly combine Hydra's
|
4
|
+
configuration management with MLflow's experiment tracking capabilities. It
|
5
|
+
enables automatic run deduplication, configuration storage, and experiment
|
6
|
+
management.
|
7
|
+
|
8
|
+
The main functionality is provided through the `main` decorator, which can be
|
9
|
+
used to wrap experiment entry points. This decorator handles:
|
10
|
+
- Configuration management via Hydra
|
11
|
+
- Experiment tracking via MLflow
|
12
|
+
- Run deduplication based on configurations
|
13
|
+
- Working directory management
|
14
|
+
- Automatic configuration storage
|
15
|
+
|
16
|
+
Example:
|
17
|
+
```python
|
18
|
+
from dataclasses import dataclass
|
19
|
+
from mlflow.entities import Run
|
20
|
+
|
21
|
+
@dataclass
|
22
|
+
class Config:
|
23
|
+
learning_rate: float
|
24
|
+
batch_size: int
|
25
|
+
|
26
|
+
@main(Config)
|
27
|
+
def train(run: Run, config: Config):
|
28
|
+
# Your training code here
|
29
|
+
pass
|
30
|
+
```
|
31
|
+
|
32
|
+
"""
|
2
33
|
|
3
34
|
from __future__ import annotations
|
4
35
|
|
5
36
|
from functools import wraps
|
6
|
-
from typing import TYPE_CHECKING,
|
37
|
+
from typing import TYPE_CHECKING, TypeVar
|
7
38
|
|
8
39
|
import hydra
|
40
|
+
import mlflow
|
9
41
|
from hydra.core.config_store import ConfigStore
|
42
|
+
from hydra.core.hydra_config import HydraConfig
|
10
43
|
from mlflow.entities import RunStatus
|
44
|
+
from omegaconf import OmegaConf
|
11
45
|
|
12
46
|
import hydraflow
|
47
|
+
from hydraflow.utils import file_uri_to_path
|
13
48
|
|
14
49
|
if TYPE_CHECKING:
|
15
50
|
from collections.abc import Callable
|
51
|
+
from pathlib import Path
|
16
52
|
|
17
53
|
from mlflow.entities import Run
|
18
54
|
|
19
55
|
FINISHED = RunStatus.to_string(RunStatus.FINISHED)
|
20
56
|
|
57
|
+
T = TypeVar("T")
|
58
|
+
|
21
59
|
|
22
60
|
def main(
|
23
|
-
node:
|
61
|
+
node: T | type[T],
|
24
62
|
config_name: str = "config",
|
25
63
|
*,
|
26
64
|
chdir: bool = False,
|
27
65
|
force_new_run: bool = False,
|
28
|
-
|
66
|
+
match_overrides: bool = False,
|
67
|
+
rerun_finished: bool = False,
|
29
68
|
):
|
30
|
-
"""
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
69
|
+
"""Decorator for configuring and running MLflow experiments with Hydra.
|
70
|
+
|
71
|
+
This decorator combines Hydra configuration management with MLflow experiment
|
72
|
+
tracking. It automatically handles run deduplication and configuration storage.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
node: Configuration node class or instance defining the structure of the
|
76
|
+
configuration.
|
77
|
+
config_name: Name of the configuration. Defaults to "config".
|
78
|
+
chdir: If True, changes working directory to the artifact directory
|
79
|
+
of the run. Defaults to False.
|
80
|
+
force_new_run: If True, always creates a new MLflow run instead of
|
81
|
+
reusing existing ones. Defaults to False.
|
82
|
+
match_overrides: If True, matches runs based on Hydra CLI overrides
|
83
|
+
instead of full config. Defaults to False.
|
84
|
+
rerun_finished: If True, allows rerunning completed runs. Defaults to
|
85
|
+
False.
|
86
|
+
|
87
|
+
"""
|
88
|
+
|
89
|
+
def decorator(app: Callable[[Run, T], None]) -> Callable[[], None]:
|
90
|
+
ConfigStore.instance().store(config_name, node)
|
91
|
+
|
92
|
+
@hydra.main(config_name=config_name, version_base=None)
|
35
93
|
@wraps(app)
|
36
|
-
|
37
|
-
|
38
|
-
|
94
|
+
def inner_decorator(config: T) -> None:
|
95
|
+
hc = HydraConfig.get()
|
96
|
+
experiment = mlflow.set_experiment(hc.job.name)
|
39
97
|
|
40
98
|
if force_new_run:
|
41
|
-
|
99
|
+
run_id = None
|
42
100
|
else:
|
43
|
-
|
44
|
-
|
101
|
+
uri = experiment.artifact_location
|
102
|
+
overrides = hc.overrides.task if match_overrides else None
|
103
|
+
run_id = get_run_id(uri, config, overrides)
|
45
104
|
|
46
|
-
if
|
47
|
-
|
105
|
+
if run_id and not rerun_finished:
|
106
|
+
run = mlflow.get_run(run_id)
|
107
|
+
if run.info.status == FINISHED:
|
108
|
+
return
|
48
109
|
|
49
|
-
with hydraflow.start_run(
|
50
|
-
app(run,
|
110
|
+
with hydraflow.start_run(config, run_id=run_id, chdir=chdir) as run:
|
111
|
+
app(run, config)
|
51
112
|
|
52
|
-
return
|
113
|
+
return inner_decorator
|
53
114
|
|
54
115
|
return decorator
|
116
|
+
|
117
|
+
|
118
|
+
def get_run_id(uri: str, config: object, overrides: list[str] | None) -> str | None:
|
119
|
+
"""Try to get the run ID for the given configuration.
|
120
|
+
|
121
|
+
If the run is not found, the function will return None.
|
122
|
+
|
123
|
+
Args:
|
124
|
+
uri (str): The URI of the experiment.
|
125
|
+
config (object): The configuration object.
|
126
|
+
overrides (list[str] | None): The task overrides.
|
127
|
+
|
128
|
+
Returns:
|
129
|
+
The run ID for the given configuration or overrides. Returns None if
|
130
|
+
no run ID is found.
|
131
|
+
|
132
|
+
"""
|
133
|
+
for run_dir in file_uri_to_path(uri).iterdir():
|
134
|
+
if run_dir.is_dir() and equals(run_dir, config, overrides):
|
135
|
+
return run_dir.name
|
136
|
+
|
137
|
+
return None
|
138
|
+
|
139
|
+
|
140
|
+
def equals(run_dir: Path, config: object, overrides: list[str] | None) -> bool:
|
141
|
+
"""Check if the run directory matches the given configuration or overrides.
|
142
|
+
|
143
|
+
Args:
|
144
|
+
run_dir (Path): The run directory.
|
145
|
+
config (object): The configuration object.
|
146
|
+
overrides (list[str] | None): The task overrides.
|
147
|
+
|
148
|
+
Returns:
|
149
|
+
True if the run directory matches the given configuration or overrides,
|
150
|
+
False otherwise.
|
151
|
+
|
152
|
+
"""
|
153
|
+
if overrides is None:
|
154
|
+
path = run_dir / "artifacts/.hydra/config.yaml"
|
155
|
+
else:
|
156
|
+
path = run_dir / "artifacts/.hydra/overrides.yaml"
|
157
|
+
config = overrides
|
158
|
+
|
159
|
+
if not path.exists():
|
160
|
+
return False
|
161
|
+
|
162
|
+
return OmegaConf.load(path) == config
|
hydraflow/mlflow.py
CHANGED
@@ -1,17 +1,8 @@
|
|
1
|
-
"""
|
1
|
+
"""Integration of MLflow experiment tracking with Hydra configuration management.
|
2
2
|
|
3
3
|
This module provides functions to log parameters from Hydra configuration objects
|
4
4
|
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
5
|
configuration management with MLflow's experiment tracking capabilities.
|
6
|
-
|
7
|
-
Key Features:
|
8
|
-
- **Experiment Management**: Set experiment names and tracking URIs using Hydra
|
9
|
-
configuration details.
|
10
|
-
- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
|
11
|
-
supporting both synchronous and asynchronous logging.
|
12
|
-
- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
|
13
|
-
multiple MLflow runs, providing methods to filter and retrieve runs based on
|
14
|
-
various criteria.
|
15
6
|
"""
|
16
7
|
|
17
8
|
from __future__ import annotations
|
@@ -21,149 +12,132 @@ from typing import TYPE_CHECKING
|
|
21
12
|
import joblib
|
22
13
|
import mlflow
|
23
14
|
import mlflow.artifacts
|
24
|
-
from hydra.core.hydra_config import HydraConfig
|
25
|
-
from mlflow.entities import ViewType
|
26
|
-
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
|
27
15
|
|
28
16
|
from hydraflow.config import iter_params
|
29
17
|
from hydraflow.run_collection import RunCollection
|
30
|
-
from hydraflow.utils import get_artifact_dir
|
18
|
+
from hydraflow.utils import file_uri_to_path, get_artifact_dir
|
31
19
|
|
32
20
|
if TYPE_CHECKING:
|
33
21
|
from pathlib import Path
|
34
|
-
|
35
|
-
from mlflow.entities.experiment import Experiment
|
22
|
+
from typing import Any
|
36
23
|
|
37
24
|
|
38
|
-
def
|
39
|
-
|
40
|
-
suffix: str = "",
|
41
|
-
uri: str | Path | None = None,
|
42
|
-
name: str | None = None,
|
43
|
-
) -> Experiment:
|
44
|
-
"""Set the experiment name and tracking URI optionally.
|
25
|
+
def log_params(config: Any, *, synchronous: bool | None = None) -> None:
|
26
|
+
"""Log the parameters from the given configuration object.
|
45
27
|
|
46
|
-
This
|
47
|
-
|
48
|
-
|
28
|
+
This method logs the parameters from the provided configuration object
|
29
|
+
using MLflow. It iterates over the parameters and logs them using the
|
30
|
+
`mlflow.log_param` method.
|
49
31
|
|
50
32
|
Args:
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
name (str | None): The name of the experiment. Defaults to None.
|
33
|
+
config (Any): The configuration object to log the parameters from.
|
34
|
+
synchronous (bool | None): Whether to log the parameters synchronously.
|
35
|
+
Defaults to None.
|
55
36
|
|
56
|
-
|
57
|
-
|
58
|
-
|
37
|
+
"""
|
38
|
+
for key, value in iter_params(config):
|
39
|
+
mlflow.log_param(key, value, synchronous=synchronous)
|
40
|
+
|
41
|
+
|
42
|
+
def log_text(from_dir: Path, pattern: str = "*.log") -> None:
|
43
|
+
"""Log text files in the given directory as artifacts.
|
44
|
+
|
45
|
+
Append the text files to the existing text file in the artifact directory.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
from_dir (Path): The directory to find the logs in.
|
49
|
+
pattern (str): The pattern to match the logs.
|
59
50
|
|
60
51
|
"""
|
61
|
-
|
62
|
-
mlflow.set_tracking_uri(uri)
|
52
|
+
artifact_dir = get_artifact_dir()
|
63
53
|
|
64
|
-
|
65
|
-
|
54
|
+
for file in from_dir.glob(pattern):
|
55
|
+
if not file.is_file():
|
56
|
+
continue
|
66
57
|
|
67
|
-
|
68
|
-
|
69
|
-
|
58
|
+
file_artifact = artifact_dir / file.name
|
59
|
+
if file_artifact.exists():
|
60
|
+
text = file_artifact.read_text()
|
61
|
+
if not text.endswith("\n"):
|
62
|
+
text += "\n"
|
63
|
+
else:
|
64
|
+
text = ""
|
70
65
|
|
66
|
+
text += file.read_text()
|
67
|
+
mlflow.log_text(text, file.name)
|
71
68
|
|
72
|
-
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
73
|
-
"""Log the parameters from the given configuration object.
|
74
69
|
|
75
|
-
|
76
|
-
|
77
|
-
|
70
|
+
def list_run_paths(
|
71
|
+
experiment_names: str | list[str] | None = None,
|
72
|
+
*other: str,
|
73
|
+
) -> list[Path]:
|
74
|
+
"""List all run paths for the specified experiments.
|
75
|
+
|
76
|
+
This function retrieves all run paths for the given list of experiment names.
|
77
|
+
If no experiment names are provided (None), the function will search all runs
|
78
|
+
for all experiments except the "Default" experiment.
|
78
79
|
|
79
80
|
Args:
|
80
|
-
|
81
|
-
|
82
|
-
|
81
|
+
experiment_names (list[str] | None): List of experiment names to search
|
82
|
+
for runs. If None is provided, the function will search all runs
|
83
|
+
for all experiments except the "Default" experiment.
|
84
|
+
*other (str): The parts of the run directory to join.
|
85
|
+
|
86
|
+
Returns:
|
87
|
+
list[Path]: A list of run paths for the specified experiments.
|
83
88
|
|
84
89
|
"""
|
85
|
-
|
86
|
-
|
90
|
+
if isinstance(experiment_names, str):
|
91
|
+
experiment_names = [experiment_names]
|
92
|
+
|
93
|
+
elif experiment_names is None:
|
94
|
+
experiments = mlflow.search_experiments()
|
95
|
+
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
87
96
|
|
97
|
+
run_paths: list[Path] = []
|
88
98
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
filter_string: str = "",
|
93
|
-
run_view_type: int = ViewType.ACTIVE_ONLY,
|
94
|
-
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
95
|
-
order_by: list[str] | None = None,
|
96
|
-
search_all_experiments: bool = False,
|
97
|
-
experiment_names: list[str] | None = None,
|
98
|
-
) -> RunCollection:
|
99
|
-
"""Search for Runs that fit the specified criteria.
|
99
|
+
for name in experiment_names:
|
100
|
+
if experiment := mlflow.get_experiment_by_name(name):
|
101
|
+
uri = experiment.artifact_location
|
100
102
|
|
101
|
-
|
102
|
-
|
103
|
-
|
103
|
+
if isinstance(uri, str):
|
104
|
+
path = file_uri_to_path(uri)
|
105
|
+
run_paths.extend(p for p in path.iterdir() if p.is_dir())
|
104
106
|
|
105
|
-
|
106
|
-
|
107
|
+
if other:
|
108
|
+
return [p.joinpath(*other) for p in run_paths]
|
109
|
+
|
110
|
+
return run_paths
|
111
|
+
|
112
|
+
|
113
|
+
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
|
114
|
+
"""List all run IDs for the specified experiments.
|
115
|
+
|
116
|
+
This function retrieves all runs for the given list of experiment names.
|
117
|
+
If no experiment names are provided (None), the function will search all
|
118
|
+
runs for all experiments except the "Default" experiment.
|
107
119
|
|
108
120
|
Args:
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
113
|
-
``None`` will default to the active experiment if ``experiment_names``
|
114
|
-
is ``None`` or ``[]``.
|
115
|
-
filter_string (str): Filter query string, defaults to searching all
|
116
|
-
runs.
|
117
|
-
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
118
|
-
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
119
|
-
max_results (int): The maximum number of runs to put in the dataframe.
|
120
|
-
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
121
|
-
machine.
|
122
|
-
order_by (list[str] | None): List of columns to order by (e.g.,
|
123
|
-
"metrics.rmse"). The ``order_by`` column can contain an optional
|
124
|
-
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
125
|
-
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
126
|
-
``start_time DESC``, then ``run_id``.
|
127
|
-
search_all_experiments (bool): Boolean specifying whether all
|
128
|
-
experiments should be searched. Only honored if ``experiment_ids``
|
129
|
-
is ``[]`` or ``None``.
|
130
|
-
experiment_names (list[str] | None): List of experiment names. Search
|
131
|
-
can work with experiment IDs or experiment names, but not both in
|
132
|
-
the same call. Values other than ``None`` or ``[]`` will result in
|
133
|
-
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
134
|
-
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
135
|
-
default to the active experiment if ``experiment_ids`` is ``None``
|
136
|
-
or ``[]``.
|
121
|
+
experiment_names (list[str] | None): List of experiment names to search
|
122
|
+
for runs. If None is provided, the function will search all runs
|
123
|
+
for all experiments except the "Default" experiment.
|
137
124
|
|
138
125
|
Returns:
|
139
|
-
A
|
126
|
+
list[str]: A list of run IDs for the specified experiments.
|
140
127
|
|
141
128
|
"""
|
142
|
-
|
143
|
-
experiment_ids=experiment_ids,
|
144
|
-
filter_string=filter_string,
|
145
|
-
run_view_type=run_view_type,
|
146
|
-
max_results=max_results,
|
147
|
-
order_by=order_by,
|
148
|
-
output_format="list",
|
149
|
-
search_all_experiments=search_all_experiments,
|
150
|
-
experiment_names=experiment_names,
|
151
|
-
)
|
152
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
153
|
-
return RunCollection(runs) # type: ignore
|
129
|
+
return [run_path.stem for run_path in list_run_paths(experiment_names)]
|
154
130
|
|
155
131
|
|
156
132
|
def list_runs(
|
157
133
|
experiment_names: str | list[str] | None = None,
|
158
134
|
n_jobs: int = 0,
|
159
|
-
status: str | list[str] | int | list[int] | None = None,
|
160
135
|
) -> RunCollection:
|
161
136
|
"""List all runs for the specified experiments.
|
162
137
|
|
163
138
|
This function retrieves all runs for the given list of experiment names.
|
164
|
-
If no experiment names are provided (None),
|
165
|
-
for the
|
166
|
-
will search all runs for all experiments except the "Default" experiment.
|
139
|
+
If no experiment names are provided (None), the function will search all runs
|
140
|
+
for all experiments except the "Default" experiment.
|
167
141
|
The function returns the results as a `RunCollection` object.
|
168
142
|
|
169
143
|
Note:
|
@@ -171,55 +145,23 @@ def list_runs(
|
|
171
145
|
|
172
146
|
Args:
|
173
147
|
experiment_names (list[str] | None): List of experiment names to search
|
174
|
-
for runs. If None
|
175
|
-
|
176
|
-
|
177
|
-
n_jobs (int): The number of jobs to run in parallel. If 0, the function
|
178
|
-
will search runs sequentially.
|
179
|
-
status (str | list[str] | int | list[int] | None): The status of the runs
|
180
|
-
to filter.
|
148
|
+
for runs. If None is provided, the function will search all runs
|
149
|
+
for all experiments except the "Default" experiment.
|
150
|
+
n_jobs (int): The number of jobs to retrieve runs in parallel.
|
181
151
|
|
182
152
|
Returns:
|
183
153
|
RunCollection: A `RunCollection` instance containing the runs for the
|
184
154
|
specified experiments.
|
185
155
|
|
186
156
|
"""
|
187
|
-
|
188
|
-
if status is None:
|
189
|
-
return rc
|
190
|
-
|
191
|
-
return rc.filter(status=status)
|
192
|
-
|
193
|
-
|
194
|
-
def _list_runs(
|
195
|
-
experiment_names: str | list[str] | None = None,
|
196
|
-
n_jobs: int = 0,
|
197
|
-
) -> RunCollection:
|
198
|
-
if isinstance(experiment_names, str):
|
199
|
-
experiment_names = [experiment_names]
|
200
|
-
|
201
|
-
elif experiment_names == []:
|
202
|
-
experiments = mlflow.search_experiments()
|
203
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
157
|
+
run_ids = list_run_ids(experiment_names)
|
204
158
|
|
205
159
|
if n_jobs == 0:
|
206
|
-
|
207
|
-
|
208
|
-
if experiment_names is None:
|
209
|
-
experiment_id = _get_experiment_id()
|
210
|
-
experiment_names = [mlflow.get_experiment(experiment_id).name]
|
211
|
-
|
212
|
-
run_ids = []
|
213
|
-
|
214
|
-
for name in experiment_names:
|
215
|
-
if experiment := mlflow.get_experiment_by_name(name):
|
216
|
-
uri = experiment.artifact_location
|
160
|
+
runs = [mlflow.get_run(run_id) for run_id in run_ids]
|
217
161
|
|
218
|
-
|
219
|
-
|
220
|
-
|
162
|
+
else:
|
163
|
+
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
164
|
+
runs = joblib.Parallel(n_jobs, backend="threading")(it)
|
221
165
|
|
222
|
-
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
223
|
-
runs = joblib.Parallel(n_jobs, prefer="threads")(it)
|
224
166
|
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
225
167
|
return RunCollection(runs) # type: ignore
|
hydraflow/param.py
CHANGED
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
|
|
18
18
|
from mlflow.entities import Run
|
19
19
|
|
20
20
|
|
21
|
-
def match(param: str, value: Any) -> bool:
|
21
|
+
def match(param: str, value: Any) -> bool:
|
22
22
|
"""Check if the string matches the specified value.
|
23
23
|
|
24
24
|
Args:
|
@@ -68,7 +68,7 @@ def _match_list(param: str, value: list) -> bool | None:
|
|
68
68
|
|
69
69
|
|
70
70
|
def _match_tuple(param: str, value: tuple) -> bool | None:
|
71
|
-
if len(value) != 2:
|
71
|
+
if len(value) != 2:
|
72
72
|
return None
|
73
73
|
|
74
74
|
if any(param.startswith(x) for x in ["[", "(", "{"]):
|
hydraflow/run_collection.py
CHANGED
@@ -21,7 +21,7 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
from dataclasses import dataclass, field
|
23
23
|
from itertools import chain
|
24
|
-
from typing import TYPE_CHECKING, Any,
|
24
|
+
from typing import TYPE_CHECKING, Any, overload
|
25
25
|
|
26
26
|
from mlflow.entities import RunStatus
|
27
27
|
|
@@ -34,15 +34,9 @@ from hydraflow.utils import load_config
|
|
34
34
|
|
35
35
|
if TYPE_CHECKING:
|
36
36
|
from collections.abc import Callable, Iterator
|
37
|
-
from pathlib import Path
|
38
37
|
from typing import Any
|
39
38
|
|
40
39
|
from mlflow.entities.run import Run
|
41
|
-
from omegaconf import DictConfig
|
42
|
-
|
43
|
-
|
44
|
-
T = TypeVar("T")
|
45
|
-
P = ParamSpec("P")
|
46
40
|
|
47
41
|
|
48
42
|
@dataclass
|
@@ -124,11 +118,6 @@ class RunCollection:
|
|
124
118
|
runs = [run for run in self._runs if run not in other._runs] # noqa: SLF001
|
125
119
|
return self.__class__(runs)
|
126
120
|
|
127
|
-
@classmethod
|
128
|
-
def from_list(cls, runs: list[Run]) -> RunCollection:
|
129
|
-
"""Create a `RunCollection` instance from a list of MLflow `Run` instances."""
|
130
|
-
return cls(runs)
|
131
|
-
|
132
121
|
@property
|
133
122
|
def info(self) -> RunCollectionInfo:
|
134
123
|
"""An instance of `RunCollectionInfo`."""
|
@@ -139,26 +128,6 @@ class RunCollection:
|
|
139
128
|
"""An instance of `RunCollectionData`."""
|
140
129
|
return self._data
|
141
130
|
|
142
|
-
def take(self, n: int) -> RunCollection:
|
143
|
-
"""Take the first n runs from the collection.
|
144
|
-
|
145
|
-
If n is negative, the method returns the last n runs
|
146
|
-
from the collection.
|
147
|
-
|
148
|
-
Args:
|
149
|
-
n (int): The number of runs to take. If n is negative, the method
|
150
|
-
returns the last n runs from the collection.
|
151
|
-
|
152
|
-
Returns:
|
153
|
-
A new `RunCollection` instance containing the first n runs if n is
|
154
|
-
positive, or the last n runs if n is negative.
|
155
|
-
|
156
|
-
"""
|
157
|
-
if n < 0:
|
158
|
-
return self.__class__(self._runs[n:])
|
159
|
-
|
160
|
-
return self.__class__(self._runs[:n])
|
161
|
-
|
162
131
|
def one(self) -> Run:
|
163
132
|
"""Get the only `Run` instance in the collection.
|
164
133
|
|
@@ -238,8 +207,8 @@ class RunCollection:
|
|
238
207
|
self,
|
239
208
|
config: object | Callable[[Run], bool] | None = None,
|
240
209
|
*,
|
241
|
-
override: bool = False,
|
242
210
|
select: list[str] | None = None,
|
211
|
+
overrides: list[str] | None = None,
|
243
212
|
status: str | list[str] | int | list[int] | None = None,
|
244
213
|
**kwargs,
|
245
214
|
) -> RunCollection:
|
@@ -264,9 +233,9 @@ class RunCollection:
|
|
264
233
|
to filter the runs. This can be any object that provides key-value
|
265
234
|
pairs through the `iter_params` function, or a callable that
|
266
235
|
takes a `Run` object and returns a boolean value.
|
267
|
-
override (bool): If True, override the configuration object with the
|
268
|
-
provided key-value pairs.
|
269
236
|
select (list[str] | None): The list of parameters to select.
|
237
|
+
overrides (list[str] | None): The list of overrides to filter the
|
238
|
+
runs.
|
270
239
|
status (str | list[str] | int | list[int] | None): The status of the
|
271
240
|
runs to filter.
|
272
241
|
**kwargs: Additional key-value pairs to filter the runs.
|
@@ -279,8 +248,8 @@ class RunCollection:
|
|
279
248
|
filter_runs(
|
280
249
|
self._runs,
|
281
250
|
config,
|
282
|
-
override=override,
|
283
251
|
select=select,
|
252
|
+
overrides=overrides,
|
284
253
|
status=status,
|
285
254
|
**kwargs,
|
286
255
|
),
|
@@ -400,121 +369,6 @@ class RunCollection:
|
|
400
369
|
|
401
370
|
return params
|
402
371
|
|
403
|
-
def map(
|
404
|
-
self,
|
405
|
-
func: Callable[Concatenate[Run, P], T],
|
406
|
-
*args: P.args,
|
407
|
-
**kwargs: P.kwargs,
|
408
|
-
) -> Iterator[T]:
|
409
|
-
"""Return an iterator of results by applying a function to each run.
|
410
|
-
|
411
|
-
This method iterates over each run in the collection and applies the
|
412
|
-
provided function to it, along with any additional arguments and
|
413
|
-
keyword arguments.
|
414
|
-
|
415
|
-
Args:
|
416
|
-
func (Callable[[Run, P], T]): A function that takes a run and
|
417
|
-
additional arguments and returns a result.
|
418
|
-
*args: Additional arguments to pass to the function.
|
419
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
420
|
-
|
421
|
-
Yields:
|
422
|
-
Results obtained by applying the function to each run in the collection.
|
423
|
-
|
424
|
-
"""
|
425
|
-
return (func(run, *args, **kwargs) for run in self)
|
426
|
-
|
427
|
-
def map_id(
|
428
|
-
self,
|
429
|
-
func: Callable[Concatenate[str, P], T],
|
430
|
-
*args: P.args,
|
431
|
-
**kwargs: P.kwargs,
|
432
|
-
) -> Iterator[T]:
|
433
|
-
"""Return an iterator of results by applying a function to each run id.
|
434
|
-
|
435
|
-
Args:
|
436
|
-
func (Callable[[str, P], T]): A function that takes a run id and returns a
|
437
|
-
result.
|
438
|
-
*args: Additional arguments to pass to the function.
|
439
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
440
|
-
|
441
|
-
Yields:
|
442
|
-
Results obtained by applying the function to each run id in the
|
443
|
-
collection.
|
444
|
-
|
445
|
-
"""
|
446
|
-
return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
|
447
|
-
|
448
|
-
def map_config(
|
449
|
-
self,
|
450
|
-
func: Callable[Concatenate[DictConfig, P], T],
|
451
|
-
*args: P.args,
|
452
|
-
**kwargs: P.kwargs,
|
453
|
-
) -> Iterator[T]:
|
454
|
-
"""Return an iterator of results by applying a function to each run config.
|
455
|
-
|
456
|
-
Args:
|
457
|
-
func (Callable[[DictConfig, P], T]): A function that takes a run
|
458
|
-
configuration and returns a result.
|
459
|
-
*args: Additional arguments to pass to the function.
|
460
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
461
|
-
|
462
|
-
Yields:
|
463
|
-
Results obtained by applying the function to each run configuration
|
464
|
-
in the collection.
|
465
|
-
|
466
|
-
"""
|
467
|
-
return (func(load_config(run), *args, **kwargs) for run in self)
|
468
|
-
|
469
|
-
def map_uri(
|
470
|
-
self,
|
471
|
-
func: Callable[Concatenate[str | None, P], T],
|
472
|
-
*args: P.args,
|
473
|
-
**kwargs: P.kwargs,
|
474
|
-
) -> Iterator[T]:
|
475
|
-
"""Return an iterator of results by applying a function to each artifact URI.
|
476
|
-
|
477
|
-
Iterate over each run in the collection, retrieves the artifact URI, and
|
478
|
-
apply the provided function to it. If a run does not have an artifact
|
479
|
-
URI, None is passed to the function.
|
480
|
-
|
481
|
-
Args:
|
482
|
-
func (Callable[[str | None, P], T]): A function that takes an
|
483
|
-
artifact URI (string or None) and returns a result.
|
484
|
-
*args: Additional arguments to pass to the function.
|
485
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
486
|
-
|
487
|
-
Yields:
|
488
|
-
Results obtained by applying the function to each artifact URI in the
|
489
|
-
collection.
|
490
|
-
|
491
|
-
"""
|
492
|
-
return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
|
493
|
-
|
494
|
-
def map_dir(
|
495
|
-
self,
|
496
|
-
func: Callable[Concatenate[Path, P], T],
|
497
|
-
*args: P.args,
|
498
|
-
**kwargs: P.kwargs,
|
499
|
-
) -> Iterator[T]:
|
500
|
-
"""Return an iterator of results by applying a function to each artifact dir.
|
501
|
-
|
502
|
-
Iterate over each run in the collection, downloads the artifact
|
503
|
-
directory, and apply the provided function to the directory path.
|
504
|
-
|
505
|
-
Args:
|
506
|
-
func (Callable[[Path, P], T]): A function that takes an artifact directory
|
507
|
-
path (string) and returns a result.
|
508
|
-
*args: Additional arguments to pass to the function.
|
509
|
-
**kwargs: Additional keyword arguments to pass to the function.
|
510
|
-
|
511
|
-
Yields:
|
512
|
-
Results obtained by applying the function to each artifact directory
|
513
|
-
in the collection.
|
514
|
-
|
515
|
-
"""
|
516
|
-
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir) # noqa: A001
|
517
|
-
|
518
372
|
def groupby(
|
519
373
|
self,
|
520
374
|
names: str | list[str],
|
@@ -631,8 +485,8 @@ def filter_runs(
|
|
631
485
|
runs: list[Run],
|
632
486
|
config: object | Callable[[Run], bool] | None = None,
|
633
487
|
*,
|
634
|
-
override: bool = False,
|
635
488
|
select: list[str] | None = None,
|
489
|
+
overrides: list[str] | None = None,
|
636
490
|
status: str | list[str] | int | list[int] | None = None,
|
637
491
|
**kwargs,
|
638
492
|
) -> list[Run]:
|
@@ -658,10 +512,10 @@ def filter_runs(
|
|
658
512
|
that provides key-value pairs through the `iter_params` function.
|
659
513
|
This can also be a callable that takes a `Run` object and returns
|
660
514
|
a boolean value. Defaults to None.
|
661
|
-
override (bool, optional): If True, filter the runs based on
|
662
|
-
the overrides. Defaults to False.
|
663
515
|
select (list[str] | None, optional): The list of parameters to select.
|
664
516
|
Defaults to None.
|
517
|
+
overrides (list[str] | None, optional): The list of overrides to filter the
|
518
|
+
runs. Defaults to None.
|
665
519
|
status (str | list[str] | RunStatus | list[RunStatus] | None, optional): The
|
666
520
|
status of the runs to filter. Defaults to None.
|
667
521
|
**kwargs: Additional key-value pairs to filter the runs.
|
@@ -674,8 +528,8 @@ def filter_runs(
|
|
674
528
|
runs = [run for run in runs if config(run)]
|
675
529
|
|
676
530
|
else:
|
677
|
-
if override:
|
678
|
-
config = select_overrides(config)
|
531
|
+
if overrides:
|
532
|
+
config = select_overrides(config, overrides)
|
679
533
|
elif select:
|
680
534
|
config = select_config(config, select)
|
681
535
|
|
hydraflow/run_data.py
CHANGED
@@ -6,7 +6,8 @@ from typing import TYPE_CHECKING
|
|
6
6
|
|
7
7
|
from pandas import DataFrame
|
8
8
|
|
9
|
-
from hydraflow.config import
|
9
|
+
from hydraflow.config import iter_params
|
10
|
+
from hydraflow.utils import load_config
|
10
11
|
|
11
12
|
if TYPE_CHECKING:
|
12
13
|
from collections.abc import Iterable
|
@@ -39,7 +40,8 @@ class RunCollectionData:
|
|
39
40
|
A DataFrame containing the runs' configurations.
|
40
41
|
|
41
42
|
"""
|
42
|
-
|
43
|
+
values = [dict(iter_params(load_config(r))) for r in self._runs]
|
44
|
+
return DataFrame(values)
|
43
45
|
|
44
46
|
|
45
47
|
def _to_dict(it: Iterable[dict[str, Any]]) -> dict[str, list[Any]]:
|
hydraflow/utils.py
CHANGED
@@ -12,46 +12,42 @@ import mlflow
|
|
12
12
|
import mlflow.artifacts
|
13
13
|
from hydra.core.hydra_config import HydraConfig
|
14
14
|
from mlflow.entities import Run
|
15
|
-
from omegaconf import DictConfig, OmegaConf
|
15
|
+
from omegaconf import DictConfig, ListConfig, OmegaConf
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from collections.abc import Iterable
|
19
19
|
|
20
20
|
|
21
|
-
def get_artifact_dir(run: Run | None = None, uri: str | None = None) -> Path:
|
21
|
+
def file_uri_to_path(uri: str) -> Path:
|
22
|
+
"""Convert a file URI to a local path."""
|
23
|
+
if not uri.startswith("file:"):
|
24
|
+
return Path(uri)
|
25
|
+
|
26
|
+
path = urllib.parse.urlparse(uri).path
|
27
|
+
return Path(urllib.request.url2pathname(path)) # for Windows
|
28
|
+
|
29
|
+
|
30
|
+
def get_artifact_dir(run: Run | None = None) -> Path:
|
22
31
|
"""Retrieve the artifact directory for the given run.
|
23
32
|
|
24
33
|
This function uses MLflow to get the artifact directory for the given run.
|
25
34
|
|
26
35
|
Args:
|
27
36
|
run (Run | None): The run object. Defaults to None.
|
28
|
-
uri (str | None): The URI of the artifact. Defaults to None.
|
29
37
|
|
30
38
|
Returns:
|
31
39
|
The local path to the directory where the artifacts are downloaded.
|
32
40
|
|
33
41
|
"""
|
34
|
-
if run is not None and uri is not None:
|
35
|
-
raise ValueError("Cannot provide both run and uri")
|
36
|
-
|
37
|
-
if run is None and uri is None:
|
42
|
+
if run is None:
|
38
43
|
uri = mlflow.get_artifact_uri()
|
39
|
-
|
44
|
+
else:
|
40
45
|
uri = run.info.artifact_uri
|
41
46
|
|
42
47
|
if not isinstance(uri, str):
|
43
48
|
raise NotImplementedError
|
44
49
|
|
45
|
-
|
46
|
-
return file_uri_to_path(uri)
|
47
|
-
|
48
|
-
return Path(uri)
|
49
|
-
|
50
|
-
|
51
|
-
def file_uri_to_path(uri: str) -> Path:
|
52
|
-
"""Convert a file URI to a local path."""
|
53
|
-
path = urllib.parse.urlparse(uri).path
|
54
|
-
return Path(urllib.request.url2pathname(path)) # for Windows
|
50
|
+
return file_uri_to_path(uri)
|
55
51
|
|
56
52
|
|
57
53
|
def get_artifact_path(run: Run | None, path: str) -> Path:
|
@@ -123,12 +119,7 @@ def load_config(run: Run) -> DictConfig:
|
|
123
119
|
return OmegaConf.load(path) # type: ignore
|
124
120
|
|
125
121
|
|
126
|
-
def get_overrides() -> list[str]:
|
127
|
-
"""Retrieve the overrides for the current run."""
|
128
|
-
return list(HydraConfig.get().overrides.task) # ListConifg -> list
|
129
|
-
|
130
|
-
|
131
|
-
def load_overrides(run: Run) -> list[str]:
|
122
|
+
def load_overrides(run: Run) -> ListConfig:
|
132
123
|
"""Load the overrides for a given run.
|
133
124
|
|
134
125
|
This function loads the overrides for the provided Run instance
|
@@ -137,15 +128,15 @@ def load_overrides(run: Run) -> list[str]:
|
|
137
128
|
`.hydra/overrides.yaml` is not found in the run's artifact directory.
|
138
129
|
|
139
130
|
Args:
|
140
|
-
run (Run): The Run instance for which to load the
|
131
|
+
run (Run): The Run instance for which to load the configuration.
|
141
132
|
|
142
133
|
Returns:
|
143
|
-
The loaded
|
144
|
-
if the
|
134
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
135
|
+
DictConfig if the configuration file is not found.
|
145
136
|
|
146
137
|
"""
|
147
138
|
path = get_artifact_dir(run) / ".hydra/overrides.yaml"
|
148
|
-
return
|
139
|
+
return OmegaConf.load(path) # type: ignore
|
149
140
|
|
150
141
|
|
151
142
|
def remove_run(run: Run | Iterable[Run]) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.7.4
|
3
|
+
Version: 0.8.0
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -55,7 +55,7 @@ Description-Content-Type: text/markdown
|
|
55
55
|
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
56
56
|
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
57
57
|
[python-v-link]: https://pypi.org/project/hydraflow
|
58
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.
|
58
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
59
59
|
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
60
60
|
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
61
61
|
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
@@ -108,7 +108,7 @@ class MySQLConfig:
|
|
108
108
|
cs = ConfigStore.instance()
|
109
109
|
cs.store(name="config", node=MySQLConfig)
|
110
110
|
|
111
|
-
@hydra.main(
|
111
|
+
@hydra.main(config_name="config", version_base=None)
|
112
112
|
def my_app(cfg: MySQLConfig) -> None:
|
113
113
|
# Set experiment by Hydra job name.
|
114
114
|
hydraflow.set_experiment()
|
@@ -0,0 +1,17 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=yp4LT1FDYPIduR6PqJNuSm9kztVCpL1P0zcPHWGvaJU,712
|
2
|
+
hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
|
3
|
+
hydraflow/config.py,sha256=SJzjgsO_kzB78_whJ3lmy7GlZvTvwZONH1BJBn8zCuI,3817
|
4
|
+
hydraflow/context.py,sha256=H5xeNbhMS23U-epsucprl5G3lbOR1aO9nDES4QGLWNk,4747
|
5
|
+
hydraflow/main.py,sha256=O5ETCMCg12zXoaYlZMHcM4IYAs6GVTkADrmEssrtjkk,4994
|
6
|
+
hydraflow/mlflow.py,sha256=pRRsBaBBH4cfzSko-8mmo5bV04GGklxoO0kORkInypM,5663
|
7
|
+
hydraflow/param.py,sha256=LHU9j9_7oA99igasoOyKofKClVr9FmGA3UABJ-KmyS0,4538
|
8
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
hydraflow/run_collection.py,sha256=rtH1cglSlK3QFg9hhifo9lzjDa9veHpoyYxEOmIEM84,19646
|
10
|
+
hydraflow/run_data.py,sha256=S2NNFtA1TleqpgeK4mIn1YY8YbWJFyhF7wXR5NWeYLk,1604
|
11
|
+
hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
|
12
|
+
hydraflow/utils.py,sha256=T4ESiepEcqR-FZlo_m7VTBEFMwalrqPI8eFKPagvv3Q,4402
|
13
|
+
hydraflow-0.8.0.dist-info/METADATA,sha256=J1ilgG7L4A8OvzgZSNycp0YgyHk5e8_gwTr9NN82Ejk,4767
|
14
|
+
hydraflow-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
+
hydraflow-0.8.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
16
|
+
hydraflow-0.8.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
17
|
+
hydraflow-0.8.0.dist-info/RECORD,,
|
hydraflow-0.7.4.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=rujOGabEPPhPfyqTHynem3unqIEQ1haTWWSMuu2LuoQ,898
|
2
|
-
hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
|
3
|
-
hydraflow/config.py,sha256=MNX9da5bPVDcjnpji7Cm9ndK6ura92pt361m4PRh6_E,4326
|
4
|
-
hydraflow/context.py,sha256=3xfKhMozkKFqtWeOp9Gie0A5o5URMta4US6iVD5TcLU,6002
|
5
|
-
hydraflow/main.py,sha256=hroncI_SNpNgEtdxLgzI397J5S2Amv7J0atnPxwBePM,1314
|
6
|
-
hydraflow/mlflow.py,sha256=imD3XL0RTlpnKrkyvO8FNy_Bv6hwSfLiOu1yJuL40ck,8773
|
7
|
-
hydraflow/param.py,sha256=yu1aMNXRLegXGDL-68vwIkfeDF9CaU784WZENGLwl7Q,4572
|
8
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
hydraflow/run_collection.py,sha256=YCWg5Dz1j49xB2LA75onq5wsAeQQbifXpG4yPUwRN4I,24776
|
10
|
-
hydraflow/run_data.py,sha256=dpyyfnuH9mCtIZeigMo1iFQo9bafMdEL4i4uI2l0UqY,1525
|
11
|
-
hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
|
12
|
-
hydraflow/utils.py,sha256=a9i5PEJn8Ssowv9dqHadAihZXlsqtVjHZ9MZvkPq1bY,4747
|
13
|
-
hydraflow-0.7.4.dist-info/METADATA,sha256=GTJi5z8TTIwPy6qpscw-t3Mb1V-GOR0iYU_IB-DB-UE,4766
|
14
|
-
hydraflow-0.7.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
-
hydraflow-0.7.4.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
16
|
-
hydraflow-0.7.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
17
|
-
hydraflow-0.7.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|