hydraflow 0.2.7__tar.gz → 0.2.8__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {hydraflow-0.2.7 → hydraflow-0.2.8}/PKG-INFO +1 -1
- {hydraflow-0.2.7 → hydraflow-0.2.8}/pyproject.toml +1 -1
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/__init__.py +4 -4
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/context.py +24 -8
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/info.py +57 -4
- hydraflow-0.2.8/src/hydraflow/mlflow.py +175 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/run_collection.py +25 -101
- hydraflow-0.2.7/tests/scripts/log_run.py → hydraflow-0.2.8/tests/scripts/app.py +21 -3
- hydraflow-0.2.8/tests/test_app.py +109 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_context.py +1 -1
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_info.py +14 -1
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_log_run.py +5 -3
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_run_collection.py +5 -5
- hydraflow-0.2.7/mlruns/0/meta.yaml +0 -6
- hydraflow-0.2.7/src/hydraflow/mlflow.py +0 -119
- {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/devcontainer.json +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/.gitattributes +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/.gitignore +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/LICENSE +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/README.md +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/asyncio.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/config.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/progress.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/scripts/watch.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_asyncio.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_config.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_mlflow.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_progress.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_version.py +0 -0
- {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.7
|
3
|
+
Version: 0.2.8
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -1,11 +1,11 @@
|
|
1
1
|
from .context import chdir_artifact, log_run, start_run, watch
|
2
|
-
from .info import load_config
|
3
|
-
from .mlflow import
|
4
|
-
from .run_collection import (
|
5
|
-
RunCollection,
|
2
|
+
from .info import get_artifact_dir, get_hydra_output_dir, load_config
|
3
|
+
from .mlflow import (
|
6
4
|
list_runs,
|
7
5
|
search_runs,
|
6
|
+
set_experiment,
|
8
7
|
)
|
8
|
+
from .run_collection import RunCollection
|
9
9
|
|
10
10
|
__all__ = [
|
11
11
|
"RunCollection",
|
@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
|
|
14
14
|
|
15
15
|
import mlflow
|
16
16
|
from hydra.core.hydra_config import HydraConfig
|
17
|
-
from watchdog.events import FileModifiedEvent,
|
17
|
+
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
18
18
|
from watchdog.observers import Observer
|
19
19
|
|
20
|
-
from hydraflow.
|
20
|
+
from hydraflow.info import get_artifact_dir
|
21
|
+
from hydraflow.mlflow import log_params
|
21
22
|
|
22
23
|
if TYPE_CHECKING:
|
23
24
|
from collections.abc import Callable, Iterator
|
@@ -68,7 +69,7 @@ def log_run(
|
|
68
69
|
mlflow.log_artifact(local_path)
|
69
70
|
|
70
71
|
try:
|
71
|
-
with watch(log_artifact, output_dir):
|
72
|
+
with watch(log_artifact, output_dir, ignore_log=False):
|
72
73
|
yield
|
73
74
|
|
74
75
|
except Exception as e:
|
@@ -140,9 +141,11 @@ def start_run(
|
|
140
141
|
|
141
142
|
@contextmanager
|
142
143
|
def watch(
|
143
|
-
|
144
|
+
callback: Callable[[Path], None],
|
144
145
|
dir: Path | str = "",
|
145
146
|
timeout: int = 60,
|
147
|
+
ignore_patterns: list[str] | None = None,
|
148
|
+
ignore_log: bool = True,
|
146
149
|
) -> Iterator[None]:
|
147
150
|
"""
|
148
151
|
Watch the given directory for changes and call the provided function
|
@@ -154,7 +157,7 @@ def watch(
|
|
154
157
|
period or until the context is exited.
|
155
158
|
|
156
159
|
Args:
|
157
|
-
|
160
|
+
callback (Callable[[Path], None]): The function to call when a change is
|
158
161
|
detected. It should accept a single argument of type `Path`,
|
159
162
|
which is the path of the modified file.
|
160
163
|
dir (Path | str): The directory to watch. If not specified,
|
@@ -174,7 +177,7 @@ def watch(
|
|
174
177
|
if isinstance(dir, Path):
|
175
178
|
dir = dir.as_posix()
|
176
179
|
|
177
|
-
handler = Handler(
|
180
|
+
handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
|
178
181
|
observer = Observer()
|
179
182
|
observer.schedule(handler, dir, recursive=True)
|
180
183
|
observer.start()
|
@@ -198,10 +201,23 @@ def watch(
|
|
198
201
|
observer.join()
|
199
202
|
|
200
203
|
|
201
|
-
class Handler(
|
202
|
-
def __init__(
|
204
|
+
class Handler(PatternMatchingEventHandler):
|
205
|
+
def __init__(
|
206
|
+
self,
|
207
|
+
func: Callable[[Path], None],
|
208
|
+
ignore_patterns: list[str] | None = None,
|
209
|
+
ignore_log: bool = True,
|
210
|
+
) -> None:
|
203
211
|
self.func = func
|
204
212
|
|
213
|
+
if ignore_log:
|
214
|
+
if ignore_patterns:
|
215
|
+
ignore_patterns.append("*.log")
|
216
|
+
else:
|
217
|
+
ignore_patterns = ["*.log"]
|
218
|
+
|
219
|
+
super().__init__(ignore_patterns=ignore_patterns)
|
220
|
+
|
205
221
|
def on_modified(self, event: FileModifiedEvent) -> None:
|
206
222
|
file = Path(str(event.src_path))
|
207
223
|
if file.is_file():
|
@@ -1,14 +1,14 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
from pathlib import Path
|
3
4
|
from typing import TYPE_CHECKING
|
4
5
|
|
6
|
+
import mlflow
|
7
|
+
from hydra.core.hydra_config import HydraConfig
|
8
|
+
from mlflow.tracking import artifact_utils
|
5
9
|
from omegaconf import DictConfig, OmegaConf
|
6
10
|
|
7
|
-
from hydraflow.mlflow import get_artifact_dir
|
8
|
-
|
9
11
|
if TYPE_CHECKING:
|
10
|
-
from pathlib import Path
|
11
|
-
|
12
12
|
from mlflow.entities import Run
|
13
13
|
|
14
14
|
from hydraflow.run_collection import RunCollection
|
@@ -43,6 +43,59 @@ class RunCollectionInfo:
|
|
43
43
|
return [load_config(run) for run in self._runs]
|
44
44
|
|
45
45
|
|
46
|
+
def get_artifact_dir(run: Run | None = None) -> Path:
|
47
|
+
"""
|
48
|
+
Retrieve the artifact directory for the given run.
|
49
|
+
|
50
|
+
This function uses MLflow to get the artifact directory for the given run.
|
51
|
+
|
52
|
+
Args:
|
53
|
+
run (Run | None): The run object. Defaults to None.
|
54
|
+
|
55
|
+
Returns:
|
56
|
+
The local path to the directory where the artifacts are downloaded.
|
57
|
+
"""
|
58
|
+
if run is None:
|
59
|
+
uri = mlflow.get_artifact_uri()
|
60
|
+
else:
|
61
|
+
uri = artifact_utils.get_artifact_uri(run.info.run_id)
|
62
|
+
|
63
|
+
return Path(mlflow.artifacts.download_artifacts(uri))
|
64
|
+
|
65
|
+
|
66
|
+
def get_hydra_output_dir(run: Run | None = None) -> Path:
|
67
|
+
"""
|
68
|
+
Retrieve the Hydra output directory for the given run.
|
69
|
+
|
70
|
+
This function returns the Hydra output directory. If no run is provided,
|
71
|
+
it retrieves the output directory from the current Hydra configuration.
|
72
|
+
If a run is provided, it retrieves the artifact path for the run, loads
|
73
|
+
the Hydra configuration from the downloaded artifacts, and returns the
|
74
|
+
output directory specified in that configuration.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
run (Run | None): The run object. Defaults to None.
|
78
|
+
|
79
|
+
Returns:
|
80
|
+
Path: The path to the Hydra output directory.
|
81
|
+
|
82
|
+
Raises:
|
83
|
+
FileNotFoundError: If the Hydra configuration file is not found
|
84
|
+
in the artifacts.
|
85
|
+
"""
|
86
|
+
if run is None:
|
87
|
+
hc = HydraConfig.get()
|
88
|
+
return Path(hc.runtime.output_dir)
|
89
|
+
|
90
|
+
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
91
|
+
|
92
|
+
if path.exists():
|
93
|
+
hc = OmegaConf.load(path)
|
94
|
+
return Path(hc.hydra.runtime.output_dir)
|
95
|
+
|
96
|
+
raise FileNotFoundError
|
97
|
+
|
98
|
+
|
46
99
|
def load_config(run: Run) -> DictConfig:
|
47
100
|
"""
|
48
101
|
Load the configuration for a given run.
|
@@ -0,0 +1,175 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra configuration objects
|
3
|
+
and set up experiments using MLflow. It includes methods for managing experiments,
|
4
|
+
searching for runs, and logging parameters and artifacts.
|
5
|
+
|
6
|
+
Key Features:
|
7
|
+
- **Experiment Management**: Set and manage MLflow experiments with customizable names
|
8
|
+
based on Hydra configuration.
|
9
|
+
- **Run Logging**: Log parameters and metrics from Hydra configuration objects to
|
10
|
+
MLflow, ensuring that all relevant information is captured during experiments.
|
11
|
+
- **Run Search**: Search for runs based on various criteria, allowing for flexible
|
12
|
+
retrieval of experiment results.
|
13
|
+
- **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
|
14
|
+
easy access to outputs generated during experiments.
|
15
|
+
|
16
|
+
This module is designed to integrate seamlessly with Hydra, providing a robust
|
17
|
+
solution for tracking machine learning experiments and their associated metadata.
|
18
|
+
"""
|
19
|
+
|
20
|
+
from __future__ import annotations
|
21
|
+
|
22
|
+
from pathlib import Path
|
23
|
+
from typing import TYPE_CHECKING
|
24
|
+
|
25
|
+
import mlflow
|
26
|
+
from hydra.core.hydra_config import HydraConfig
|
27
|
+
from mlflow.entities import ViewType
|
28
|
+
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
|
29
|
+
|
30
|
+
from hydraflow.config import iter_params
|
31
|
+
from hydraflow.run_collection import RunCollection
|
32
|
+
|
33
|
+
if TYPE_CHECKING:
|
34
|
+
from mlflow.entities.experiment import Experiment
|
35
|
+
|
36
|
+
|
37
|
+
def set_experiment(
|
38
|
+
prefix: str = "",
|
39
|
+
suffix: str = "",
|
40
|
+
uri: str | Path | None = None,
|
41
|
+
) -> Experiment:
|
42
|
+
"""
|
43
|
+
Sets the experiment name and tracking URI optionally.
|
44
|
+
|
45
|
+
This function sets the experiment name by combining the given prefix,
|
46
|
+
the job name from HydraConfig, and the given suffix. Optionally, it can
|
47
|
+
also set the tracking URI.
|
48
|
+
|
49
|
+
Args:
|
50
|
+
prefix (str): The prefix to prepend to the experiment name.
|
51
|
+
suffix (str): The suffix to append to the experiment name.
|
52
|
+
uri (str | Path | None): The tracking URI to use. Defaults to None.
|
53
|
+
|
54
|
+
Returns:
|
55
|
+
Experiment: An instance of `mlflow.entities.Experiment` representing
|
56
|
+
the new active experiment.
|
57
|
+
"""
|
58
|
+
if uri is not None:
|
59
|
+
mlflow.set_tracking_uri(uri)
|
60
|
+
|
61
|
+
hc = HydraConfig.get()
|
62
|
+
name = f"{prefix}{hc.job.name}{suffix}"
|
63
|
+
return mlflow.set_experiment(name)
|
64
|
+
|
65
|
+
|
66
|
+
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
67
|
+
"""
|
68
|
+
Log the parameters from the given configuration object.
|
69
|
+
|
70
|
+
This method logs the parameters from the provided configuration object
|
71
|
+
using MLflow. It iterates over the parameters and logs them using the
|
72
|
+
`mlflow.log_param` method.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
config (object): The configuration object to log the parameters from.
|
76
|
+
synchronous (bool | None): Whether to log the parameters synchronously.
|
77
|
+
Defaults to None.
|
78
|
+
"""
|
79
|
+
for key, value in iter_params(config):
|
80
|
+
mlflow.log_param(key, value, synchronous=synchronous)
|
81
|
+
|
82
|
+
|
83
|
+
def search_runs(
|
84
|
+
experiment_ids: list[str] | None = None,
|
85
|
+
filter_string: str = "",
|
86
|
+
run_view_type: int = ViewType.ACTIVE_ONLY,
|
87
|
+
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
88
|
+
order_by: list[str] | None = None,
|
89
|
+
search_all_experiments: bool = False,
|
90
|
+
experiment_names: list[str] | None = None,
|
91
|
+
) -> RunCollection:
|
92
|
+
"""
|
93
|
+
Search for Runs that fit the specified criteria.
|
94
|
+
|
95
|
+
This function wraps the `mlflow.search_runs` function and returns the
|
96
|
+
results as a `RunCollection` object. It allows for flexible searching of
|
97
|
+
MLflow runs based on various criteria.
|
98
|
+
|
99
|
+
Note:
|
100
|
+
The returned runs are sorted by their start time in ascending order.
|
101
|
+
|
102
|
+
Args:
|
103
|
+
experiment_ids (list[str] | None): List of experiment IDs. Search can
|
104
|
+
work with experiment IDs or experiment names, but not both in the
|
105
|
+
same call. Values other than ``None`` or ``[]`` will result in
|
106
|
+
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
107
|
+
``None`` will default to the active experiment if ``experiment_names``
|
108
|
+
is ``None`` or ``[]``.
|
109
|
+
filter_string (str): Filter query string, defaults to searching all
|
110
|
+
runs.
|
111
|
+
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
112
|
+
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
113
|
+
max_results (int): The maximum number of runs to put in the dataframe.
|
114
|
+
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
115
|
+
machine.
|
116
|
+
order_by (list[str] | None): List of columns to order by (e.g.,
|
117
|
+
"metrics.rmse"). The ``order_by`` column can contain an optional
|
118
|
+
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
119
|
+
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
120
|
+
``start_time DESC``, then ``run_id``.
|
121
|
+
search_all_experiments (bool): Boolean specifying whether all
|
122
|
+
experiments should be searched. Only honored if ``experiment_ids``
|
123
|
+
is ``[]`` or ``None``.
|
124
|
+
experiment_names (list[str] | None): List of experiment names. Search
|
125
|
+
can work with experiment IDs or experiment names, but not both in
|
126
|
+
the same call. Values other than ``None`` or ``[]`` will result in
|
127
|
+
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
128
|
+
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
129
|
+
default to the active experiment if ``experiment_ids`` is ``None``
|
130
|
+
or ``[]``.
|
131
|
+
|
132
|
+
Returns:
|
133
|
+
A `RunCollection` object containing the search results.
|
134
|
+
"""
|
135
|
+
runs = mlflow.search_runs(
|
136
|
+
experiment_ids=experiment_ids,
|
137
|
+
filter_string=filter_string,
|
138
|
+
run_view_type=run_view_type,
|
139
|
+
max_results=max_results,
|
140
|
+
order_by=order_by,
|
141
|
+
output_format="list",
|
142
|
+
search_all_experiments=search_all_experiments,
|
143
|
+
experiment_names=experiment_names,
|
144
|
+
)
|
145
|
+
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
146
|
+
return RunCollection(runs) # type: ignore
|
147
|
+
|
148
|
+
|
149
|
+
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
|
150
|
+
"""
|
151
|
+
List all runs for the specified experiments.
|
152
|
+
|
153
|
+
This function retrieves all runs for the given list of experiment names.
|
154
|
+
If no experiment names are provided (None), it defaults to searching all runs
|
155
|
+
for the currently active experiment. If an empty list is provided, the function
|
156
|
+
will search all runs for all experiments except the "Default" experiment.
|
157
|
+
The function returns the results as a `RunCollection` object.
|
158
|
+
|
159
|
+
Note:
|
160
|
+
The returned runs are sorted by their start time in ascending order.
|
161
|
+
|
162
|
+
Args:
|
163
|
+
experiment_names (list[str] | None): List of experiment names to search
|
164
|
+
for runs. If None or an empty list is provided, the function will
|
165
|
+
search the currently active experiment or all experiments except
|
166
|
+
the "Default" experiment.
|
167
|
+
|
168
|
+
Returns:
|
169
|
+
A `RunCollection` object containing the runs for the specified experiments.
|
170
|
+
"""
|
171
|
+
if experiment_names == []:
|
172
|
+
experiments = mlflow.search_experiments()
|
173
|
+
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
174
|
+
|
175
|
+
return search_runs(experiment_names=experiment_names)
|
@@ -1,7 +1,24 @@
|
|
1
1
|
"""
|
2
|
-
This module provides functionality for managing and interacting with MLflow
|
3
|
-
|
4
|
-
|
2
|
+
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
+
It includes the `RunCollection` class, which serves as a container for multiple MLflow
|
4
|
+
run objects, and various methods to filter, retrieve, and manipulate these runs.
|
5
|
+
|
6
|
+
Key Features:
|
7
|
+
- **Run Management**: The `RunCollection` class allows for easy management of multiple
|
8
|
+
MLflow runs, providing methods to access, filter, and sort runs based on various
|
9
|
+
criteria.
|
10
|
+
- **Filtering**: The module supports filtering runs based on specific configurations
|
11
|
+
and parameters, enabling users to easily find runs that match certain conditions.
|
12
|
+
- **Retrieval**: Users can retrieve specific runs, including the first, last, or any
|
13
|
+
run that matches a given configuration.
|
14
|
+
- **Artifact Handling**: The module provides methods to access and manipulate the
|
15
|
+
artifacts associated with each run, including retrieving artifact URIs and directories.
|
16
|
+
|
17
|
+
The `RunCollection` class is designed to work seamlessly with the MLflow tracking
|
18
|
+
API, providing a robust solution for managing machine learning experiment runs and
|
19
|
+
their associated metadata. This module is particularly useful for data scientists and
|
20
|
+
machine learning engineers who need to track and analyze the results of their experiments
|
21
|
+
efficiently.
|
5
22
|
"""
|
6
23
|
|
7
24
|
from __future__ import annotations
|
@@ -10,10 +27,7 @@ from dataclasses import dataclass, field
|
|
10
27
|
from itertools import chain
|
11
28
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
|
12
29
|
|
13
|
-
import mlflow
|
14
|
-
from mlflow.entities import ViewType
|
15
30
|
from mlflow.entities.run import Run
|
16
|
-
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
|
17
31
|
|
18
32
|
from hydraflow.config import iter_params
|
19
33
|
from hydraflow.info import RunCollectionInfo
|
@@ -26,101 +40,6 @@ if TYPE_CHECKING:
|
|
26
40
|
from omegaconf import DictConfig
|
27
41
|
|
28
42
|
|
29
|
-
def search_runs(
|
30
|
-
experiment_ids: list[str] | None = None,
|
31
|
-
filter_string: str = "",
|
32
|
-
run_view_type: int = ViewType.ACTIVE_ONLY,
|
33
|
-
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
34
|
-
order_by: list[str] | None = None,
|
35
|
-
search_all_experiments: bool = False,
|
36
|
-
experiment_names: list[str] | None = None,
|
37
|
-
) -> RunCollection:
|
38
|
-
"""
|
39
|
-
Search for Runs that fit the specified criteria.
|
40
|
-
|
41
|
-
This function wraps the `mlflow.search_runs` function and returns the
|
42
|
-
results as a `RunCollection` object. It allows for flexible searching of
|
43
|
-
MLflow runs based on various criteria.
|
44
|
-
|
45
|
-
Note:
|
46
|
-
The returned runs are sorted by their start time in ascending order.
|
47
|
-
|
48
|
-
Args:
|
49
|
-
experiment_ids (list[str] | None): List of experiment IDs. Search can
|
50
|
-
work with experiment IDs or experiment names, but not both in the
|
51
|
-
same call. Values other than ``None`` or ``[]`` will result in
|
52
|
-
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
53
|
-
``None`` will default to the active experiment if ``experiment_names``
|
54
|
-
is ``None`` or ``[]``.
|
55
|
-
filter_string (str): Filter query string, defaults to searching all
|
56
|
-
runs.
|
57
|
-
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
58
|
-
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
59
|
-
max_results (int): The maximum number of runs to put in the dataframe.
|
60
|
-
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
61
|
-
machine.
|
62
|
-
order_by (list[str] | None): List of columns to order by (e.g.,
|
63
|
-
"metrics.rmse"). The ``order_by`` column can contain an optional
|
64
|
-
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
65
|
-
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
66
|
-
``start_time DESC``, then ``run_id``.
|
67
|
-
search_all_experiments (bool): Boolean specifying whether all
|
68
|
-
experiments should be searched. Only honored if ``experiment_ids``
|
69
|
-
is ``[]`` or ``None``.
|
70
|
-
experiment_names (list[str] | None): List of experiment names. Search
|
71
|
-
can work with experiment IDs or experiment names, but not both in
|
72
|
-
the same call. Values other than ``None`` or ``[]`` will result in
|
73
|
-
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
74
|
-
``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
|
75
|
-
default to the active experiment if ``experiment_ids`` is ``None``
|
76
|
-
or ``[]``.
|
77
|
-
|
78
|
-
Returns:
|
79
|
-
A `RunCollection` object containing the search results.
|
80
|
-
"""
|
81
|
-
runs = mlflow.search_runs(
|
82
|
-
experiment_ids=experiment_ids,
|
83
|
-
filter_string=filter_string,
|
84
|
-
run_view_type=run_view_type,
|
85
|
-
max_results=max_results,
|
86
|
-
order_by=order_by,
|
87
|
-
output_format="list",
|
88
|
-
search_all_experiments=search_all_experiments,
|
89
|
-
experiment_names=experiment_names,
|
90
|
-
)
|
91
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
92
|
-
return RunCollection(runs) # type: ignore
|
93
|
-
|
94
|
-
|
95
|
-
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
|
96
|
-
"""
|
97
|
-
List all runs for the specified experiments.
|
98
|
-
|
99
|
-
This function retrieves all runs for the given list of experiment names.
|
100
|
-
If no experiment names are provided (None), it defaults to searching all runs
|
101
|
-
for the currently active experiment. If an empty list is provided, the function
|
102
|
-
will search all runs for all experiments except the "Default" experiment.
|
103
|
-
The function returns the results as a `RunCollection` object.
|
104
|
-
|
105
|
-
Note:
|
106
|
-
The returned runs are sorted by their start time in ascending order.
|
107
|
-
|
108
|
-
Args:
|
109
|
-
experiment_names (list[str] | None): List of experiment names to search
|
110
|
-
for runs. If None or an empty list is provided, the function will
|
111
|
-
search the currently active experiment or all experiments except
|
112
|
-
the "Default" experiment.
|
113
|
-
|
114
|
-
Returns:
|
115
|
-
A `RunCollection` object containing the runs for the specified experiments.
|
116
|
-
"""
|
117
|
-
if experiment_names == []:
|
118
|
-
experiments = mlflow.search_experiments()
|
119
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
120
|
-
|
121
|
-
return search_runs(experiment_names=experiment_names)
|
122
|
-
|
123
|
-
|
124
43
|
T = TypeVar("T")
|
125
44
|
P = ParamSpec("P")
|
126
45
|
|
@@ -132,6 +51,11 @@ class RunCollection:
|
|
132
51
|
|
133
52
|
This class provides methods to interact with the runs, such as filtering,
|
134
53
|
retrieving specific runs, and accessing run information.
|
54
|
+
|
55
|
+
Key Features:
|
56
|
+
- Filtering: Easily filter runs based on various criteria.
|
57
|
+
- Retrieval: Access specific runs by index or through methods.
|
58
|
+
- Metadata: Access run metadata and associated information.
|
135
59
|
"""
|
136
60
|
|
137
61
|
_runs: list[Run]
|
@@ -1,7 +1,9 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
import logging
|
4
|
+
import time
|
4
5
|
from dataclasses import dataclass
|
6
|
+
from pathlib import Path
|
5
7
|
|
6
8
|
import hydra
|
7
9
|
import mlflow
|
@@ -24,16 +26,32 @@ cs.store(name="config", node=MySQLConfig)
|
|
24
26
|
|
25
27
|
@hydra.main(version_base=None, config_name="config")
|
26
28
|
def app(cfg: MySQLConfig):
|
27
|
-
|
29
|
+
hydraflow.set_experiment(prefix="_", suffix="_")
|
28
30
|
with hydraflow.start_run(cfg):
|
31
|
+
log.info(f"START, {cfg.host}, {cfg.port} ")
|
32
|
+
|
29
33
|
artifact_dir = hydraflow.get_artifact_dir()
|
30
34
|
output_dir = hydraflow.get_hydra_output_dir()
|
31
|
-
|
35
|
+
|
32
36
|
mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
|
33
37
|
mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
|
34
|
-
|
38
|
+
|
39
|
+
with hydraflow.watch(callback, ignore_patterns=["b.txt"]):
|
40
|
+
(artifact_dir / "a.txt").write_text("abc")
|
41
|
+
time.sleep(0.1)
|
42
|
+
|
43
|
+
mlflow.log_metric("m", cfg.port + 1, 1)
|
44
|
+
if cfg.host == "x":
|
45
|
+
mlflow.log_metric("m", cfg.port + 10, 2)
|
46
|
+
|
35
47
|
log.info("END")
|
36
48
|
|
37
49
|
|
50
|
+
def callback(path: Path):
|
51
|
+
log.info(f"WATCH, {path.as_posix()}")
|
52
|
+
m = len(path.read_text()) # len("abc") == 3
|
53
|
+
mlflow.log_metric("watch", m, 1, synchronous=True)
|
54
|
+
|
55
|
+
|
38
56
|
if __name__ == "__main__":
|
39
57
|
app()
|
@@ -0,0 +1,109 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import subprocess
|
4
|
+
import sys
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import mlflow
|
8
|
+
import pytest
|
9
|
+
from omegaconf import DictConfig
|
10
|
+
|
11
|
+
from hydraflow.run_collection import RunCollection
|
12
|
+
|
13
|
+
|
14
|
+
@pytest.fixture
|
15
|
+
def rc(monkeypatch, tmp_path):
|
16
|
+
import hydraflow
|
17
|
+
|
18
|
+
file = Path("tests/scripts/app.py").absolute()
|
19
|
+
monkeypatch.chdir(tmp_path)
|
20
|
+
|
21
|
+
args = [sys.executable, file.as_posix(), "-m"]
|
22
|
+
args += ["host=x,y", "port=1,2", "hydra.job.name=info"]
|
23
|
+
subprocess.check_call(args)
|
24
|
+
|
25
|
+
mlflow.set_experiment("_info_")
|
26
|
+
yield hydraflow.list_runs()
|
27
|
+
|
28
|
+
|
29
|
+
def test_app_info_run_id(rc: RunCollection):
|
30
|
+
assert len(rc.info.run_id) == 4
|
31
|
+
|
32
|
+
|
33
|
+
def test_app_info_params(rc: RunCollection):
|
34
|
+
params = rc.info.params
|
35
|
+
assert params[0] == {"port": "1", "host": "x"}
|
36
|
+
assert params[1] == {"port": "2", "host": "x"}
|
37
|
+
assert params[2] == {"port": "1", "host": "y"}
|
38
|
+
assert params[3] == {"port": "2", "host": "y"}
|
39
|
+
|
40
|
+
|
41
|
+
def test_app_info_metrics(rc: RunCollection):
|
42
|
+
metrics = rc.info.metrics
|
43
|
+
assert metrics[0] == {"m": 11, "watch": 3}
|
44
|
+
assert metrics[1] == {"m": 12, "watch": 3}
|
45
|
+
assert metrics[2] == {"m": 2, "watch": 3}
|
46
|
+
assert metrics[3] == {"m": 3, "watch": 3}
|
47
|
+
|
48
|
+
|
49
|
+
def test_app_info_config(rc: RunCollection):
|
50
|
+
config = rc.info.config
|
51
|
+
assert config[0].port == 1
|
52
|
+
assert config[1].port == 2
|
53
|
+
assert config[2].host == "y"
|
54
|
+
assert config[3].host == "y"
|
55
|
+
|
56
|
+
|
57
|
+
def test_app_info_artifact_uri(rc: RunCollection):
|
58
|
+
uris = rc.info.artifact_uri
|
59
|
+
print(uris)
|
60
|
+
assert all(uri.startswith("file://") for uri in uris) # type: ignore
|
61
|
+
assert all(uri.endswith("/artifacts") for uri in uris) # type: ignore
|
62
|
+
assert all("mlruns" in uri for uri in uris) # type: ignore
|
63
|
+
|
64
|
+
|
65
|
+
def test_app_info_artifact_dir(rc: RunCollection):
|
66
|
+
from hydraflow.info import get_artifact_dir
|
67
|
+
|
68
|
+
dirs = list(rc.map(get_artifact_dir))
|
69
|
+
assert rc.info.artifact_dir == dirs
|
70
|
+
|
71
|
+
|
72
|
+
def test_app_hydra_output_dir(rc: RunCollection):
|
73
|
+
from hydraflow.info import get_hydra_output_dir
|
74
|
+
|
75
|
+
dirs = list(rc.map(get_hydra_output_dir))
|
76
|
+
assert dirs[0].stem == "0"
|
77
|
+
assert dirs[1].stem == "1"
|
78
|
+
assert dirs[2].stem == "2"
|
79
|
+
assert dirs[3].stem == "3"
|
80
|
+
|
81
|
+
|
82
|
+
def test_app_map_config(rc: RunCollection):
|
83
|
+
ports = []
|
84
|
+
|
85
|
+
def func(c: DictConfig, *, a: int):
|
86
|
+
ports.append(c.port + 1)
|
87
|
+
return c.host
|
88
|
+
|
89
|
+
hosts = list(rc.map_config(func, a=1))
|
90
|
+
assert hosts == ["x", "x", "y", "y"]
|
91
|
+
assert ports == [2, 3, 2, 3]
|
92
|
+
|
93
|
+
|
94
|
+
def test_app_group_by(rc: RunCollection):
|
95
|
+
grouped = rc.group_by("host")
|
96
|
+
assert len(grouped) == 2
|
97
|
+
assert grouped[("x",)].info.params[0] == {"port": "1", "host": "x"}
|
98
|
+
assert grouped[("x",)].info.params[1] == {"port": "2", "host": "x"}
|
99
|
+
assert grouped[("y",)].info.params[0] == {"port": "1", "host": "y"}
|
100
|
+
assert grouped[("y",)].info.params[1] == {"port": "2", "host": "y"}
|
101
|
+
|
102
|
+
|
103
|
+
def test_app_group_by_values(rc: RunCollection):
|
104
|
+
grouped = rc.group_by_values("port")
|
105
|
+
assert len(grouped) == 2
|
106
|
+
assert grouped[0].info.params[0] == {"port": "1", "host": "x"}
|
107
|
+
assert grouped[0].info.params[1] == {"port": "1", "host": "y"}
|
108
|
+
assert grouped[1].info.params[0] == {"port": "2", "host": "x"}
|
109
|
+
assert grouped[1].info.params[1] == {"port": "2", "host": "y"}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
from pathlib import Path
|
2
4
|
|
3
5
|
import mlflow
|
@@ -8,11 +10,12 @@ from hydraflow.run_collection import RunCollection
|
|
8
10
|
|
9
11
|
@pytest.fixture
|
10
12
|
def runs(monkeypatch, tmp_path):
|
11
|
-
from hydraflow.run_collection import search_runs
|
13
|
+
from hydraflow.mlflow import search_runs
|
12
14
|
|
13
15
|
monkeypatch.chdir(tmp_path)
|
14
16
|
|
15
17
|
mlflow.set_experiment("test_info")
|
18
|
+
|
16
19
|
for x in range(3):
|
17
20
|
with mlflow.start_run(run_name=f"{x}"):
|
18
21
|
mlflow.log_param("p", x)
|
@@ -49,3 +52,13 @@ def test_info_artifact_dir(runs: RunCollection):
|
|
49
52
|
dir = runs.info.artifact_dir
|
50
53
|
assert all(isinstance(d, Path) for d in dir)
|
51
54
|
assert all(d.stem == "artifacts" for d in dir) # type: ignore
|
55
|
+
|
56
|
+
|
57
|
+
def test_info_empty_run_collection():
|
58
|
+
rc = RunCollection([])
|
59
|
+
assert rc.info.run_id == []
|
60
|
+
assert rc.info.params == []
|
61
|
+
assert rc.info.metrics == []
|
62
|
+
assert rc.info.artifact_uri == []
|
63
|
+
assert rc.info.artifact_dir == []
|
64
|
+
assert rc.info.config == []
|
@@ -12,12 +12,14 @@ from mlflow.entities.run import Run
|
|
12
12
|
|
13
13
|
@pytest.fixture
|
14
14
|
def runs(monkeypatch, tmp_path):
|
15
|
-
file = Path("tests/scripts/log_run.py").absolute()
|
15
|
+
file = Path("tests/scripts/app.py").absolute()
|
16
16
|
monkeypatch.chdir(tmp_path)
|
17
17
|
|
18
|
-
|
18
|
+
args = [sys.executable, file.as_posix(), "-m"]
|
19
|
+
args += ["host=x,y", "port=1,2", "hydra.job.name=log_run"]
|
20
|
+
subprocess.check_call(args)
|
19
21
|
|
20
|
-
mlflow.set_experiment("
|
22
|
+
mlflow.set_experiment("_log_run_")
|
21
23
|
runs = mlflow.search_runs(output_format="list")
|
22
24
|
assert len(runs) == 4
|
23
25
|
assert isinstance(runs, list)
|
@@ -11,7 +11,7 @@ from hydraflow.run_collection import RunCollection
|
|
11
11
|
|
12
12
|
@pytest.fixture
|
13
13
|
def runs(monkeypatch, tmp_path):
|
14
|
-
from hydraflow.run_collection import search_runs
|
14
|
+
from hydraflow.mlflow import search_runs
|
15
15
|
|
16
16
|
monkeypatch.chdir(tmp_path)
|
17
17
|
|
@@ -342,7 +342,7 @@ def runs2(monkeypatch, tmp_path):
|
|
342
342
|
|
343
343
|
|
344
344
|
def test_list_runs(runs, runs2):
|
345
|
-
from hydraflow.run_collection import list_runs
|
345
|
+
from hydraflow.mlflow import list_runs
|
346
346
|
|
347
347
|
mlflow.set_experiment("test_run")
|
348
348
|
all_runs = list_runs()
|
@@ -354,7 +354,7 @@ def test_list_runs(runs, runs2):
|
|
354
354
|
|
355
355
|
|
356
356
|
def test_list_runs_empty_list(runs, runs2):
|
357
|
-
from hydraflow.run_collection import list_runs
|
357
|
+
from hydraflow.mlflow import list_runs
|
358
358
|
|
359
359
|
all_runs = list_runs([])
|
360
360
|
assert len(all_runs) == 9
|
@@ -362,14 +362,14 @@ def test_list_runs_empty_list(runs, runs2):
|
|
362
362
|
|
363
363
|
@pytest.mark.parametrize(["name", "n"], [("test_run", 6), ("test_run2", 3)])
|
364
364
|
def test_list_runs_list(runs, runs2, name, n):
|
365
|
-
from hydraflow.run_collection import list_runs
|
365
|
+
from hydraflow.mlflow import list_runs
|
366
366
|
|
367
367
|
filtered_runs = list_runs(experiment_names=[name])
|
368
368
|
assert len(filtered_runs) == n
|
369
369
|
|
370
370
|
|
371
371
|
def test_list_runs_none(runs, runs2):
|
372
|
-
from hydraflow.run_collection import list_runs
|
372
|
+
from hydraflow.mlflow import list_runs
|
373
373
|
|
374
374
|
no_runs = list_runs(experiment_names=["non_existent_experiment"])
|
375
375
|
assert len(no_runs) == 0
|
@@ -1,119 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
This module provides functionality to log parameters from Hydra
|
3
|
-
configuration objects and set up experiments using MLflow.
|
4
|
-
"""
|
5
|
-
|
6
|
-
from __future__ import annotations
|
7
|
-
|
8
|
-
from pathlib import Path
|
9
|
-
from typing import TYPE_CHECKING
|
10
|
-
|
11
|
-
import mlflow
|
12
|
-
from hydra.core.hydra_config import HydraConfig
|
13
|
-
from mlflow.tracking import artifact_utils
|
14
|
-
from omegaconf import OmegaConf
|
15
|
-
|
16
|
-
from hydraflow.config import iter_params
|
17
|
-
|
18
|
-
if TYPE_CHECKING:
|
19
|
-
from mlflow.entities.experiment import Experiment
|
20
|
-
from mlflow.entities.run import Run
|
21
|
-
|
22
|
-
|
23
|
-
def set_experiment(
|
24
|
-
prefix: str = "",
|
25
|
-
suffix: str = "",
|
26
|
-
uri: str | Path | None = None,
|
27
|
-
) -> Experiment:
|
28
|
-
"""
|
29
|
-
Set the experiment name and tracking URI optionally.
|
30
|
-
|
31
|
-
This function sets the experiment name by combining the given prefix,
|
32
|
-
the job name from HydraConfig, and the given suffix. Optionally, it can
|
33
|
-
also set the tracking URI.
|
34
|
-
|
35
|
-
Args:
|
36
|
-
prefix (str): The prefix to prepend to the experiment name.
|
37
|
-
suffix (str): The suffix to append to the experiment name.
|
38
|
-
uri (str | Path | None): The tracking URI to use. Defaults to None.
|
39
|
-
|
40
|
-
Returns:
|
41
|
-
Experiment: An instance of `mlflow.entities.Experiment` representing
|
42
|
-
the new active experiment.
|
43
|
-
"""
|
44
|
-
if uri is not None:
|
45
|
-
mlflow.set_tracking_uri(uri)
|
46
|
-
|
47
|
-
hc = HydraConfig.get()
|
48
|
-
name = f"{prefix}{hc.job.name}{suffix}"
|
49
|
-
return mlflow.set_experiment(name)
|
50
|
-
|
51
|
-
|
52
|
-
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
53
|
-
"""
|
54
|
-
Log the parameters from the given configuration object.
|
55
|
-
|
56
|
-
This method logs the parameters from the provided configuration object
|
57
|
-
using MLflow. It iterates over the parameters and logs them using the
|
58
|
-
`mlflow.log_param` method.
|
59
|
-
|
60
|
-
Args:
|
61
|
-
config (object): The configuration object to log the parameters from.
|
62
|
-
synchronous (bool | None): Whether to log the parameters synchronously.
|
63
|
-
Defaults to None.
|
64
|
-
"""
|
65
|
-
for key, value in iter_params(config):
|
66
|
-
mlflow.log_param(key, value, synchronous=synchronous)
|
67
|
-
|
68
|
-
|
69
|
-
def get_artifact_dir(run: Run | None = None) -> Path:
|
70
|
-
"""
|
71
|
-
Retrieve the artifact directory for the given run.
|
72
|
-
|
73
|
-
This function uses MLflow to get the artifact directory for the given run.
|
74
|
-
|
75
|
-
Args:
|
76
|
-
run (Run | None): The run object. Defaults to None.
|
77
|
-
|
78
|
-
Returns:
|
79
|
-
The local path to the directory where the artifacts are downloaded.
|
80
|
-
"""
|
81
|
-
if run is None:
|
82
|
-
uri = mlflow.get_artifact_uri()
|
83
|
-
else:
|
84
|
-
uri = artifact_utils.get_artifact_uri(run.info.run_id)
|
85
|
-
|
86
|
-
return Path(mlflow.artifacts.download_artifacts(uri))
|
87
|
-
|
88
|
-
|
89
|
-
def get_hydra_output_dir(*, run: Run | None = None) -> Path:
|
90
|
-
"""
|
91
|
-
Retrieve the Hydra output directory for the given run.
|
92
|
-
|
93
|
-
This function returns the Hydra output directory. If no run is provided,
|
94
|
-
it retrieves the output directory from the current Hydra configuration.
|
95
|
-
If a run is provided, it retrieves the artifact path for the run, loads
|
96
|
-
the Hydra configuration from the downloaded artifacts, and returns the
|
97
|
-
output directory specified in that configuration.
|
98
|
-
|
99
|
-
Args:
|
100
|
-
run (Run | None): The run object. Defaults to None.
|
101
|
-
|
102
|
-
Returns:
|
103
|
-
Path: The path to the Hydra output directory.
|
104
|
-
|
105
|
-
Raises:
|
106
|
-
FileNotFoundError: If the Hydra configuration file is not found
|
107
|
-
in the artifacts.
|
108
|
-
"""
|
109
|
-
if run is None:
|
110
|
-
hc = HydraConfig.get()
|
111
|
-
return Path(hc.runtime.output_dir)
|
112
|
-
|
113
|
-
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
114
|
-
|
115
|
-
if path.exists():
|
116
|
-
hc = OmegaConf.load(path)
|
117
|
-
return Path(hc.hydra.runtime.output_dir)
|
118
|
-
|
119
|
-
raise FileNotFoundError
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|