hydraflow 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow/__init__.py +4 -1
- hydraflow/asyncio.py +13 -11
- hydraflow/config.py +17 -6
- hydraflow/context.py +16 -16
- hydraflow/mlflow.py +36 -23
- hydraflow/param.py +11 -0
- hydraflow/progress.py +7 -18
- hydraflow/run_collection.py +138 -74
- hydraflow/run_data.py +56 -0
- hydraflow/{info.py → run_info.py} +12 -37
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/METADATA +2 -1
- hydraflow-0.3.0.dist-info/RECORD +15 -0
- hydraflow-0.2.17.dist-info/RECORD +0 -14
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
+
"""Provide a collection of MLflow runs."""
|
2
|
+
|
1
3
|
from .context import chdir_artifact, log_run, start_run, watch
|
2
|
-
from .info import get_artifact_dir, get_hydra_output_dir, load_config
|
3
4
|
from .mlflow import (
|
4
5
|
list_runs,
|
5
6
|
search_runs,
|
@@ -7,6 +8,8 @@ from .mlflow import (
|
|
7
8
|
)
|
8
9
|
from .progress import multi_tasks_progress, parallel_progress
|
9
10
|
from .run_collection import RunCollection
|
11
|
+
from .run_data import load_config
|
12
|
+
from .run_info import get_artifact_dir, get_hydra_output_dir
|
10
13
|
|
11
14
|
__all__ = [
|
12
15
|
"RunCollection",
|
hydraflow/asyncio.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Provide functionality for running commands and monitoring file changes."""
|
2
|
+
|
1
3
|
from __future__ import annotations
|
2
4
|
|
3
5
|
import asyncio
|
@@ -27,8 +29,7 @@ async def execute_command(
|
|
27
29
|
stderr: Callable[[str], None] | None = None,
|
28
30
|
stop_event: asyncio.Event,
|
29
31
|
) -> int:
|
30
|
-
"""
|
31
|
-
Runs a command asynchronously and pass the output to callback functions.
|
32
|
+
"""Run a command asynchronously and pass the output to callback functions.
|
32
33
|
|
33
34
|
Args:
|
34
35
|
program (str): The program to run.
|
@@ -39,6 +40,7 @@ async def execute_command(
|
|
39
40
|
|
40
41
|
Returns:
|
41
42
|
int: The return code of the process.
|
43
|
+
|
42
44
|
"""
|
43
45
|
try:
|
44
46
|
process = await asyncio.create_subprocess_exec(
|
@@ -68,13 +70,13 @@ async def process_stream(
|
|
68
70
|
stream: StreamReader | None,
|
69
71
|
callback: Callable[[str], None] | None,
|
70
72
|
) -> None:
|
71
|
-
"""
|
72
|
-
Reads a stream asynchronously and pass each line to a callback function.
|
73
|
+
"""Read a stream asynchronously and pass each line to a callback function.
|
73
74
|
|
74
75
|
Args:
|
75
76
|
stream (StreamReader | None): The stream to read from.
|
76
77
|
callback (Callable[[str], None] | None): The callback function to handle
|
77
78
|
each line.
|
79
|
+
|
78
80
|
"""
|
79
81
|
if stream is None or callback is None:
|
80
82
|
return
|
@@ -93,9 +95,7 @@ async def monitor_file_changes(
|
|
93
95
|
stop_event: asyncio.Event,
|
94
96
|
**awatch_kwargs,
|
95
97
|
) -> None:
|
96
|
-
"""
|
97
|
-
Watches for file changes in specified paths and pass the changes to a
|
98
|
-
callback function.
|
98
|
+
"""Watch file changes in specified paths and pass the changes to a callback.
|
99
99
|
|
100
100
|
Args:
|
101
101
|
paths (list[str | Path]): List of paths to monitor for changes.
|
@@ -103,6 +103,7 @@ async def monitor_file_changes(
|
|
103
103
|
function to handle file changes.
|
104
104
|
stop_event (asyncio.Event): Event to signal when to stop watching.
|
105
105
|
**awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
|
106
|
+
|
106
107
|
"""
|
107
108
|
str_paths = [str(path) for path in paths]
|
108
109
|
try:
|
@@ -127,8 +128,7 @@ async def run_and_monitor(
|
|
127
128
|
paths: list[str | Path] | None = None,
|
128
129
|
**awatch_kwargs,
|
129
130
|
) -> int:
|
130
|
-
"""
|
131
|
-
Runs a command and optionally watch for file changes concurrently.
|
131
|
+
"""Run a command and optionally watch for file changes concurrently.
|
132
132
|
|
133
133
|
Args:
|
134
134
|
program (str): The program to run.
|
@@ -138,6 +138,8 @@ async def run_and_monitor(
|
|
138
138
|
watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
|
139
139
|
file changes.
|
140
140
|
paths (list[str | Path] | None): List of paths to monitor for changes.
|
141
|
+
**awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
|
142
|
+
|
141
143
|
"""
|
142
144
|
stop_event = asyncio.Event()
|
143
145
|
run_task = asyncio.create_task(
|
@@ -184,8 +186,7 @@ def run(
|
|
184
186
|
paths: list[str | Path] | None = None,
|
185
187
|
**awatch_kwargs,
|
186
188
|
) -> int:
|
187
|
-
"""
|
188
|
-
Run a command synchronously and optionally watch for file changes.
|
189
|
+
"""Run a command synchronously and optionally watch for file changes.
|
189
190
|
|
190
191
|
This function is a synchronous wrapper around the asynchronous
|
191
192
|
`run_and_monitor` function. It runs a specified command and optionally
|
@@ -208,6 +209,7 @@ def run(
|
|
208
209
|
|
209
210
|
Returns:
|
210
211
|
int: The return code of the process.
|
212
|
+
|
211
213
|
"""
|
212
214
|
if watch and not paths:
|
213
215
|
paths = [Path.cwd()]
|
hydraflow/config.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
This module provides functionality for working with configuration
|
3
|
-
objects using the OmegaConf library.
|
4
|
-
"""
|
1
|
+
"""Provide functionality for working with configuration objects using OmegaConf."""
|
5
2
|
|
6
3
|
from __future__ import annotations
|
7
4
|
|
@@ -14,9 +11,22 @@ if TYPE_CHECKING:
|
|
14
11
|
from typing import Any
|
15
12
|
|
16
13
|
|
17
|
-
def
|
14
|
+
def collect_params(config: object) -> dict[str, Any]:
|
15
|
+
"""Iterate over parameters and collect them into a dictionary.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
config (object): The configuration object to iterate over.
|
19
|
+
prefix (str): The prefix to prepend to the parameter keys.
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
dict[str, Any]: A dictionary of collected parameters.
|
23
|
+
|
18
24
|
"""
|
19
|
-
|
25
|
+
return dict(iter_params(config))
|
26
|
+
|
27
|
+
|
28
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
29
|
+
"""Recursively iterate over the parameters in the given configuration object.
|
20
30
|
|
21
31
|
This function traverses the configuration object and yields key-value pairs
|
22
32
|
representing the parameters. The keys are prefixed with the provided prefix.
|
@@ -29,6 +39,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
29
39
|
|
30
40
|
Yields:
|
31
41
|
Key-value pairs representing the parameters in the configuration object.
|
42
|
+
|
32
43
|
"""
|
33
44
|
if config is None:
|
34
45
|
return
|
hydraflow/context.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
This module provides context managers to log parameters and manage the MLflow
|
3
|
-
run context.
|
4
|
-
"""
|
1
|
+
"""Provide context managers to log parameters and manage the MLflow run context."""
|
5
2
|
|
6
3
|
from __future__ import annotations
|
7
4
|
|
@@ -17,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
|
|
17
14
|
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
18
15
|
from watchdog.observers import Observer
|
19
16
|
|
20
|
-
from hydraflow.info import get_artifact_dir
|
21
17
|
from hydraflow.mlflow import log_params
|
18
|
+
from hydraflow.run_info import get_artifact_dir
|
22
19
|
|
23
20
|
if TYPE_CHECKING:
|
24
21
|
from collections.abc import Callable, Iterator
|
@@ -34,9 +31,7 @@ def log_run(
|
|
34
31
|
*,
|
35
32
|
synchronous: bool | None = None,
|
36
33
|
) -> Iterator[None]:
|
37
|
-
"""
|
38
|
-
Log the parameters from the given configuration object and manage the MLflow
|
39
|
-
run context.
|
34
|
+
"""Log the parameters from the given configuration object.
|
40
35
|
|
41
36
|
This context manager logs the parameters from the provided configuration object
|
42
37
|
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
@@ -56,6 +51,7 @@ def log_run(
|
|
56
51
|
# Perform operations within the MLflow run context
|
57
52
|
pass
|
58
53
|
```
|
54
|
+
|
59
55
|
"""
|
60
56
|
log_params(config, synchronous=synchronous)
|
61
57
|
|
@@ -98,8 +94,7 @@ def start_run( # noqa: PLR0913
|
|
98
94
|
log_system_metrics: bool | None = None,
|
99
95
|
synchronous: bool | None = None,
|
100
96
|
) -> Iterator[Run]:
|
101
|
-
"""
|
102
|
-
Start an MLflow run and log parameters using the provided configuration object.
|
97
|
+
"""Start an MLflow run and log parameters using the provided configuration object.
|
103
98
|
|
104
99
|
This context manager starts an MLflow run and logs parameters using the specified
|
105
100
|
configuration object. It ensures that the run is properly closed after completion.
|
@@ -130,6 +125,7 @@ def start_run( # noqa: PLR0913
|
|
130
125
|
- `mlflow.start_run`: The MLflow function to start a run directly.
|
131
126
|
- `log_run`: A context manager to log parameters and manage the MLflow
|
132
127
|
run context.
|
128
|
+
|
133
129
|
"""
|
134
130
|
with (
|
135
131
|
mlflow.start_run(
|
@@ -156,9 +152,7 @@ def watch(
|
|
156
152
|
ignore_patterns: list[str] | None = None,
|
157
153
|
ignore_log: bool = True,
|
158
154
|
) -> Iterator[None]:
|
159
|
-
"""
|
160
|
-
Watch the given directory for changes and call the provided function
|
161
|
-
when a change is detected.
|
155
|
+
"""Watch the given directory for changes.
|
162
156
|
|
163
157
|
This context manager sets up a file system watcher on the specified directory.
|
164
158
|
When a file modification is detected, the provided function is called with
|
@@ -173,6 +167,9 @@ def watch(
|
|
173
167
|
the current MLflow artifact URI is used. Defaults to "".
|
174
168
|
timeout (int): The timeout period in seconds for the watcher
|
175
169
|
to run after the context is exited. Defaults to 60.
|
170
|
+
ignore_patterns (list[str] | None): A list of glob patterns to ignore.
|
171
|
+
Defaults to None.
|
172
|
+
ignore_log (bool): Whether to ignore log files. Defaults to True.
|
176
173
|
|
177
174
|
Yields:
|
178
175
|
None
|
@@ -183,6 +180,7 @@ def watch(
|
|
183
180
|
# Perform operations while watching the directory for changes
|
184
181
|
pass
|
185
182
|
```
|
183
|
+
|
186
184
|
"""
|
187
185
|
dir = dir or get_artifact_dir() # noqa: A001
|
188
186
|
if isinstance(dir, Path):
|
@@ -214,6 +212,8 @@ def watch(
|
|
214
212
|
|
215
213
|
|
216
214
|
class Handler(PatternMatchingEventHandler):
|
215
|
+
"""Monitor file changes and call the given function when a change is detected."""
|
216
|
+
|
217
217
|
def __init__(
|
218
218
|
self,
|
219
219
|
func: Callable[[Path], None],
|
@@ -232,6 +232,7 @@ class Handler(PatternMatchingEventHandler):
|
|
232
232
|
super().__init__(ignore_patterns=ignore_patterns)
|
233
233
|
|
234
234
|
def on_modified(self, event: FileModifiedEvent) -> None:
|
235
|
+
"""Call the given function when a file is modified."""
|
235
236
|
file = Path(str(event.src_path))
|
236
237
|
if file.is_file():
|
237
238
|
self.func(file)
|
@@ -242,9 +243,7 @@ def chdir_artifact(
|
|
242
243
|
run: Run,
|
243
244
|
artifact_path: str | None = None,
|
244
245
|
) -> Iterator[Path]:
|
245
|
-
"""
|
246
|
-
Change the current working directory to the artifact directory of the
|
247
|
-
given run.
|
246
|
+
"""Change the current working directory to the artifact directory of the given run.
|
248
247
|
|
249
248
|
This context manager changes the current working directory to the artifact
|
250
249
|
directory of the given run. It ensures that the directory is changed back
|
@@ -253,6 +252,7 @@ def chdir_artifact(
|
|
253
252
|
Args:
|
254
253
|
run (Run): The run to get the artifact directory from.
|
255
254
|
artifact_path (str | None): The artifact path.
|
255
|
+
|
256
256
|
"""
|
257
257
|
curdir = Path.cwd()
|
258
258
|
path = mlflow.artifacts.download_artifacts(
|
hydraflow/mlflow.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1
|
-
"""
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
"""Provide functionality to log parameters from Hydra configuration objects.
|
2
|
+
|
3
|
+
This module provides functions to log parameters from Hydra configuration objects
|
4
|
+
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
|
+
configuration management with MLflow's experiment tracking capabilities.
|
5
6
|
|
6
7
|
Key Features:
|
7
|
-
- **Experiment Management**: Set
|
8
|
-
|
9
|
-
- **
|
10
|
-
|
11
|
-
- **Run
|
12
|
-
|
13
|
-
|
14
|
-
easy access to outputs generated during experiments.
|
15
|
-
|
16
|
-
This module is designed to integrate seamlessly with Hydra, providing a robust
|
17
|
-
solution for tracking machine learning experiments and their associated metadata.
|
8
|
+
- **Experiment Management**: Set experiment names and tracking URIs using Hydra
|
9
|
+
configuration details.
|
10
|
+
- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
|
11
|
+
supporting both synchronous and asynchronous logging.
|
12
|
+
- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
|
13
|
+
multiple MLflow runs, providing methods to filter and retrieve runs based on
|
14
|
+
various criteria.
|
18
15
|
"""
|
19
16
|
|
20
17
|
from __future__ import annotations
|
@@ -40,8 +37,7 @@ def set_experiment(
|
|
40
37
|
suffix: str = "",
|
41
38
|
uri: str | Path | None = None,
|
42
39
|
) -> Experiment:
|
43
|
-
"""
|
44
|
-
Sets the experiment name and tracking URI optionally.
|
40
|
+
"""Set the experiment name and, optionally, the tracking URI.
|
45
41
|
|
46
42
|
This function sets the experiment name by combining the given prefix,
|
47
43
|
the job name from HydraConfig, and the given suffix. Optionally, it can
|
@@ -55,6 +51,7 @@ def set_experiment(
|
|
55
51
|
Returns:
|
56
52
|
Experiment: An instance of `mlflow.entities.Experiment` representing
|
57
53
|
the new active experiment.
|
54
|
+
|
58
55
|
"""
|
59
56
|
if uri is not None:
|
60
57
|
mlflow.set_tracking_uri(uri)
|
@@ -65,8 +62,7 @@ def set_experiment(
|
|
65
62
|
|
66
63
|
|
67
64
|
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
68
|
-
"""
|
69
|
-
Log the parameters from the given configuration object.
|
65
|
+
"""Log the parameters from the given configuration object.
|
70
66
|
|
71
67
|
This method logs the parameters from the provided configuration object
|
72
68
|
using MLflow. It iterates over the parameters and logs them using the
|
@@ -76,6 +72,7 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
|
76
72
|
config (object): The configuration object to log the parameters from.
|
77
73
|
synchronous (bool | None): Whether to log the parameters synchronously.
|
78
74
|
Defaults to None.
|
75
|
+
|
79
76
|
"""
|
80
77
|
for key, value in iter_params(config):
|
81
78
|
mlflow.log_param(key, value, synchronous=synchronous)
|
@@ -91,8 +88,7 @@ def search_runs( # noqa: PLR0913
|
|
91
88
|
search_all_experiments: bool = False,
|
92
89
|
experiment_names: list[str] | None = None,
|
93
90
|
) -> RunCollection:
|
94
|
-
"""
|
95
|
-
Search for Runs that fit the specified criteria.
|
91
|
+
"""Search for Runs that fit the specified criteria.
|
96
92
|
|
97
93
|
This function wraps the `mlflow.search_runs` function and returns the
|
98
94
|
results as a `RunCollection` object. It allows for flexible searching of
|
@@ -133,6 +129,7 @@ def search_runs( # noqa: PLR0913
|
|
133
129
|
|
134
130
|
Returns:
|
135
131
|
A `RunCollection` object containing the search results.
|
132
|
+
|
136
133
|
"""
|
137
134
|
runs = mlflow.search_runs(
|
138
135
|
experiment_ids=experiment_ids,
|
@@ -151,9 +148,9 @@ def search_runs( # noqa: PLR0913
|
|
151
148
|
def list_runs(
|
152
149
|
experiment_names: str | list[str] | None = None,
|
153
150
|
n_jobs: int = 0,
|
151
|
+
status: str | list[str] | int | list[int] | None = None,
|
154
152
|
) -> RunCollection:
|
155
|
-
"""
|
156
|
-
List all runs for the specified experiments.
|
153
|
+
"""List all runs for the specified experiments.
|
157
154
|
|
158
155
|
This function retrieves all runs for the given list of experiment names.
|
159
156
|
If no experiment names are provided (None), it defaults to searching all runs
|
@@ -169,11 +166,27 @@ def list_runs(
|
|
169
166
|
for runs. If None or an empty list is provided, the function will
|
170
167
|
search the currently active experiment or all experiments except
|
171
168
|
the "Default" experiment.
|
169
|
+
n_jobs (int): The number of jobs to run in parallel. If 0, the function
|
170
|
+
will search runs sequentially.
|
171
|
+
status (str | list[str] | int | list[int] | None): The status of the runs
|
172
|
+
to filter.
|
172
173
|
|
173
174
|
Returns:
|
174
175
|
RunCollection: A `RunCollection` instance containing the runs for the
|
175
176
|
specified experiments.
|
177
|
+
|
176
178
|
"""
|
179
|
+
rc = _list_runs(experiment_names, n_jobs)
|
180
|
+
if status is None:
|
181
|
+
return rc
|
182
|
+
|
183
|
+
return rc.filter(status=status)
|
184
|
+
|
185
|
+
|
186
|
+
def _list_runs(
|
187
|
+
experiment_names: str | list[str] | None = None,
|
188
|
+
n_jobs: int = 0,
|
189
|
+
) -> RunCollection:
|
177
190
|
if isinstance(experiment_names, str):
|
178
191
|
experiment_names = [experiment_names]
|
179
192
|
|
hydraflow/param.py
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
"""Provide utility functions for parameter matching.
|
2
|
+
|
3
|
+
The main function `match` checks if a given parameter matches a specified value.
|
4
|
+
It supports various types of values including None, boolean, list, tuple, int,
|
5
|
+
float, and str.
|
6
|
+
|
7
|
+
Helper functions `_match_list` and `_match_tuple` are used internally to handle
|
8
|
+
matching for list and tuple types respectively.
|
9
|
+
"""
|
10
|
+
|
1
11
|
from __future__ import annotations
|
2
12
|
|
3
13
|
from typing import Any
|
@@ -13,6 +23,7 @@ def match(param: str, value: Any) -> bool:
|
|
13
23
|
Returns:
|
14
24
|
True if the parameter matches the specified value,
|
15
25
|
False otherwise.
|
26
|
+
|
16
27
|
"""
|
17
28
|
if value in [None, True, False]:
|
18
29
|
return param == str(value)
|
hydraflow/progress.py
CHANGED
@@ -1,18 +1,7 @@
|
|
1
|
-
"""
|
2
|
-
Module for managing progress tracking in parallel processing using Joblib
|
3
|
-
and Rich's Progress bar.
|
1
|
+
"""Context managers and functions for parallel task execution with progress.
|
4
2
|
|
5
3
|
Provide context managers and functions to facilitate the execution
|
6
4
|
of tasks in parallel while displaying progress updates.
|
7
|
-
|
8
|
-
The following key components are provided:
|
9
|
-
|
10
|
-
- JoblibProgress: A context manager for tracking progress with Rich's progress
|
11
|
-
bar.
|
12
|
-
- parallel_progress: A function to execute a given function in parallel over
|
13
|
-
an iterable with progress tracking.
|
14
|
-
- multi_tasks_progress: A function to render auto-updating progress bars for
|
15
|
-
multiple tasks concurrently.
|
16
5
|
"""
|
17
6
|
|
18
7
|
from __future__ import annotations
|
@@ -37,8 +26,7 @@ def JoblibProgress( # noqa: N802
|
|
37
26
|
total: int | None = None,
|
38
27
|
**kwargs,
|
39
28
|
) -> Iterator[Progress]:
|
40
|
-
"""
|
41
|
-
Context manager for tracking progress using Joblib with Rich's Progress bar.
|
29
|
+
"""Context manager for tracking progress using Joblib with Rich's Progress bar.
|
42
30
|
|
43
31
|
Args:
|
44
32
|
*columns (ProgressColumn | str): Columns to display in the progress bar.
|
@@ -56,6 +44,7 @@ def JoblibProgress( # noqa: N802
|
|
56
44
|
with JoblibProgress("task", total=100) as progress:
|
57
45
|
# Your parallel processing code here
|
58
46
|
```
|
47
|
+
|
59
48
|
"""
|
60
49
|
if not columns:
|
61
50
|
columns = Progress.get_default_columns()
|
@@ -94,8 +83,7 @@ def parallel_progress(
|
|
94
83
|
description: str | None = None,
|
95
84
|
**kwargs,
|
96
85
|
) -> list[U]:
|
97
|
-
"""
|
98
|
-
Execute a function in parallel over an iterable with progress tracking.
|
86
|
+
"""Execute a function in parallel over an iterable with progress tracking.
|
99
87
|
|
100
88
|
Args:
|
101
89
|
func (Callable[[T], U]): The function to execute on each item in the
|
@@ -112,6 +100,7 @@ def parallel_progress(
|
|
112
100
|
Returns:
|
113
101
|
list[U]: A list of results from applying the function to each item in
|
114
102
|
the iterable.
|
103
|
+
|
115
104
|
"""
|
116
105
|
iterable = list(iterable)
|
117
106
|
total = len(iterable)
|
@@ -130,8 +119,7 @@ def multi_tasks_progress(
|
|
130
119
|
transient: bool | None = None,
|
131
120
|
**kwargs,
|
132
121
|
) -> None:
|
133
|
-
"""
|
134
|
-
Render auto-updating progress bars for multiple tasks concurrently.
|
122
|
+
"""Render auto-updating progress bars for multiple tasks concurrently.
|
135
123
|
|
136
124
|
Args:
|
137
125
|
iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
|
@@ -151,6 +139,7 @@ def multi_tasks_progress(
|
|
151
139
|
|
152
140
|
Returns:
|
153
141
|
None
|
142
|
+
|
154
143
|
"""
|
155
144
|
if not columns:
|
156
145
|
columns = Progress.get_default_columns()
|
hydraflow/run_collection.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
"""
|
2
|
-
|
3
|
-
|
1
|
+
"""Provide a collection of MLflow runs.
|
2
|
+
|
3
|
+
This module includes the `RunCollection` class, which serves as a container
|
4
4
|
for multiple MLflow `Run` instances, and various methods to filter and
|
5
5
|
retrieve these runs.
|
6
6
|
|
@@ -23,9 +23,13 @@ from dataclasses import dataclass, field
|
|
23
23
|
from itertools import chain
|
24
24
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
|
25
25
|
|
26
|
+
from mlflow.entities import RunStatus
|
27
|
+
from polars.dataframe import DataFrame
|
28
|
+
|
26
29
|
import hydraflow.param
|
27
|
-
from hydraflow.config import iter_params
|
28
|
-
from hydraflow.
|
30
|
+
from hydraflow.config import collect_params, iter_params
|
31
|
+
from hydraflow.run_data import RunCollectionData
|
32
|
+
from hydraflow.run_info import RunCollectionInfo
|
29
33
|
|
30
34
|
if TYPE_CHECKING:
|
31
35
|
from collections.abc import Callable, Iterator
|
@@ -42,8 +46,7 @@ P = ParamSpec("P")
|
|
42
46
|
|
43
47
|
@dataclass
|
44
48
|
class RunCollection:
|
45
|
-
"""
|
46
|
-
Represent a collection of MLflow runs.
|
49
|
+
"""Represent a collection of MLflow runs.
|
47
50
|
|
48
51
|
Provide methods to interact with the runs, such as filtering,
|
49
52
|
retrieving specific runs, and accessing run information.
|
@@ -60,8 +63,12 @@ class RunCollection:
|
|
60
63
|
_info: RunCollectionInfo = field(init=False)
|
61
64
|
"""An instance of `RunCollectionInfo`."""
|
62
65
|
|
66
|
+
_data: RunCollectionData = field(init=False)
|
67
|
+
"""An instance of `RunCollectionData`."""
|
68
|
+
|
63
69
|
def __post_init__(self) -> None:
|
64
70
|
self._info = RunCollectionInfo(self)
|
71
|
+
self._data = RunCollectionData(self)
|
65
72
|
|
66
73
|
def __repr__(self) -> str:
|
67
74
|
return f"{self.__class__.__name__}({len(self)})"
|
@@ -93,7 +100,6 @@ class RunCollection:
|
|
93
100
|
@classmethod
|
94
101
|
def from_list(cls, runs: list[Run]) -> RunCollection:
|
95
102
|
"""Create a `RunCollection` instance from a list of MLflow `Run` instances."""
|
96
|
-
|
97
103
|
return cls(runs)
|
98
104
|
|
99
105
|
@property
|
@@ -101,6 +107,11 @@ class RunCollection:
|
|
101
107
|
"""An instance of `RunCollectionInfo`."""
|
102
108
|
return self._info
|
103
109
|
|
110
|
+
@property
|
111
|
+
def data(self) -> RunCollectionData:
|
112
|
+
"""An instance of `RunCollectionData`."""
|
113
|
+
return self._data
|
114
|
+
|
104
115
|
def take(self, n: int) -> RunCollection:
|
105
116
|
"""Take the first n runs from the collection.
|
106
117
|
|
@@ -114,6 +125,7 @@ class RunCollection:
|
|
114
125
|
Returns:
|
115
126
|
A new `RunCollection` instance containing the first n runs if n is
|
116
127
|
positive, or the last n runs if n is negative.
|
128
|
+
|
117
129
|
"""
|
118
130
|
if n < 0:
|
119
131
|
return self.__class__(self._runs[n:])
|
@@ -126,17 +138,28 @@ class RunCollection:
|
|
126
138
|
*,
|
127
139
|
reverse: bool = False,
|
128
140
|
) -> None:
|
141
|
+
"""Sort the runs in the collection.
|
142
|
+
|
143
|
+
Sort the runs in the collection according to the provided key function
|
144
|
+
and optional reverse flag.
|
145
|
+
|
146
|
+
Args:
|
147
|
+
key (Callable[[Run], Any] | None): A function that takes a run and returns
|
148
|
+
a value to sort by.
|
149
|
+
reverse (bool): If True, sort in descending order.
|
150
|
+
|
151
|
+
"""
|
129
152
|
self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
|
130
153
|
|
131
154
|
def one(self) -> Run:
|
132
|
-
"""
|
133
|
-
Get the only `Run` instance in the collection.
|
155
|
+
"""Get the only `Run` instance in the collection.
|
134
156
|
|
135
157
|
Returns:
|
136
158
|
The only `Run` instance in the collection.
|
137
159
|
|
138
160
|
Raises:
|
139
161
|
ValueError: If the collection does not contain exactly one run.
|
162
|
+
|
140
163
|
"""
|
141
164
|
if len(self._runs) != 1:
|
142
165
|
raise ValueError("The collection does not contain exactly one run.")
|
@@ -144,24 +167,24 @@ class RunCollection:
|
|
144
167
|
return self._runs[0]
|
145
168
|
|
146
169
|
def try_one(self) -> Run | None:
|
147
|
-
"""
|
148
|
-
Try to get the only `Run` instance in the collection.
|
170
|
+
"""Try to get the only `Run` instance in the collection.
|
149
171
|
|
150
172
|
Returns:
|
151
173
|
The only `Run` instance in the collection, or None if the collection
|
152
174
|
does not contain exactly one run.
|
175
|
+
|
153
176
|
"""
|
154
177
|
return self._runs[0] if len(self._runs) == 1 else None
|
155
178
|
|
156
179
|
def first(self) -> Run:
|
157
|
-
"""
|
158
|
-
Get the first `Run` instance in the collection.
|
180
|
+
"""Get the first `Run` instance in the collection.
|
159
181
|
|
160
182
|
Returns:
|
161
183
|
The first `Run` instance in the collection.
|
162
184
|
|
163
185
|
Raises:
|
164
186
|
ValueError: If the collection is empty.
|
187
|
+
|
165
188
|
"""
|
166
189
|
if not self._runs:
|
167
190
|
raise ValueError("The collection is empty.")
|
@@ -169,24 +192,24 @@ class RunCollection:
|
|
169
192
|
return self._runs[0]
|
170
193
|
|
171
194
|
def try_first(self) -> Run | None:
|
172
|
-
"""
|
173
|
-
Try to get the first `Run` instance in the collection.
|
195
|
+
"""Try to get the first `Run` instance in the collection.
|
174
196
|
|
175
197
|
Returns:
|
176
198
|
The first `Run` instance in the collection, or None if the collection
|
177
199
|
is empty.
|
200
|
+
|
178
201
|
"""
|
179
202
|
return self._runs[0] if self._runs else None
|
180
203
|
|
181
204
|
def last(self) -> Run:
|
182
|
-
"""
|
183
|
-
Get the last `Run` instance in the collection.
|
205
|
+
"""Get the last `Run` instance in the collection.
|
184
206
|
|
185
207
|
Returns:
|
186
208
|
The last `Run` instance in the collection.
|
187
209
|
|
188
210
|
Raises:
|
189
211
|
ValueError: If the collection is empty.
|
212
|
+
|
190
213
|
"""
|
191
214
|
if not self._runs:
|
192
215
|
raise ValueError("The collection is empty.")
|
@@ -194,18 +217,17 @@ class RunCollection:
|
|
194
217
|
return self._runs[-1]
|
195
218
|
|
196
219
|
def try_last(self) -> Run | None:
|
197
|
-
"""
|
198
|
-
Try to get the last `Run` instance in the collection.
|
220
|
+
"""Try to get the last `Run` instance in the collection.
|
199
221
|
|
200
222
|
Returns:
|
201
223
|
The last `Run` instance in the collection, or None if the collection
|
202
224
|
is empty.
|
225
|
+
|
203
226
|
"""
|
204
227
|
return self._runs[-1] if self._runs else None
|
205
228
|
|
206
229
|
def filter(self, config: object | None = None, **kwargs) -> RunCollection:
|
207
|
-
"""
|
208
|
-
Filter the `Run` instances based on the provided configuration.
|
230
|
+
"""Filter the `Run` instances based on the provided configuration.
|
209
231
|
|
210
232
|
This method filters the runs in the collection according to the
|
211
233
|
specified configuration object and additional key-value pairs. The
|
@@ -228,12 +250,12 @@ class RunCollection:
|
|
228
250
|
|
229
251
|
Returns:
|
230
252
|
A new `RunCollection` object containing the filtered runs.
|
253
|
+
|
231
254
|
"""
|
232
255
|
return RunCollection(filter_runs(self._runs, config, **kwargs))
|
233
256
|
|
234
257
|
def find(self, config: object | None = None, **kwargs) -> Run:
|
235
|
-
"""
|
236
|
-
Find the first `Run` instance based on the provided configuration.
|
258
|
+
"""Find the first `Run` instance based on the provided configuration.
|
237
259
|
|
238
260
|
This method filters the runs in the collection according to the
|
239
261
|
specified configuration object and returns the first run that matches
|
@@ -252,6 +274,7 @@ class RunCollection:
|
|
252
274
|
|
253
275
|
See Also:
|
254
276
|
`filter`: Perform the actual filtering logic.
|
277
|
+
|
255
278
|
"""
|
256
279
|
try:
|
257
280
|
return self.filter(config, **kwargs).first()
|
@@ -259,8 +282,7 @@ class RunCollection:
|
|
259
282
|
raise ValueError("No run matches the provided configuration.")
|
260
283
|
|
261
284
|
def try_find(self, config: object | None = None, **kwargs) -> Run | None:
|
262
|
-
"""
|
263
|
-
Try to find the first `Run` instance based on the provided configuration.
|
285
|
+
"""Try to find the first `Run` instance based on the provided configuration.
|
264
286
|
|
265
287
|
This method filters the runs in the collection according to the
|
266
288
|
specified configuration object and returns the first run that matches
|
@@ -277,12 +299,12 @@ class RunCollection:
|
|
277
299
|
|
278
300
|
See Also:
|
279
301
|
`filter`: Perform the actual filtering logic.
|
302
|
+
|
280
303
|
"""
|
281
304
|
return self.filter(config, **kwargs).try_first()
|
282
305
|
|
283
306
|
def find_last(self, config: object | None = None, **kwargs) -> Run:
|
284
|
-
"""
|
285
|
-
Find the last `Run` instance based on the provided configuration.
|
307
|
+
"""Find the last `Run` instance based on the provided configuration.
|
286
308
|
|
287
309
|
This method filters the runs in the collection according to the
|
288
310
|
specified configuration object and returns the last run that matches
|
@@ -301,6 +323,7 @@ class RunCollection:
|
|
301
323
|
|
302
324
|
See Also:
|
303
325
|
`filter`: Perform the actual filtering logic.
|
326
|
+
|
304
327
|
"""
|
305
328
|
try:
|
306
329
|
return self.filter(config, **kwargs).last()
|
@@ -308,8 +331,7 @@ class RunCollection:
|
|
308
331
|
raise ValueError("No run matches the provided configuration.")
|
309
332
|
|
310
333
|
def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
|
311
|
-
"""
|
312
|
-
Try to find the last `Run` instance based on the provided configuration.
|
334
|
+
"""Try to find the last `Run` instance based on the provided configuration.
|
313
335
|
|
314
336
|
This method filters the runs in the collection according to the
|
315
337
|
specified configuration object and returns the last run that matches
|
@@ -326,12 +348,12 @@ class RunCollection:
|
|
326
348
|
|
327
349
|
See Also:
|
328
350
|
`filter`: Perform the actual filtering logic.
|
351
|
+
|
329
352
|
"""
|
330
353
|
return self.filter(config, **kwargs).try_last()
|
331
354
|
|
332
355
|
def get(self, config: object | None = None, **kwargs) -> Run:
|
333
|
-
"""
|
334
|
-
Retrieve a specific `Run` instance based on the provided configuration.
|
356
|
+
"""Retrieve a specific `Run` instance based on the provided configuration.
|
335
357
|
|
336
358
|
This method filters the runs in the collection according to the
|
337
359
|
specified configuration object and returns the run that matches the
|
@@ -351,6 +373,7 @@ class RunCollection:
|
|
351
373
|
|
352
374
|
See Also:
|
353
375
|
`filter`: Perform the actual filtering logic.
|
376
|
+
|
354
377
|
"""
|
355
378
|
try:
|
356
379
|
return self.filter(config, **kwargs).one()
|
@@ -359,8 +382,7 @@ class RunCollection:
|
|
359
382
|
raise ValueError(msg)
|
360
383
|
|
361
384
|
def try_get(self, config: object | None = None, **kwargs) -> Run | None:
|
362
|
-
"""
|
363
|
-
Try to retrieve a specific `Run` instance based on the provided configuration.
|
385
|
+
"""Try to get a specific `Run` instance based on the provided configuration.
|
364
386
|
|
365
387
|
This method filters the runs in the collection according to the
|
366
388
|
specified configuration object and returns the run that matches the
|
@@ -380,12 +402,12 @@ class RunCollection:
|
|
380
402
|
|
381
403
|
See Also:
|
382
404
|
`filter`: Perform the actual filtering logic.
|
405
|
+
|
383
406
|
"""
|
384
407
|
return self.filter(config, **kwargs).try_one()
|
385
408
|
|
386
409
|
def get_param_names(self) -> list[str]:
|
387
|
-
"""
|
388
|
-
Get the parameter names from the runs.
|
410
|
+
"""Get the parameter names from the runs.
|
389
411
|
|
390
412
|
This method extracts the unique parameter names from the provided list
|
391
413
|
of runs. It iterates through each run and collects the parameter names
|
@@ -393,6 +415,7 @@ class RunCollection:
|
|
393
415
|
|
394
416
|
Returns:
|
395
417
|
A list of unique parameter names.
|
418
|
+
|
396
419
|
"""
|
397
420
|
param_names = set()
|
398
421
|
|
@@ -402,24 +425,30 @@ class RunCollection:
|
|
402
425
|
|
403
426
|
return list(param_names)
|
404
427
|
|
405
|
-
def get_param_dict(self) -> dict[str, list[str]]:
|
406
|
-
"""
|
407
|
-
Get the parameter dictionary from the list of runs.
|
428
|
+
def get_param_dict(self, *, drop_const: bool = False) -> dict[str, list[str]]:
|
429
|
+
"""Get the parameter dictionary from the list of runs.
|
408
430
|
|
409
431
|
This method extracts the parameter names and their corresponding values
|
410
432
|
from the provided list of runs. It iterates through each run and
|
411
433
|
collects the parameter values into a dictionary where the keys are
|
412
434
|
parameter names and the values are lists of parameter values.
|
413
435
|
|
436
|
+
Args:
|
437
|
+
drop_const (bool): If True, drop the parameter values that are constant
|
438
|
+
across all runs.
|
439
|
+
|
414
440
|
Returns:
|
415
441
|
A dictionary where the keys are parameter names and the values are
|
416
442
|
lists of parameter values.
|
443
|
+
|
417
444
|
"""
|
418
445
|
params = {}
|
419
446
|
|
420
447
|
for name in self.get_param_names():
|
421
448
|
it = (run.data.params[name] for run in self if name in run.data.params)
|
422
|
-
|
449
|
+
unique_values = sorted(set(it))
|
450
|
+
if not drop_const or len(unique_values) > 1:
|
451
|
+
params[name] = unique_values
|
423
452
|
|
424
453
|
return params
|
425
454
|
|
@@ -429,9 +458,7 @@ class RunCollection:
|
|
429
458
|
*args: P.args,
|
430
459
|
**kwargs: P.kwargs,
|
431
460
|
) -> Iterator[T]:
|
432
|
-
"""
|
433
|
-
Apply a function to each run in the collection and return an iterator of
|
434
|
-
results.
|
461
|
+
"""Return an iterator of results by applying a function to each run.
|
435
462
|
|
436
463
|
This method iterates over each run in the collection and applies the
|
437
464
|
provided function to it, along with any additional arguments and
|
@@ -445,6 +472,7 @@ class RunCollection:
|
|
445
472
|
|
446
473
|
Yields:
|
447
474
|
Results obtained by applying the function to each run in the collection.
|
475
|
+
|
448
476
|
"""
|
449
477
|
return (func(run, *args, **kwargs) for run in self)
|
450
478
|
|
@@ -454,9 +482,7 @@ class RunCollection:
|
|
454
482
|
*args: P.args,
|
455
483
|
**kwargs: P.kwargs,
|
456
484
|
) -> Iterator[T]:
|
457
|
-
"""
|
458
|
-
Apply a function to each run id in the collection and return an iterator
|
459
|
-
of results.
|
485
|
+
"""Return an iterator of results by applying a function to each run id.
|
460
486
|
|
461
487
|
Args:
|
462
488
|
func (Callable[[str, P], T]): A function that takes a run id and returns a
|
@@ -467,6 +493,7 @@ class RunCollection:
|
|
467
493
|
Yields:
|
468
494
|
Results obtained by applying the function to each run id in the
|
469
495
|
collection.
|
496
|
+
|
470
497
|
"""
|
471
498
|
return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
|
472
499
|
|
@@ -476,9 +503,7 @@ class RunCollection:
|
|
476
503
|
*args: P.args,
|
477
504
|
**kwargs: P.kwargs,
|
478
505
|
) -> Iterator[T]:
|
479
|
-
"""
|
480
|
-
Apply a function to each run configuration in the collection and return
|
481
|
-
an iterator of results.
|
506
|
+
"""Return an iterator of results by applying a function to each run config.
|
482
507
|
|
483
508
|
Args:
|
484
509
|
func (Callable[[DictConfig, P], T]): A function that takes a run
|
@@ -489,8 +514,9 @@ class RunCollection:
|
|
489
514
|
Yields:
|
490
515
|
Results obtained by applying the function to each run configuration
|
491
516
|
in the collection.
|
517
|
+
|
492
518
|
"""
|
493
|
-
return (func(config, *args, **kwargs) for config in self.info.config)
|
519
|
+
return (func(config, *args, **kwargs) for config in self.data.config)
|
494
520
|
|
495
521
|
def map_uri(
|
496
522
|
self,
|
@@ -498,9 +524,7 @@ class RunCollection:
|
|
498
524
|
*args: P.args,
|
499
525
|
**kwargs: P.kwargs,
|
500
526
|
) -> Iterator[T]:
|
501
|
-
"""
|
502
|
-
Apply a function to each artifact URI in the collection and return an
|
503
|
-
iterator of results.
|
527
|
+
"""Return an iterator of results by applying a function to each artifact URI.
|
504
528
|
|
505
529
|
Iterate over each run in the collection, retrieves the artifact URI, and
|
506
530
|
apply the provided function to it. If a run does not have an artifact
|
@@ -515,6 +539,7 @@ class RunCollection:
|
|
515
539
|
Yields:
|
516
540
|
Results obtained by applying the function to each artifact URI in the
|
517
541
|
collection.
|
542
|
+
|
518
543
|
"""
|
519
544
|
return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
|
520
545
|
|
@@ -524,9 +549,7 @@ class RunCollection:
|
|
524
549
|
*args: P.args,
|
525
550
|
**kwargs: P.kwargs,
|
526
551
|
) -> Iterator[T]:
|
527
|
-
"""
|
528
|
-
Apply a function to each artifact directory in the collection and return
|
529
|
-
an iterator of results.
|
552
|
+
"""Return an iterator of results by applying a function to each artifact dir.
|
530
553
|
|
531
554
|
Iterate over each run in the collection, downloads the artifact
|
532
555
|
directory, and apply the provided function to the directory path.
|
@@ -540,6 +563,7 @@ class RunCollection:
|
|
540
563
|
Yields:
|
541
564
|
Results obtained by applying the function to each artifact directory
|
542
565
|
in the collection.
|
566
|
+
|
543
567
|
"""
|
544
568
|
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir) # noqa: A001
|
545
569
|
|
@@ -547,8 +571,7 @@ class RunCollection:
|
|
547
571
|
self,
|
548
572
|
*names: str | list[str],
|
549
573
|
) -> dict[tuple[str | None, ...], RunCollection]:
|
550
|
-
"""
|
551
|
-
Group runs by specified parameter names.
|
574
|
+
"""Group runs by specified parameter names.
|
552
575
|
|
553
576
|
Group the runs in the collection based on the values of the
|
554
577
|
specified parameters. Each unique combination of parameter values will
|
@@ -563,6 +586,7 @@ class RunCollection:
|
|
563
586
|
dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
|
564
587
|
are tuples of parameter values and the values are RunCollection objects
|
565
588
|
containing the runs that match those parameter values.
|
589
|
+
|
566
590
|
"""
|
567
591
|
grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
|
568
592
|
for run in self._runs:
|
@@ -571,6 +595,16 @@ class RunCollection:
|
|
571
595
|
|
572
596
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
573
597
|
|
598
|
+
@property
|
599
|
+
def config(self) -> DataFrame:
|
600
|
+
"""Get the runs' configurations as a polars DataFrame.
|
601
|
+
|
602
|
+
Returns:
|
603
|
+
A polars DataFrame containing the runs' configurations.
|
604
|
+
|
605
|
+
"""
|
606
|
+
return DataFrame(self.map_config(collect_params))
|
607
|
+
|
574
608
|
|
575
609
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
576
610
|
params = run.data.params
|
@@ -588,11 +622,10 @@ def filter_runs(
|
|
588
622
|
runs: list[Run],
|
589
623
|
config: object | None = None,
|
590
624
|
*,
|
591
|
-
status: str | list[str] | None = None,
|
625
|
+
status: str | list[str] | int | list[int] | None = None,
|
592
626
|
**kwargs,
|
593
627
|
) -> list[Run]:
|
594
|
-
"""
|
595
|
-
Filter the runs based on the provided configuration.
|
628
|
+
"""Filter the runs based on the provided configuration.
|
596
629
|
|
597
630
|
Filter the runs in the collection according to the
|
598
631
|
specified configuration object and additional key-value pairs.
|
@@ -612,33 +645,63 @@ def filter_runs(
|
|
612
645
|
config (object | None): The configuration object to filter the runs.
|
613
646
|
This can be any object that provides key-value pairs through the
|
614
647
|
`iter_params` function.
|
615
|
-
status (str | list[str] | None): The status of
|
648
|
+
status (str | list[str] | RunStatus | list[RunStatus] | None): The status of
|
649
|
+
the runs to filter.
|
616
650
|
**kwargs: Additional key-value pairs to filter the runs.
|
617
651
|
|
618
652
|
Returns:
|
619
653
|
A list of runs that match the specified configuration and key-value pairs.
|
654
|
+
|
620
655
|
"""
|
621
656
|
for key, value in chain(iter_params(config), kwargs.items()):
|
622
657
|
runs = [run for run in runs if _param_matches(run, key, value)]
|
623
|
-
|
624
|
-
if len(runs) == 0:
|
658
|
+
if not runs:
|
625
659
|
return []
|
626
660
|
|
627
|
-
if
|
628
|
-
|
629
|
-
return [run for run in runs if run.info.status.lower() != status]
|
661
|
+
if status is None:
|
662
|
+
return runs
|
630
663
|
|
631
|
-
|
632
|
-
status = [status] if isinstance(status, str) else status
|
633
|
-
status = [s.lower() for s in status]
|
634
|
-
return [run for run in runs if run.info.status.lower() in status]
|
664
|
+
return filter_runs_by_status(runs, status)
|
635
665
|
|
636
|
-
return runs
|
637
666
|
|
667
|
+
def filter_runs_by_status(
|
668
|
+
runs: list[Run],
|
669
|
+
status: str | list[str] | int | list[int],
|
670
|
+
) -> list[Run]:
|
671
|
+
"""Filter the runs based on the provided status.
|
672
|
+
|
673
|
+
Args:
|
674
|
+
runs (list[Run]): The list of runs to filter.
|
675
|
+
status (str | list[str] | int | list[int]): The status of the runs
|
676
|
+
to filter.
|
677
|
+
|
678
|
+
Returns:
|
679
|
+
A list of runs that match the specified status.
|
638
680
|
|
639
|
-
def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
640
681
|
"""
|
641
|
-
|
682
|
+
if isinstance(status, str):
|
683
|
+
if status.startswith("!"):
|
684
|
+
status = status[1:].lower()
|
685
|
+
return [run for run in runs if run.info.status.lower() != status]
|
686
|
+
|
687
|
+
status = [status]
|
688
|
+
|
689
|
+
elif isinstance(status, int):
|
690
|
+
status = [RunStatus.to_string(status)]
|
691
|
+
|
692
|
+
status = [_to_lower(s) for s in status]
|
693
|
+
return [run for run in runs if run.info.status.lower() in status]
|
694
|
+
|
695
|
+
|
696
|
+
def _to_lower(status: str | int) -> str:
|
697
|
+
if isinstance(status, str):
|
698
|
+
return status.lower()
|
699
|
+
|
700
|
+
return RunStatus.to_string(status).lower()
|
701
|
+
|
702
|
+
|
703
|
+
def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
704
|
+
"""Retrieve the values of specified parameters from the given run.
|
642
705
|
|
643
706
|
This function extracts the values of the parameters identified by the
|
644
707
|
provided names from the specified run. It can accept both individual
|
@@ -653,6 +716,7 @@ def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
|
653
716
|
Returns:
|
654
717
|
tuple[str | None, ...]: A tuple containing the values of the specified
|
655
718
|
parameters in the order they were provided.
|
719
|
+
|
656
720
|
"""
|
657
721
|
names_ = []
|
658
722
|
for name in names:
|
hydraflow/run_data.py
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
"""Provide information about MLflow runs."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from omegaconf import DictConfig, OmegaConf
|
8
|
+
|
9
|
+
from hydraflow.run_info import get_artifact_dir
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from mlflow.entities import Run
|
13
|
+
|
14
|
+
from hydraflow.run_collection import RunCollection
|
15
|
+
|
16
|
+
|
17
|
+
class RunCollectionData:
|
18
|
+
"""Provide information about MLflow runs."""
|
19
|
+
|
20
|
+
def __init__(self, runs: RunCollection) -> None:
|
21
|
+
self._runs = runs
|
22
|
+
|
23
|
+
@property
|
24
|
+
def params(self) -> list[dict[str, str]]:
|
25
|
+
"""Get the parameters for each run in the collection."""
|
26
|
+
return [run.data.params for run in self._runs]
|
27
|
+
|
28
|
+
@property
|
29
|
+
def metrics(self) -> list[dict[str, float]]:
|
30
|
+
"""Get the metrics for each run in the collection."""
|
31
|
+
return [run.data.metrics for run in self._runs]
|
32
|
+
|
33
|
+
@property
|
34
|
+
def config(self) -> list[DictConfig]:
|
35
|
+
"""Get the configuration for each run in the collection."""
|
36
|
+
return [load_config(run) for run in self._runs]
|
37
|
+
|
38
|
+
|
39
|
+
def load_config(run: Run) -> DictConfig:
|
40
|
+
"""Load the configuration for a given run.
|
41
|
+
|
42
|
+
This function loads the configuration for the provided Run instance
|
43
|
+
by downloading the configuration file from the MLflow artifacts and
|
44
|
+
loading it using OmegaConf. It returns an empty config if
|
45
|
+
`.hydra/config.yaml` is not found in the run's artifact directory.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
run (Run): The Run instance for which to load the configuration.
|
49
|
+
|
50
|
+
Returns:
|
51
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
52
|
+
DictConfig if the configuration file is not found.
|
53
|
+
|
54
|
+
"""
|
55
|
+
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
56
|
+
return OmegaConf.load(path) # type: ignore
|
hydraflow/{info.py → run_info.py}
RENAMED
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Provide information about MLflow runs."""
|
2
|
+
|
1
3
|
from __future__ import annotations
|
2
4
|
|
3
5
|
from pathlib import Path
|
@@ -6,7 +8,7 @@ from typing import TYPE_CHECKING
|
|
6
8
|
import mlflow
|
7
9
|
from hydra.core.hydra_config import HydraConfig
|
8
10
|
from mlflow.tracking import artifact_utils
|
9
|
-
from omegaconf import DictConfig, OmegaConf
|
11
|
+
from omegaconf import OmegaConf
|
10
12
|
|
11
13
|
if TYPE_CHECKING:
|
12
14
|
from mlflow.entities import Run
|
@@ -15,37 +17,29 @@ if TYPE_CHECKING:
|
|
15
17
|
|
16
18
|
|
17
19
|
class RunCollectionInfo:
|
20
|
+
"""Provide information about MLflow runs."""
|
21
|
+
|
18
22
|
def __init__(self, runs: RunCollection) -> None:
|
19
23
|
self._runs = runs
|
20
24
|
|
21
25
|
@property
|
22
26
|
def run_id(self) -> list[str]:
|
27
|
+
"""Get the run ID for each run in the collection."""
|
23
28
|
return [run.info.run_id for run in self._runs]
|
24
29
|
|
25
|
-
@property
|
26
|
-
def params(self) -> list[dict[str, str]]:
|
27
|
-
return [run.data.params for run in self._runs]
|
28
|
-
|
29
|
-
@property
|
30
|
-
def metrics(self) -> list[dict[str, float]]:
|
31
|
-
return [run.data.metrics for run in self._runs]
|
32
|
-
|
33
30
|
@property
|
34
31
|
def artifact_uri(self) -> list[str | None]:
|
32
|
+
"""Get the artifact URI for each run in the collection."""
|
35
33
|
return [run.info.artifact_uri for run in self._runs]
|
36
34
|
|
37
35
|
@property
|
38
36
|
def artifact_dir(self) -> list[Path]:
|
37
|
+
"""Get the artifact directory for each run in the collection."""
|
39
38
|
return [get_artifact_dir(run) for run in self._runs]
|
40
39
|
|
41
|
-
@property
|
42
|
-
def config(self) -> list[DictConfig]:
|
43
|
-
return [load_config(run) for run in self._runs]
|
44
|
-
|
45
40
|
|
46
41
|
def get_artifact_dir(run: Run | None = None) -> Path:
|
47
|
-
"""
|
48
|
-
Retrieve the artifact directory for the given run.
|
42
|
+
"""Retrieve the artifact directory for the given run.
|
49
43
|
|
50
44
|
This function uses MLflow to get the artifact directory for the given run.
|
51
45
|
|
@@ -54,6 +48,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
|
|
54
48
|
|
55
49
|
Returns:
|
56
50
|
The local path to the directory where the artifacts are downloaded.
|
51
|
+
|
57
52
|
"""
|
58
53
|
if run is None:
|
59
54
|
uri = mlflow.get_artifact_uri()
|
@@ -64,8 +59,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
|
|
64
59
|
|
65
60
|
|
66
61
|
def get_hydra_output_dir(run: Run | None = None) -> Path:
|
67
|
-
"""
|
68
|
-
Retrieve the Hydra output directory for the given run.
|
62
|
+
"""Retrieve the Hydra output directory for the given run.
|
69
63
|
|
70
64
|
This function returns the Hydra output directory. If no run is provided,
|
71
65
|
it retrieves the output directory from the current Hydra configuration.
|
@@ -82,6 +76,7 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
|
|
82
76
|
Raises:
|
83
77
|
FileNotFoundError: If the Hydra configuration file is not found
|
84
78
|
in the artifacts.
|
79
|
+
|
85
80
|
"""
|
86
81
|
if run is None:
|
87
82
|
hc = HydraConfig.get()
|
@@ -94,23 +89,3 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
|
|
94
89
|
return Path(hc.hydra.runtime.output_dir)
|
95
90
|
|
96
91
|
raise FileNotFoundError
|
97
|
-
|
98
|
-
|
99
|
-
def load_config(run: Run) -> DictConfig:
|
100
|
-
"""
|
101
|
-
Load the configuration for a given run.
|
102
|
-
|
103
|
-
This function loads the configuration for the provided Run instance
|
104
|
-
by downloading the configuration file from the MLflow artifacts and
|
105
|
-
loading it using OmegaConf. It returns an empty config if
|
106
|
-
`.hydra/config.yaml` is not found in the run's artifact directory.
|
107
|
-
|
108
|
-
Args:
|
109
|
-
run (Run): The Run instance for which to load the configuration.
|
110
|
-
|
111
|
-
Returns:
|
112
|
-
The loaded configuration as a DictConfig object. Returns an empty
|
113
|
-
DictConfig if the configuration file is not found.
|
114
|
-
"""
|
115
|
-
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
116
|
-
return OmegaConf.load(path) # type: ignore
|
{hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.17
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
|
|
17
17
|
Requires-Dist: hydra-core>=1.3
|
18
18
|
Requires-Dist: joblib
|
19
19
|
Requires-Dist: mlflow>=2.15
|
20
|
+
Requires-Dist: polars
|
20
21
|
Requires-Dist: rich
|
21
22
|
Requires-Dist: watchdog
|
22
23
|
Requires-Dist: watchfiles
|
hydraflow-0.3.0.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=zlLTztJPXyBFJC5Z8G7_OnlfzAHJPRrfE1c2OoDvlTg,667
|
2
|
+
hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
|
3
|
+
hydraflow/config.py,sha256=Wx7jymwLVr5EfpzXBpvv3Ax3VhGhvWyA7Yy6EzsPYWk,2479
|
4
|
+
hydraflow/context.py,sha256=IaDy-ZCdCfWwv95S-gyQNp062oBdtSVaz6dxGmO6Y8w,8226
|
5
|
+
hydraflow/mlflow.py,sha256=GkOr_pXfpfY5USYBLrCigHcP13VgrAK_e9kheR1Wke4,8579
|
6
|
+
hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
|
7
|
+
hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
|
8
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
hydraflow/run_collection.py,sha256=Xv6-KD5ac-vv-4Q3PZrzJy1x84H_g7UoP7ZqZ8_DQeQ,24973
|
10
|
+
hydraflow/run_data.py,sha256=HgXGjV5oN6VxOAhrFRjubWz5ZiRqT1a2VdS5OcH2UQQ,1732
|
11
|
+
hydraflow/run_info.py,sha256=4QrTmyPEQ_PVn7JKXJIa9NkXGAdqh8k5Sue1ggQS5aQ,2678
|
12
|
+
hydraflow-0.3.0.dist-info/METADATA,sha256=DmC1Yjwuc3snUQiePCr5xvdtbfIevOapiA2sg8w6Aho,3840
|
13
|
+
hydraflow-0.3.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
14
|
+
hydraflow-0.3.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
15
|
+
hydraflow-0.3.0.dist-info/RECORD,,
|
hydraflow-0.2.17.dist-info/RECORD
REMOVED
@@ -1,14 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
-
hydraflow/asyncio.py,sha256=eFnDbNOQ5Hmjdforr8rTW6i_rr-zFIVY3xSQQ45gMPA,6511
|
3
|
-
hydraflow/config.py,sha256=YU6xYLinxq-Iqw1R3Zy7s3_u8nfpvnvXlGIkPXJTNLc,2116
|
4
|
-
hydraflow/context.py,sha256=4UDaWGoVmeF36UqsKoh6dd_cS_YVRfz80gFr28ouNlo,8040
|
5
|
-
hydraflow/info.py,sha256=7EsCMEH6LJZB3FZiQ3IpPFTD3Meaz7G3M-HvDQeo1rw,3466
|
6
|
-
hydraflow/mlflow.py,sha256=irD1INrVaI_1RIzUCjI36voBqgZszZ4dkSLo4aT1_FM,8271
|
7
|
-
hydraflow/param.py,sha256=W71zJH39s8cJcy3qV-PFQHJYyQnfa1GbnHOIqCMG3Jc,1573
|
8
|
-
hydraflow/progress.py,sha256=b5LvLm3d0eW3WsaidZAZotJNTTN3OwSY3XwxXXsJV9A,6561
|
9
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
hydraflow/run_collection.py,sha256=ym3M5ApEZVwJ1rYgOs4aYluTBfJeOECD6Z9SLFhv5O8,23260
|
11
|
-
hydraflow-0.2.17.dist-info/METADATA,sha256=uD6q000C_h2JsuFh0mkf1YmpTYxVDI1RLaAUKzZ6fDw,3819
|
12
|
-
hydraflow-0.2.17.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
13
|
-
hydraflow-0.2.17.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
14
|
-
hydraflow-0.2.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|