hydraflow 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +4 -1
- hydraflow/asyncio.py +13 -11
- hydraflow/config.py +17 -6
- hydraflow/context.py +16 -16
- hydraflow/mlflow.py +36 -23
- hydraflow/param.py +11 -0
- hydraflow/progress.py +7 -18
- hydraflow/run_collection.py +138 -74
- hydraflow/run_data.py +56 -0
- hydraflow/{info.py → run_info.py} +12 -37
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/METADATA +2 -1
- hydraflow-0.3.0.dist-info/RECORD +15 -0
- hydraflow-0.2.17.dist-info/RECORD +0 -14
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.17.dist-info → hydraflow-0.3.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
+
"""Provide a collection of MLflow runs."""
|
2
|
+
|
1
3
|
from .context import chdir_artifact, log_run, start_run, watch
|
2
|
-
from .info import get_artifact_dir, get_hydra_output_dir, load_config
|
3
4
|
from .mlflow import (
|
4
5
|
list_runs,
|
5
6
|
search_runs,
|
@@ -7,6 +8,8 @@ from .mlflow import (
|
|
7
8
|
)
|
8
9
|
from .progress import multi_tasks_progress, parallel_progress
|
9
10
|
from .run_collection import RunCollection
|
11
|
+
from .run_data import load_config
|
12
|
+
from .run_info import get_artifact_dir, get_hydra_output_dir
|
10
13
|
|
11
14
|
__all__ = [
|
12
15
|
"RunCollection",
|
hydraflow/asyncio.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Provide functionality for running commands and monitoring file changes."""
|
2
|
+
|
1
3
|
from __future__ import annotations
|
2
4
|
|
3
5
|
import asyncio
|
@@ -27,8 +29,7 @@ async def execute_command(
|
|
27
29
|
stderr: Callable[[str], None] | None = None,
|
28
30
|
stop_event: asyncio.Event,
|
29
31
|
) -> int:
|
30
|
-
"""
|
31
|
-
Runs a command asynchronously and pass the output to callback functions.
|
32
|
+
"""Run a command asynchronously and pass the output to callback functions.
|
32
33
|
|
33
34
|
Args:
|
34
35
|
program (str): The program to run.
|
@@ -39,6 +40,7 @@ async def execute_command(
|
|
39
40
|
|
40
41
|
Returns:
|
41
42
|
int: The return code of the process.
|
43
|
+
|
42
44
|
"""
|
43
45
|
try:
|
44
46
|
process = await asyncio.create_subprocess_exec(
|
@@ -68,13 +70,13 @@ async def process_stream(
|
|
68
70
|
stream: StreamReader | None,
|
69
71
|
callback: Callable[[str], None] | None,
|
70
72
|
) -> None:
|
71
|
-
"""
|
72
|
-
Reads a stream asynchronously and pass each line to a callback function.
|
73
|
+
"""Read a stream asynchronously and pass each line to a callback function.
|
73
74
|
|
74
75
|
Args:
|
75
76
|
stream (StreamReader | None): The stream to read from.
|
76
77
|
callback (Callable[[str], None] | None): The callback function to handle
|
77
78
|
each line.
|
79
|
+
|
78
80
|
"""
|
79
81
|
if stream is None or callback is None:
|
80
82
|
return
|
@@ -93,9 +95,7 @@ async def monitor_file_changes(
|
|
93
95
|
stop_event: asyncio.Event,
|
94
96
|
**awatch_kwargs,
|
95
97
|
) -> None:
|
96
|
-
"""
|
97
|
-
Watches for file changes in specified paths and pass the changes to a
|
98
|
-
callback function.
|
98
|
+
"""Watch file changes in specified paths and pass the changes to a callback.
|
99
99
|
|
100
100
|
Args:
|
101
101
|
paths (list[str | Path]): List of paths to monitor for changes.
|
@@ -103,6 +103,7 @@ async def monitor_file_changes(
|
|
103
103
|
function to handle file changes.
|
104
104
|
stop_event (asyncio.Event): Event to signal when to stop watching.
|
105
105
|
**awatch_kwargs: Additional keyword arguments to pass to watchfiles.awatch.
|
106
|
+
|
106
107
|
"""
|
107
108
|
str_paths = [str(path) for path in paths]
|
108
109
|
try:
|
@@ -127,8 +128,7 @@ async def run_and_monitor(
|
|
127
128
|
paths: list[str | Path] | None = None,
|
128
129
|
**awatch_kwargs,
|
129
130
|
) -> int:
|
130
|
-
"""
|
131
|
-
Runs a command and optionally watch for file changes concurrently.
|
131
|
+
"""Run a command and optionally watch for file changes concurrently.
|
132
132
|
|
133
133
|
Args:
|
134
134
|
program (str): The program to run.
|
@@ -138,6 +138,8 @@ async def run_and_monitor(
|
|
138
138
|
watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
|
139
139
|
file changes.
|
140
140
|
paths (list[str | Path] | None): List of paths to monitor for changes.
|
141
|
+
**awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.
|
142
|
+
|
141
143
|
"""
|
142
144
|
stop_event = asyncio.Event()
|
143
145
|
run_task = asyncio.create_task(
|
@@ -184,8 +186,7 @@ def run(
|
|
184
186
|
paths: list[str | Path] | None = None,
|
185
187
|
**awatch_kwargs,
|
186
188
|
) -> int:
|
187
|
-
"""
|
188
|
-
Run a command synchronously and optionally watch for file changes.
|
189
|
+
"""Run a command synchronously and optionally watch for file changes.
|
189
190
|
|
190
191
|
This function is a synchronous wrapper around the asynchronous
|
191
192
|
`run_and_monitor` function. It runs a specified command and optionally
|
@@ -208,6 +209,7 @@ def run(
|
|
208
209
|
|
209
210
|
Returns:
|
210
211
|
int: The return code of the process.
|
212
|
+
|
211
213
|
"""
|
212
214
|
if watch and not paths:
|
213
215
|
paths = [Path.cwd()]
|
hydraflow/config.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
This module provides functionality for working with configuration
|
3
|
-
objects using the OmegaConf library.
|
4
|
-
"""
|
1
|
+
"""Provide functionality for working with configuration objects using OmegaConf."""
|
5
2
|
|
6
3
|
from __future__ import annotations
|
7
4
|
|
@@ -14,9 +11,22 @@ if TYPE_CHECKING:
|
|
14
11
|
from typing import Any
|
15
12
|
|
16
13
|
|
17
|
-
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
14
|
+
def collect_params(config: object) -> dict[str, Any]:
|
15
|
+
"""Iterate over parameters and collect them into a dictionary.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
config (object): The configuration object to iterate over.
|
19
|
+
prefix (str): The prefix to prepend to the parameter keys.
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
dict[str, Any]: A dictionary of collected parameters.
|
23
|
+
|
18
24
|
"""
|
19
|
-
|
25
|
+
return dict(iter_params(config))
|
26
|
+
|
27
|
+
|
28
|
+
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
29
|
+
"""Recursively iterate over the parameters in the given configuration object.
|
20
30
|
|
21
31
|
This function traverses the configuration object and yields key-value pairs
|
22
32
|
representing the parameters. The keys are prefixed with the provided prefix.
|
@@ -29,6 +39,7 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
29
39
|
|
30
40
|
Yields:
|
31
41
|
Key-value pairs representing the parameters in the configuration object.
|
42
|
+
|
32
43
|
"""
|
33
44
|
if config is None:
|
34
45
|
return
|
hydraflow/context.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
This module provides context managers to log parameters and manage the MLflow
|
3
|
-
run context.
|
4
|
-
"""
|
1
|
+
"""Provide context managers to log parameters and manage the MLflow run context."""
|
5
2
|
|
6
3
|
from __future__ import annotations
|
7
4
|
|
@@ -17,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
|
|
17
14
|
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
18
15
|
from watchdog.observers import Observer
|
19
16
|
|
20
|
-
from hydraflow.info import get_artifact_dir
|
21
17
|
from hydraflow.mlflow import log_params
|
18
|
+
from hydraflow.run_info import get_artifact_dir
|
22
19
|
|
23
20
|
if TYPE_CHECKING:
|
24
21
|
from collections.abc import Callable, Iterator
|
@@ -34,9 +31,7 @@ def log_run(
|
|
34
31
|
*,
|
35
32
|
synchronous: bool | None = None,
|
36
33
|
) -> Iterator[None]:
|
37
|
-
"""
|
38
|
-
Log the parameters from the given configuration object and manage the MLflow
|
39
|
-
run context.
|
34
|
+
"""Log the parameters from the given configuration object.
|
40
35
|
|
41
36
|
This context manager logs the parameters from the provided configuration object
|
42
37
|
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
@@ -56,6 +51,7 @@ def log_run(
|
|
56
51
|
# Perform operations within the MLflow run context
|
57
52
|
pass
|
58
53
|
```
|
54
|
+
|
59
55
|
"""
|
60
56
|
log_params(config, synchronous=synchronous)
|
61
57
|
|
@@ -98,8 +94,7 @@ def start_run( # noqa: PLR0913
|
|
98
94
|
log_system_metrics: bool | None = None,
|
99
95
|
synchronous: bool | None = None,
|
100
96
|
) -> Iterator[Run]:
|
101
|
-
"""
|
102
|
-
Start an MLflow run and log parameters using the provided configuration object.
|
97
|
+
"""Start an MLflow run and log parameters using the provided configuration object.
|
103
98
|
|
104
99
|
This context manager starts an MLflow run and logs parameters using the specified
|
105
100
|
configuration object. It ensures that the run is properly closed after completion.
|
@@ -130,6 +125,7 @@ def start_run( # noqa: PLR0913
|
|
130
125
|
- `mlflow.start_run`: The MLflow function to start a run directly.
|
131
126
|
- `log_run`: A context manager to log parameters and manage the MLflow
|
132
127
|
run context.
|
128
|
+
|
133
129
|
"""
|
134
130
|
with (
|
135
131
|
mlflow.start_run(
|
@@ -156,9 +152,7 @@ def watch(
|
|
156
152
|
ignore_patterns: list[str] | None = None,
|
157
153
|
ignore_log: bool = True,
|
158
154
|
) -> Iterator[None]:
|
159
|
-
"""
|
160
|
-
Watch the given directory for changes and call the provided function
|
161
|
-
when a change is detected.
|
155
|
+
"""Watch the given directory for changes.
|
162
156
|
|
163
157
|
This context manager sets up a file system watcher on the specified directory.
|
164
158
|
When a file modification is detected, the provided function is called with
|
@@ -173,6 +167,9 @@ def watch(
|
|
173
167
|
the current MLflow artifact URI is used. Defaults to "".
|
174
168
|
timeout (int): The timeout period in seconds for the watcher
|
175
169
|
to run after the context is exited. Defaults to 60.
|
170
|
+
ignore_patterns (list[str] | None): A list of glob patterns to ignore.
|
171
|
+
Defaults to None.
|
172
|
+
ignore_log (bool): Whether to ignore log files. Defaults to True.
|
176
173
|
|
177
174
|
Yields:
|
178
175
|
None
|
@@ -183,6 +180,7 @@ def watch(
|
|
183
180
|
# Perform operations while watching the directory for changes
|
184
181
|
pass
|
185
182
|
```
|
183
|
+
|
186
184
|
"""
|
187
185
|
dir = dir or get_artifact_dir() # noqa: A001
|
188
186
|
if isinstance(dir, Path):
|
@@ -214,6 +212,8 @@ def watch(
|
|
214
212
|
|
215
213
|
|
216
214
|
class Handler(PatternMatchingEventHandler):
|
215
|
+
"""Monitor file changes and call the given function when a change is detected."""
|
216
|
+
|
217
217
|
def __init__(
|
218
218
|
self,
|
219
219
|
func: Callable[[Path], None],
|
@@ -232,6 +232,7 @@ class Handler(PatternMatchingEventHandler):
|
|
232
232
|
super().__init__(ignore_patterns=ignore_patterns)
|
233
233
|
|
234
234
|
def on_modified(self, event: FileModifiedEvent) -> None:
|
235
|
+
"""Call the given function when a file is modified."""
|
235
236
|
file = Path(str(event.src_path))
|
236
237
|
if file.is_file():
|
237
238
|
self.func(file)
|
@@ -242,9 +243,7 @@ def chdir_artifact(
|
|
242
243
|
run: Run,
|
243
244
|
artifact_path: str | None = None,
|
244
245
|
) -> Iterator[Path]:
|
245
|
-
"""
|
246
|
-
Change the current working directory to the artifact directory of the
|
247
|
-
given run.
|
246
|
+
"""Change the current working directory to the artifact directory of the given run.
|
248
247
|
|
249
248
|
This context manager changes the current working directory to the artifact
|
250
249
|
directory of the given run. It ensures that the directory is changed back
|
@@ -253,6 +252,7 @@ def chdir_artifact(
|
|
253
252
|
Args:
|
254
253
|
run (Run): The run to get the artifact directory from.
|
255
254
|
artifact_path (str | None): The artifact path.
|
255
|
+
|
256
256
|
"""
|
257
257
|
curdir = Path.cwd()
|
258
258
|
path = mlflow.artifacts.download_artifacts(
|
hydraflow/mlflow.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1
|
-
"""
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
"""Provide functionality to log parameters from Hydra configuration objects.
|
2
|
+
|
3
|
+
This module provides functions to log parameters from Hydra configuration objects
|
4
|
+
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
|
+
configuration management with MLflow's experiment tracking capabilities.
|
5
6
|
|
6
7
|
Key Features:
|
7
|
-
- **Experiment Management**: Set
|
8
|
-
|
9
|
-
- **
|
10
|
-
|
11
|
-
- **Run
|
12
|
-
|
13
|
-
|
14
|
-
easy access to outputs generated during experiments.
|
15
|
-
|
16
|
-
This module is designed to integrate seamlessly with Hydra, providing a robust
|
17
|
-
solution for tracking machine learning experiments and their associated metadata.
|
8
|
+
- **Experiment Management**: Set experiment names and tracking URIs using Hydra
|
9
|
+
configuration details.
|
10
|
+
- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
|
11
|
+
supporting both synchronous and asynchronous logging.
|
12
|
+
- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
|
13
|
+
multiple MLflow runs, providing methods to filter and retrieve runs based on
|
14
|
+
various criteria.
|
18
15
|
"""
|
19
16
|
|
20
17
|
from __future__ import annotations
|
@@ -40,8 +37,7 @@ def set_experiment(
|
|
40
37
|
suffix: str = "",
|
41
38
|
uri: str | Path | None = None,
|
42
39
|
) -> Experiment:
|
43
|
-
"""
|
44
|
-
Sets the experiment name and tracking URI optionally.
|
40
|
+
"""Set the experiment name and tracking URI optionally.
|
45
41
|
|
46
42
|
This function sets the experiment name by combining the given prefix,
|
47
43
|
the job name from HydraConfig, and the given suffix. Optionally, it can
|
@@ -55,6 +51,7 @@ def set_experiment(
|
|
55
51
|
Returns:
|
56
52
|
Experiment: An instance of `mlflow.entities.Experiment` representing
|
57
53
|
the new active experiment.
|
54
|
+
|
58
55
|
"""
|
59
56
|
if uri is not None:
|
60
57
|
mlflow.set_tracking_uri(uri)
|
@@ -65,8 +62,7 @@ def set_experiment(
|
|
65
62
|
|
66
63
|
|
67
64
|
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
68
|
-
"""
|
69
|
-
Log the parameters from the given configuration object.
|
65
|
+
"""Log the parameters from the given configuration object.
|
70
66
|
|
71
67
|
This method logs the parameters from the provided configuration object
|
72
68
|
using MLflow. It iterates over the parameters and logs them using the
|
@@ -76,6 +72,7 @@ def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
|
76
72
|
config (object): The configuration object to log the parameters from.
|
77
73
|
synchronous (bool | None): Whether to log the parameters synchronously.
|
78
74
|
Defaults to None.
|
75
|
+
|
79
76
|
"""
|
80
77
|
for key, value in iter_params(config):
|
81
78
|
mlflow.log_param(key, value, synchronous=synchronous)
|
@@ -91,8 +88,7 @@ def search_runs( # noqa: PLR0913
|
|
91
88
|
search_all_experiments: bool = False,
|
92
89
|
experiment_names: list[str] | None = None,
|
93
90
|
) -> RunCollection:
|
94
|
-
"""
|
95
|
-
Search for Runs that fit the specified criteria.
|
91
|
+
"""Search for Runs that fit the specified criteria.
|
96
92
|
|
97
93
|
This function wraps the `mlflow.search_runs` function and returns the
|
98
94
|
results as a `RunCollection` object. It allows for flexible searching of
|
@@ -133,6 +129,7 @@ def search_runs( # noqa: PLR0913
|
|
133
129
|
|
134
130
|
Returns:
|
135
131
|
A `RunCollection` object containing the search results.
|
132
|
+
|
136
133
|
"""
|
137
134
|
runs = mlflow.search_runs(
|
138
135
|
experiment_ids=experiment_ids,
|
@@ -151,9 +148,9 @@ def search_runs( # noqa: PLR0913
|
|
151
148
|
def list_runs(
|
152
149
|
experiment_names: str | list[str] | None = None,
|
153
150
|
n_jobs: int = 0,
|
151
|
+
status: str | list[str] | int | list[int] | None = None,
|
154
152
|
) -> RunCollection:
|
155
|
-
"""
|
156
|
-
List all runs for the specified experiments.
|
153
|
+
"""List all runs for the specified experiments.
|
157
154
|
|
158
155
|
This function retrieves all runs for the given list of experiment names.
|
159
156
|
If no experiment names are provided (None), it defaults to searching all runs
|
@@ -169,11 +166,27 @@ def list_runs(
|
|
169
166
|
for runs. If None or an empty list is provided, the function will
|
170
167
|
search the currently active experiment or all experiments except
|
171
168
|
the "Default" experiment.
|
169
|
+
n_jobs (int): The number of jobs to run in parallel. If 0, the function
|
170
|
+
will search runs sequentially.
|
171
|
+
status (str | list[str] | int | list[int] | None): The status of the runs
|
172
|
+
to filter.
|
172
173
|
|
173
174
|
Returns:
|
174
175
|
RunCollection: A `RunCollection` instance containing the runs for the
|
175
176
|
specified experiments.
|
177
|
+
|
176
178
|
"""
|
179
|
+
rc = _list_runs(experiment_names, n_jobs)
|
180
|
+
if status is None:
|
181
|
+
return rc
|
182
|
+
|
183
|
+
return rc.filter(status=status)
|
184
|
+
|
185
|
+
|
186
|
+
def _list_runs(
|
187
|
+
experiment_names: str | list[str] | None = None,
|
188
|
+
n_jobs: int = 0,
|
189
|
+
) -> RunCollection:
|
177
190
|
if isinstance(experiment_names, str):
|
178
191
|
experiment_names = [experiment_names]
|
179
192
|
|
hydraflow/param.py
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
"""Provide utility functions for parameter matching.
|
2
|
+
|
3
|
+
The main function `match` checks if a given parameter matches a specified value.
|
4
|
+
It supports various types of values including None, boolean, list, tuple, int,
|
5
|
+
float, and str.
|
6
|
+
|
7
|
+
Helper functions `_match_list` and `_match_tuple` are used internally to handle
|
8
|
+
matching for list and tuple types respectively.
|
9
|
+
"""
|
10
|
+
|
1
11
|
from __future__ import annotations
|
2
12
|
|
3
13
|
from typing import Any
|
@@ -13,6 +23,7 @@ def match(param: str, value: Any) -> bool:
|
|
13
23
|
Returns:
|
14
24
|
True if the parameter matches the specified value,
|
15
25
|
False otherwise.
|
26
|
+
|
16
27
|
"""
|
17
28
|
if value in [None, True, False]:
|
18
29
|
return param == str(value)
|
hydraflow/progress.py
CHANGED
@@ -1,18 +1,7 @@
|
|
1
|
-
"""
|
2
|
-
Module for managing progress tracking in parallel processing using Joblib
|
3
|
-
and Rich's Progress bar.
|
1
|
+
"""Context managers and functions for parallel task execution with progress.
|
4
2
|
|
5
3
|
Provide context managers and functions to facilitate the execution
|
6
4
|
of tasks in parallel while displaying progress updates.
|
7
|
-
|
8
|
-
The following key components are provided:
|
9
|
-
|
10
|
-
- JoblibProgress: A context manager for tracking progress with Rich's progress
|
11
|
-
bar.
|
12
|
-
- parallel_progress: A function to execute a given function in parallel over
|
13
|
-
an iterable with progress tracking.
|
14
|
-
- multi_tasks_progress: A function to render auto-updating progress bars for
|
15
|
-
multiple tasks concurrently.
|
16
5
|
"""
|
17
6
|
|
18
7
|
from __future__ import annotations
|
@@ -37,8 +26,7 @@ def JoblibProgress( # noqa: N802
|
|
37
26
|
total: int | None = None,
|
38
27
|
**kwargs,
|
39
28
|
) -> Iterator[Progress]:
|
40
|
-
"""
|
41
|
-
Context manager for tracking progress using Joblib with Rich's Progress bar.
|
29
|
+
"""Context manager for tracking progress using Joblib with Rich's Progress bar.
|
42
30
|
|
43
31
|
Args:
|
44
32
|
*columns (ProgressColumn | str): Columns to display in the progress bar.
|
@@ -56,6 +44,7 @@ def JoblibProgress( # noqa: N802
|
|
56
44
|
with JoblibProgress("task", total=100) as progress:
|
57
45
|
# Your parallel processing code here
|
58
46
|
```
|
47
|
+
|
59
48
|
"""
|
60
49
|
if not columns:
|
61
50
|
columns = Progress.get_default_columns()
|
@@ -94,8 +83,7 @@ def parallel_progress(
|
|
94
83
|
description: str | None = None,
|
95
84
|
**kwargs,
|
96
85
|
) -> list[U]:
|
97
|
-
"""
|
98
|
-
Execute a function in parallel over an iterable with progress tracking.
|
86
|
+
"""Execute a function in parallel over an iterable with progress tracking.
|
99
87
|
|
100
88
|
Args:
|
101
89
|
func (Callable[[T], U]): The function to execute on each item in the
|
@@ -112,6 +100,7 @@ def parallel_progress(
|
|
112
100
|
Returns:
|
113
101
|
list[U]: A list of results from applying the function to each item in
|
114
102
|
the iterable.
|
103
|
+
|
115
104
|
"""
|
116
105
|
iterable = list(iterable)
|
117
106
|
total = len(iterable)
|
@@ -130,8 +119,7 @@ def multi_tasks_progress(
|
|
130
119
|
transient: bool | None = None,
|
131
120
|
**kwargs,
|
132
121
|
) -> None:
|
133
|
-
"""
|
134
|
-
Render auto-updating progress bars for multiple tasks concurrently.
|
122
|
+
"""Render auto-updating progress bars for multiple tasks concurrently.
|
135
123
|
|
136
124
|
Args:
|
137
125
|
iterables (Iterable[Iterable[int | tuple[int, int]]]): A collection of
|
@@ -151,6 +139,7 @@ def multi_tasks_progress(
|
|
151
139
|
|
152
140
|
Returns:
|
153
141
|
None
|
142
|
+
|
154
143
|
"""
|
155
144
|
if not columns:
|
156
145
|
columns = Progress.get_default_columns()
|
hydraflow/run_collection.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
"""
|
2
|
-
|
3
|
-
|
1
|
+
"""Provide a collection of MLflow runs.
|
2
|
+
|
3
|
+
This module includes the `RunCollection` class, which serves as a container
|
4
4
|
for multiple MLflow `Run` instances, and various methods to filter and
|
5
5
|
retrieve these runs.
|
6
6
|
|
@@ -23,9 +23,13 @@ from dataclasses import dataclass, field
|
|
23
23
|
from itertools import chain
|
24
24
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
|
25
25
|
|
26
|
+
from mlflow.entities import RunStatus
|
27
|
+
from polars.dataframe import DataFrame
|
28
|
+
|
26
29
|
import hydraflow.param
|
27
|
-
from hydraflow.config import iter_params
|
28
|
-
from hydraflow.info import RunCollectionInfo
|
30
|
+
from hydraflow.config import collect_params, iter_params
|
31
|
+
from hydraflow.run_data import RunCollectionData
|
32
|
+
from hydraflow.run_info import RunCollectionInfo
|
29
33
|
|
30
34
|
if TYPE_CHECKING:
|
31
35
|
from collections.abc import Callable, Iterator
|
@@ -42,8 +46,7 @@ P = ParamSpec("P")
|
|
42
46
|
|
43
47
|
@dataclass
|
44
48
|
class RunCollection:
|
45
|
-
"""
|
46
|
-
Represent a collection of MLflow runs.
|
49
|
+
"""Represent a collection of MLflow runs.
|
47
50
|
|
48
51
|
Provide methods to interact with the runs, such as filtering,
|
49
52
|
retrieving specific runs, and accessing run information.
|
@@ -60,8 +63,12 @@ class RunCollection:
|
|
60
63
|
_info: RunCollectionInfo = field(init=False)
|
61
64
|
"""An instance of `RunCollectionInfo`."""
|
62
65
|
|
66
|
+
_data: RunCollectionData = field(init=False)
|
67
|
+
"""An instance of `RunCollectionData`."""
|
68
|
+
|
63
69
|
def __post_init__(self) -> None:
|
64
70
|
self._info = RunCollectionInfo(self)
|
71
|
+
self._data = RunCollectionData(self)
|
65
72
|
|
66
73
|
def __repr__(self) -> str:
|
67
74
|
return f"{self.__class__.__name__}({len(self)})"
|
@@ -93,7 +100,6 @@ class RunCollection:
|
|
93
100
|
@classmethod
|
94
101
|
def from_list(cls, runs: list[Run]) -> RunCollection:
|
95
102
|
"""Create a `RunCollection` instance from a list of MLflow `Run` instances."""
|
96
|
-
|
97
103
|
return cls(runs)
|
98
104
|
|
99
105
|
@property
|
@@ -101,6 +107,11 @@ class RunCollection:
|
|
101
107
|
"""An instance of `RunCollectionInfo`."""
|
102
108
|
return self._info
|
103
109
|
|
110
|
+
@property
|
111
|
+
def data(self) -> RunCollectionData:
|
112
|
+
"""An instance of `RunCollectionData`."""
|
113
|
+
return self._data
|
114
|
+
|
104
115
|
def take(self, n: int) -> RunCollection:
|
105
116
|
"""Take the first n runs from the collection.
|
106
117
|
|
@@ -114,6 +125,7 @@ class RunCollection:
|
|
114
125
|
Returns:
|
115
126
|
A new `RunCollection` instance containing the first n runs if n is
|
116
127
|
positive, or the last n runs if n is negative.
|
128
|
+
|
117
129
|
"""
|
118
130
|
if n < 0:
|
119
131
|
return self.__class__(self._runs[n:])
|
@@ -126,17 +138,28 @@ class RunCollection:
|
|
126
138
|
*,
|
127
139
|
reverse: bool = False,
|
128
140
|
) -> None:
|
141
|
+
"""Sort the runs in the collection.
|
142
|
+
|
143
|
+
Sort the runs in the collection according to the provided key function
|
144
|
+
and optional reverse flag.
|
145
|
+
|
146
|
+
Args:
|
147
|
+
key (Callable[[Run], Any] | None): A function that takes a run and returns
|
148
|
+
a value to sort by.
|
149
|
+
reverse (bool): If True, sort in descending order.
|
150
|
+
|
151
|
+
"""
|
129
152
|
self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
|
130
153
|
|
131
154
|
def one(self) -> Run:
|
132
|
-
"""
|
133
|
-
Get the only `Run` instance in the collection.
|
155
|
+
"""Get the only `Run` instance in the collection.
|
134
156
|
|
135
157
|
Returns:
|
136
158
|
The only `Run` instance in the collection.
|
137
159
|
|
138
160
|
Raises:
|
139
161
|
ValueError: If the collection does not contain exactly one run.
|
162
|
+
|
140
163
|
"""
|
141
164
|
if len(self._runs) != 1:
|
142
165
|
raise ValueError("The collection does not contain exactly one run.")
|
@@ -144,24 +167,24 @@ class RunCollection:
|
|
144
167
|
return self._runs[0]
|
145
168
|
|
146
169
|
def try_one(self) -> Run | None:
|
147
|
-
"""
|
148
|
-
Try to get the only `Run` instance in the collection.
|
170
|
+
"""Try to get the only `Run` instance in the collection.
|
149
171
|
|
150
172
|
Returns:
|
151
173
|
The only `Run` instance in the collection, or None if the collection
|
152
174
|
does not contain exactly one run.
|
175
|
+
|
153
176
|
"""
|
154
177
|
return self._runs[0] if len(self._runs) == 1 else None
|
155
178
|
|
156
179
|
def first(self) -> Run:
|
157
|
-
"""
|
158
|
-
Get the first `Run` instance in the collection.
|
180
|
+
"""Get the first `Run` instance in the collection.
|
159
181
|
|
160
182
|
Returns:
|
161
183
|
The first `Run` instance in the collection.
|
162
184
|
|
163
185
|
Raises:
|
164
186
|
ValueError: If the collection is empty.
|
187
|
+
|
165
188
|
"""
|
166
189
|
if not self._runs:
|
167
190
|
raise ValueError("The collection is empty.")
|
@@ -169,24 +192,24 @@ class RunCollection:
|
|
169
192
|
return self._runs[0]
|
170
193
|
|
171
194
|
def try_first(self) -> Run | None:
|
172
|
-
"""
|
173
|
-
Try to get the first `Run` instance in the collection.
|
195
|
+
"""Try to get the first `Run` instance in the collection.
|
174
196
|
|
175
197
|
Returns:
|
176
198
|
The first `Run` instance in the collection, or None if the collection
|
177
199
|
is empty.
|
200
|
+
|
178
201
|
"""
|
179
202
|
return self._runs[0] if self._runs else None
|
180
203
|
|
181
204
|
def last(self) -> Run:
|
182
|
-
"""
|
183
|
-
Get the last `Run` instance in the collection.
|
205
|
+
"""Get the last `Run` instance in the collection.
|
184
206
|
|
185
207
|
Returns:
|
186
208
|
The last `Run` instance in the collection.
|
187
209
|
|
188
210
|
Raises:
|
189
211
|
ValueError: If the collection is empty.
|
212
|
+
|
190
213
|
"""
|
191
214
|
if not self._runs:
|
192
215
|
raise ValueError("The collection is empty.")
|
@@ -194,18 +217,17 @@ class RunCollection:
|
|
194
217
|
return self._runs[-1]
|
195
218
|
|
196
219
|
def try_last(self) -> Run | None:
|
197
|
-
"""
|
198
|
-
Try to get the last `Run` instance in the collection.
|
220
|
+
"""Try to get the last `Run` instance in the collection.
|
199
221
|
|
200
222
|
Returns:
|
201
223
|
The last `Run` instance in the collection, or None if the collection
|
202
224
|
is empty.
|
225
|
+
|
203
226
|
"""
|
204
227
|
return self._runs[-1] if self._runs else None
|
205
228
|
|
206
229
|
def filter(self, config: object | None = None, **kwargs) -> RunCollection:
|
207
|
-
"""
|
208
|
-
Filter the `Run` instances based on the provided configuration.
|
230
|
+
"""Filter the `Run` instances based on the provided configuration.
|
209
231
|
|
210
232
|
This method filters the runs in the collection according to the
|
211
233
|
specified configuration object and additional key-value pairs. The
|
@@ -228,12 +250,12 @@ class RunCollection:
|
|
228
250
|
|
229
251
|
Returns:
|
230
252
|
A new `RunCollection` object containing the filtered runs.
|
253
|
+
|
231
254
|
"""
|
232
255
|
return RunCollection(filter_runs(self._runs, config, **kwargs))
|
233
256
|
|
234
257
|
def find(self, config: object | None = None, **kwargs) -> Run:
|
235
|
-
"""
|
236
|
-
Find the first `Run` instance based on the provided configuration.
|
258
|
+
"""Find the first `Run` instance based on the provided configuration.
|
237
259
|
|
238
260
|
This method filters the runs in the collection according to the
|
239
261
|
specified configuration object and returns the first run that matches
|
@@ -252,6 +274,7 @@ class RunCollection:
|
|
252
274
|
|
253
275
|
See Also:
|
254
276
|
`filter`: Perform the actual filtering logic.
|
277
|
+
|
255
278
|
"""
|
256
279
|
try:
|
257
280
|
return self.filter(config, **kwargs).first()
|
@@ -259,8 +282,7 @@ class RunCollection:
|
|
259
282
|
raise ValueError("No run matches the provided configuration.")
|
260
283
|
|
261
284
|
def try_find(self, config: object | None = None, **kwargs) -> Run | None:
|
262
|
-
"""
|
263
|
-
Try to find the first `Run` instance based on the provided configuration.
|
285
|
+
"""Try to find the first `Run` instance based on the provided configuration.
|
264
286
|
|
265
287
|
This method filters the runs in the collection according to the
|
266
288
|
specified configuration object and returns the first run that matches
|
@@ -277,12 +299,12 @@ class RunCollection:
|
|
277
299
|
|
278
300
|
See Also:
|
279
301
|
`filter`: Perform the actual filtering logic.
|
302
|
+
|
280
303
|
"""
|
281
304
|
return self.filter(config, **kwargs).try_first()
|
282
305
|
|
283
306
|
def find_last(self, config: object | None = None, **kwargs) -> Run:
|
284
|
-
"""
|
285
|
-
Find the last `Run` instance based on the provided configuration.
|
307
|
+
"""Find the last `Run` instance based on the provided configuration.
|
286
308
|
|
287
309
|
This method filters the runs in the collection according to the
|
288
310
|
specified configuration object and returns the last run that matches
|
@@ -301,6 +323,7 @@ class RunCollection:
|
|
301
323
|
|
302
324
|
See Also:
|
303
325
|
`filter`: Perform the actual filtering logic.
|
326
|
+
|
304
327
|
"""
|
305
328
|
try:
|
306
329
|
return self.filter(config, **kwargs).last()
|
@@ -308,8 +331,7 @@ class RunCollection:
|
|
308
331
|
raise ValueError("No run matches the provided configuration.")
|
309
332
|
|
310
333
|
def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
|
311
|
-
"""
|
312
|
-
Try to find the last `Run` instance based on the provided configuration.
|
334
|
+
"""Try to find the last `Run` instance based on the provided configuration.
|
313
335
|
|
314
336
|
This method filters the runs in the collection according to the
|
315
337
|
specified configuration object and returns the last run that matches
|
@@ -326,12 +348,12 @@ class RunCollection:
|
|
326
348
|
|
327
349
|
See Also:
|
328
350
|
`filter`: Perform the actual filtering logic.
|
351
|
+
|
329
352
|
"""
|
330
353
|
return self.filter(config, **kwargs).try_last()
|
331
354
|
|
332
355
|
def get(self, config: object | None = None, **kwargs) -> Run:
|
333
|
-
"""
|
334
|
-
Retrieve a specific `Run` instance based on the provided configuration.
|
356
|
+
"""Retrieve a specific `Run` instance based on the provided configuration.
|
335
357
|
|
336
358
|
This method filters the runs in the collection according to the
|
337
359
|
specified configuration object and returns the run that matches the
|
@@ -351,6 +373,7 @@ class RunCollection:
|
|
351
373
|
|
352
374
|
See Also:
|
353
375
|
`filter`: Perform the actual filtering logic.
|
376
|
+
|
354
377
|
"""
|
355
378
|
try:
|
356
379
|
return self.filter(config, **kwargs).one()
|
@@ -359,8 +382,7 @@ class RunCollection:
|
|
359
382
|
raise ValueError(msg)
|
360
383
|
|
361
384
|
def try_get(self, config: object | None = None, **kwargs) -> Run | None:
|
362
|
-
"""
|
363
|
-
Try to retrieve a specific `Run` instance based on the provided configuration.
|
385
|
+
"""Try to get a specific `Run` instance based on the provided configuration.
|
364
386
|
|
365
387
|
This method filters the runs in the collection according to the
|
366
388
|
specified configuration object and returns the run that matches the
|
@@ -380,12 +402,12 @@ class RunCollection:
|
|
380
402
|
|
381
403
|
See Also:
|
382
404
|
`filter`: Perform the actual filtering logic.
|
405
|
+
|
383
406
|
"""
|
384
407
|
return self.filter(config, **kwargs).try_one()
|
385
408
|
|
386
409
|
def get_param_names(self) -> list[str]:
|
387
|
-
"""
|
388
|
-
Get the parameter names from the runs.
|
410
|
+
"""Get the parameter names from the runs.
|
389
411
|
|
390
412
|
This method extracts the unique parameter names from the provided list
|
391
413
|
of runs. It iterates through each run and collects the parameter names
|
@@ -393,6 +415,7 @@ class RunCollection:
|
|
393
415
|
|
394
416
|
Returns:
|
395
417
|
A list of unique parameter names.
|
418
|
+
|
396
419
|
"""
|
397
420
|
param_names = set()
|
398
421
|
|
@@ -402,24 +425,30 @@ class RunCollection:
|
|
402
425
|
|
403
426
|
return list(param_names)
|
404
427
|
|
405
|
-
def get_param_dict(self) -> dict[str, list[str]]:
|
406
|
-
"""
|
407
|
-
Get the parameter dictionary from the list of runs.
|
428
|
+
def get_param_dict(self, *, drop_const: bool = False) -> dict[str, list[str]]:
|
429
|
+
"""Get the parameter dictionary from the list of runs.
|
408
430
|
|
409
431
|
This method extracts the parameter names and their corresponding values
|
410
432
|
from the provided list of runs. It iterates through each run and
|
411
433
|
collects the parameter values into a dictionary where the keys are
|
412
434
|
parameter names and the values are lists of parameter values.
|
413
435
|
|
436
|
+
Args:
|
437
|
+
drop_const (bool): If True, drop the parameter values that are constant
|
438
|
+
across all runs.
|
439
|
+
|
414
440
|
Returns:
|
415
441
|
A dictionary where the keys are parameter names and the values are
|
416
442
|
lists of parameter values.
|
443
|
+
|
417
444
|
"""
|
418
445
|
params = {}
|
419
446
|
|
420
447
|
for name in self.get_param_names():
|
421
448
|
it = (run.data.params[name] for run in self if name in run.data.params)
|
422
|
-
|
449
|
+
unique_values = sorted(set(it))
|
450
|
+
if not drop_const or len(unique_values) > 1:
|
451
|
+
params[name] = unique_values
|
423
452
|
|
424
453
|
return params
|
425
454
|
|
@@ -429,9 +458,7 @@ class RunCollection:
|
|
429
458
|
*args: P.args,
|
430
459
|
**kwargs: P.kwargs,
|
431
460
|
) -> Iterator[T]:
|
432
|
-
"""
|
433
|
-
Apply a function to each run in the collection and return an iterator of
|
434
|
-
results.
|
461
|
+
"""Return an iterator of results by applying a function to each run.
|
435
462
|
|
436
463
|
This method iterates over each run in the collection and applies the
|
437
464
|
provided function to it, along with any additional arguments and
|
@@ -445,6 +472,7 @@ class RunCollection:
|
|
445
472
|
|
446
473
|
Yields:
|
447
474
|
Results obtained by applying the function to each run in the collection.
|
475
|
+
|
448
476
|
"""
|
449
477
|
return (func(run, *args, **kwargs) for run in self)
|
450
478
|
|
@@ -454,9 +482,7 @@ class RunCollection:
|
|
454
482
|
*args: P.args,
|
455
483
|
**kwargs: P.kwargs,
|
456
484
|
) -> Iterator[T]:
|
457
|
-
"""
|
458
|
-
Apply a function to each run id in the collection and return an iterator
|
459
|
-
of results.
|
485
|
+
"""Return an iterator of results by applying a function to each run id.
|
460
486
|
|
461
487
|
Args:
|
462
488
|
func (Callable[[str, P], T]): A function that takes a run id and returns a
|
@@ -467,6 +493,7 @@ class RunCollection:
|
|
467
493
|
Yields:
|
468
494
|
Results obtained by applying the function to each run id in the
|
469
495
|
collection.
|
496
|
+
|
470
497
|
"""
|
471
498
|
return (func(run_id, *args, **kwargs) for run_id in self.info.run_id)
|
472
499
|
|
@@ -476,9 +503,7 @@ class RunCollection:
|
|
476
503
|
*args: P.args,
|
477
504
|
**kwargs: P.kwargs,
|
478
505
|
) -> Iterator[T]:
|
479
|
-
"""
|
480
|
-
Apply a function to each run configuration in the collection and return
|
481
|
-
an iterator of results.
|
506
|
+
"""Return an iterator of results by applying a function to each run config.
|
482
507
|
|
483
508
|
Args:
|
484
509
|
func (Callable[[DictConfig, P], T]): A function that takes a run
|
@@ -489,8 +514,9 @@ class RunCollection:
|
|
489
514
|
Yields:
|
490
515
|
Results obtained by applying the function to each run configuration
|
491
516
|
in the collection.
|
517
|
+
|
492
518
|
"""
|
493
|
-
return (func(config, *args, **kwargs) for config in self.info.config)
|
519
|
+
return (func(config, *args, **kwargs) for config in self.data.config)
|
494
520
|
|
495
521
|
def map_uri(
|
496
522
|
self,
|
@@ -498,9 +524,7 @@ class RunCollection:
|
|
498
524
|
*args: P.args,
|
499
525
|
**kwargs: P.kwargs,
|
500
526
|
) -> Iterator[T]:
|
501
|
-
"""
|
502
|
-
Apply a function to each artifact URI in the collection and return an
|
503
|
-
iterator of results.
|
527
|
+
"""Return an iterator of results by applying a function to each artifact URI.
|
504
528
|
|
505
529
|
Iterate over each run in the collection, retrieves the artifact URI, and
|
506
530
|
apply the provided function to it. If a run does not have an artifact
|
@@ -515,6 +539,7 @@ class RunCollection:
|
|
515
539
|
Yields:
|
516
540
|
Results obtained by applying the function to each artifact URI in the
|
517
541
|
collection.
|
542
|
+
|
518
543
|
"""
|
519
544
|
return (func(uri, *args, **kwargs) for uri in self.info.artifact_uri)
|
520
545
|
|
@@ -524,9 +549,7 @@ class RunCollection:
|
|
524
549
|
*args: P.args,
|
525
550
|
**kwargs: P.kwargs,
|
526
551
|
) -> Iterator[T]:
|
527
|
-
"""
|
528
|
-
Apply a function to each artifact directory in the collection and return
|
529
|
-
an iterator of results.
|
552
|
+
"""Return an iterator of results by applying a function to each artifact dir.
|
530
553
|
|
531
554
|
Iterate over each run in the collection, downloads the artifact
|
532
555
|
directory, and apply the provided function to the directory path.
|
@@ -540,6 +563,7 @@ class RunCollection:
|
|
540
563
|
Yields:
|
541
564
|
Results obtained by applying the function to each artifact directory
|
542
565
|
in the collection.
|
566
|
+
|
543
567
|
"""
|
544
568
|
return (func(dir, *args, **kwargs) for dir in self.info.artifact_dir) # noqa: A001
|
545
569
|
|
@@ -547,8 +571,7 @@ class RunCollection:
|
|
547
571
|
self,
|
548
572
|
*names: str | list[str],
|
549
573
|
) -> dict[tuple[str | None, ...], RunCollection]:
|
550
|
-
"""
|
551
|
-
Group runs by specified parameter names.
|
574
|
+
"""Group runs by specified parameter names.
|
552
575
|
|
553
576
|
Group the runs in the collection based on the values of the
|
554
577
|
specified parameters. Each unique combination of parameter values will
|
@@ -563,6 +586,7 @@ class RunCollection:
|
|
563
586
|
dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
|
564
587
|
are tuples of parameter values and the values are RunCollection objects
|
565
588
|
containing the runs that match those parameter values.
|
589
|
+
|
566
590
|
"""
|
567
591
|
grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
|
568
592
|
for run in self._runs:
|
@@ -571,6 +595,16 @@ class RunCollection:
|
|
571
595
|
|
572
596
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
573
597
|
|
598
|
+
@property
|
599
|
+
def config(self) -> DataFrame:
|
600
|
+
"""Get the runs' configurations as a polars DataFrame.
|
601
|
+
|
602
|
+
Returns:
|
603
|
+
A polars DataFrame containing the runs' configurations.
|
604
|
+
|
605
|
+
"""
|
606
|
+
return DataFrame(self.map_config(collect_params))
|
607
|
+
|
574
608
|
|
575
609
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
576
610
|
params = run.data.params
|
@@ -588,11 +622,10 @@ def filter_runs(
|
|
588
622
|
runs: list[Run],
|
589
623
|
config: object | None = None,
|
590
624
|
*,
|
591
|
-
status: str | list[str] | None = None,
|
625
|
+
status: str | list[str] | int | list[int] | None = None,
|
592
626
|
**kwargs,
|
593
627
|
) -> list[Run]:
|
594
|
-
"""
|
595
|
-
Filter the runs based on the provided configuration.
|
628
|
+
"""Filter the runs based on the provided configuration.
|
596
629
|
|
597
630
|
Filter the runs in the collection according to the
|
598
631
|
specified configuration object and additional key-value pairs.
|
@@ -612,33 +645,63 @@ def filter_runs(
|
|
612
645
|
config (object | None): The configuration object to filter the runs.
|
613
646
|
This can be any object that provides key-value pairs through the
|
614
647
|
`iter_params` function.
|
615
|
-
status (str | list[str] | None): The status of
|
648
|
+
status (str | list[str] | RunStatus | list[RunStatus] | None): The status of
|
649
|
+
the runs to filter.
|
616
650
|
**kwargs: Additional key-value pairs to filter the runs.
|
617
651
|
|
618
652
|
Returns:
|
619
653
|
A list of runs that match the specified configuration and key-value pairs.
|
654
|
+
|
620
655
|
"""
|
621
656
|
for key, value in chain(iter_params(config), kwargs.items()):
|
622
657
|
runs = [run for run in runs if _param_matches(run, key, value)]
|
623
|
-
|
624
|
-
if len(runs) == 0:
|
658
|
+
if not runs:
|
625
659
|
return []
|
626
660
|
|
627
|
-
if isinstance(status, str) and status.startswith("!"):
|
628
|
-
|
629
|
-
return [run for run in runs if run.info.status.lower() != status]
|
661
|
+
if status is None:
|
662
|
+
return runs
|
630
663
|
|
631
|
-
|
632
|
-
status = [status] if isinstance(status, str) else status
|
633
|
-
status = [s.lower() for s in status]
|
634
|
-
return [run for run in runs if run.info.status.lower() in status]
|
664
|
+
return filter_runs_by_status(runs, status)
|
635
665
|
|
636
|
-
return runs
|
637
666
|
|
667
|
+
def filter_runs_by_status(
|
668
|
+
runs: list[Run],
|
669
|
+
status: str | list[str] | int | list[int],
|
670
|
+
) -> list[Run]:
|
671
|
+
"""Filter the runs based on the provided status.
|
672
|
+
|
673
|
+
Args:
|
674
|
+
runs (list[Run]): The list of runs to filter.
|
675
|
+
status (str | list[str] | int | list[int]): The status of the runs
|
676
|
+
to filter.
|
677
|
+
|
678
|
+
Returns:
|
679
|
+
A list of runs that match the specified status.
|
638
680
|
|
639
|
-
def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
640
681
|
"""
|
641
|
-
|
682
|
+
if isinstance(status, str):
|
683
|
+
if status.startswith("!"):
|
684
|
+
status = status[1:].lower()
|
685
|
+
return [run for run in runs if run.info.status.lower() != status]
|
686
|
+
|
687
|
+
status = [status]
|
688
|
+
|
689
|
+
elif isinstance(status, int):
|
690
|
+
status = [RunStatus.to_string(status)]
|
691
|
+
|
692
|
+
status = [_to_lower(s) for s in status]
|
693
|
+
return [run for run in runs if run.info.status.lower() in status]
|
694
|
+
|
695
|
+
|
696
|
+
def _to_lower(status: str | int) -> str:
|
697
|
+
if isinstance(status, str):
|
698
|
+
return status.lower()
|
699
|
+
|
700
|
+
return RunStatus.to_string(status).lower()
|
701
|
+
|
702
|
+
|
703
|
+
def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
704
|
+
"""Retrieve the values of specified parameters from the given run.
|
642
705
|
|
643
706
|
This function extracts the values of the parameters identified by the
|
644
707
|
provided names from the specified run. It can accept both individual
|
@@ -653,6 +716,7 @@ def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
|
|
653
716
|
Returns:
|
654
717
|
tuple[str | None, ...]: A tuple containing the values of the specified
|
655
718
|
parameters in the order they were provided.
|
719
|
+
|
656
720
|
"""
|
657
721
|
names_ = []
|
658
722
|
for name in names:
|
hydraflow/run_data.py
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
"""Provide information about MLflow runs."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from omegaconf import DictConfig, OmegaConf
|
8
|
+
|
9
|
+
from hydraflow.run_info import get_artifact_dir
|
10
|
+
|
11
|
+
if TYPE_CHECKING:
|
12
|
+
from mlflow.entities import Run
|
13
|
+
|
14
|
+
from hydraflow.run_collection import RunCollection
|
15
|
+
|
16
|
+
|
17
|
+
class RunCollectionData:
|
18
|
+
"""Provide information about MLflow runs."""
|
19
|
+
|
20
|
+
def __init__(self, runs: RunCollection) -> None:
|
21
|
+
self._runs = runs
|
22
|
+
|
23
|
+
@property
|
24
|
+
def params(self) -> list[dict[str, str]]:
|
25
|
+
"""Get the parameters for each run in the collection."""
|
26
|
+
return [run.data.params for run in self._runs]
|
27
|
+
|
28
|
+
@property
|
29
|
+
def metrics(self) -> list[dict[str, float]]:
|
30
|
+
"""Get the metrics for each run in the collection."""
|
31
|
+
return [run.data.metrics for run in self._runs]
|
32
|
+
|
33
|
+
@property
|
34
|
+
def config(self) -> list[DictConfig]:
|
35
|
+
"""Get the configuration for each run in the collection."""
|
36
|
+
return [load_config(run) for run in self._runs]
|
37
|
+
|
38
|
+
|
39
|
+
def load_config(run: Run) -> DictConfig:
|
40
|
+
"""Load the configuration for a given run.
|
41
|
+
|
42
|
+
This function loads the configuration for the provided Run instance
|
43
|
+
by downloading the configuration file from the MLflow artifacts and
|
44
|
+
loading it using OmegaConf. It returns an empty config if
|
45
|
+
`.hydra/config.yaml` is not found in the run's artifact directory.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
run (Run): The Run instance for which to load the configuration.
|
49
|
+
|
50
|
+
Returns:
|
51
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
52
|
+
DictConfig if the configuration file is not found.
|
53
|
+
|
54
|
+
"""
|
55
|
+
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
56
|
+
return OmegaConf.load(path) # type: ignore
|
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Provide information about MLflow runs."""
|
2
|
+
|
1
3
|
from __future__ import annotations
|
2
4
|
|
3
5
|
from pathlib import Path
|
@@ -6,7 +8,7 @@ from typing import TYPE_CHECKING
|
|
6
8
|
import mlflow
|
7
9
|
from hydra.core.hydra_config import HydraConfig
|
8
10
|
from mlflow.tracking import artifact_utils
|
9
|
-
from omegaconf import DictConfig, OmegaConf
|
11
|
+
from omegaconf import OmegaConf
|
10
12
|
|
11
13
|
if TYPE_CHECKING:
|
12
14
|
from mlflow.entities import Run
|
@@ -15,37 +17,29 @@ if TYPE_CHECKING:
|
|
15
17
|
|
16
18
|
|
17
19
|
class RunCollectionInfo:
|
20
|
+
"""Provide information about MLflow runs."""
|
21
|
+
|
18
22
|
def __init__(self, runs: RunCollection) -> None:
|
19
23
|
self._runs = runs
|
20
24
|
|
21
25
|
@property
|
22
26
|
def run_id(self) -> list[str]:
|
27
|
+
"""Get the run ID for each run in the collection."""
|
23
28
|
return [run.info.run_id for run in self._runs]
|
24
29
|
|
25
|
-
@property
|
26
|
-
def params(self) -> list[dict[str, str]]:
|
27
|
-
return [run.data.params for run in self._runs]
|
28
|
-
|
29
|
-
@property
|
30
|
-
def metrics(self) -> list[dict[str, float]]:
|
31
|
-
return [run.data.metrics for run in self._runs]
|
32
|
-
|
33
30
|
@property
|
34
31
|
def artifact_uri(self) -> list[str | None]:
|
32
|
+
"""Get the artifact URI for each run in the collection."""
|
35
33
|
return [run.info.artifact_uri for run in self._runs]
|
36
34
|
|
37
35
|
@property
|
38
36
|
def artifact_dir(self) -> list[Path]:
|
37
|
+
"""Get the artifact directory for each run in the collection."""
|
39
38
|
return [get_artifact_dir(run) for run in self._runs]
|
40
39
|
|
41
|
-
@property
|
42
|
-
def config(self) -> list[DictConfig]:
|
43
|
-
return [load_config(run) for run in self._runs]
|
44
|
-
|
45
40
|
|
46
41
|
def get_artifact_dir(run: Run | None = None) -> Path:
|
47
|
-
"""
|
48
|
-
Retrieve the artifact directory for the given run.
|
42
|
+
"""Retrieve the artifact directory for the given run.
|
49
43
|
|
50
44
|
This function uses MLflow to get the artifact directory for the given run.
|
51
45
|
|
@@ -54,6 +48,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
|
|
54
48
|
|
55
49
|
Returns:
|
56
50
|
The local path to the directory where the artifacts are downloaded.
|
51
|
+
|
57
52
|
"""
|
58
53
|
if run is None:
|
59
54
|
uri = mlflow.get_artifact_uri()
|
@@ -64,8 +59,7 @@ def get_artifact_dir(run: Run | None = None) -> Path:
|
|
64
59
|
|
65
60
|
|
66
61
|
def get_hydra_output_dir(run: Run | None = None) -> Path:
|
67
|
-
"""
|
68
|
-
Retrieve the Hydra output directory for the given run.
|
62
|
+
"""Retrieve the Hydra output directory for the given run.
|
69
63
|
|
70
64
|
This function returns the Hydra output directory. If no run is provided,
|
71
65
|
it retrieves the output directory from the current Hydra configuration.
|
@@ -82,6 +76,7 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
|
|
82
76
|
Raises:
|
83
77
|
FileNotFoundError: If the Hydra configuration file is not found
|
84
78
|
in the artifacts.
|
79
|
+
|
85
80
|
"""
|
86
81
|
if run is None:
|
87
82
|
hc = HydraConfig.get()
|
@@ -94,23 +89,3 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
|
|
94
89
|
return Path(hc.hydra.runtime.output_dir)
|
95
90
|
|
96
91
|
raise FileNotFoundError
|
97
|
-
|
98
|
-
|
99
|
-
def load_config(run: Run) -> DictConfig:
|
100
|
-
"""
|
101
|
-
Load the configuration for a given run.
|
102
|
-
|
103
|
-
This function loads the configuration for the provided Run instance
|
104
|
-
by downloading the configuration file from the MLflow artifacts and
|
105
|
-
loading it using OmegaConf. It returns an empty config if
|
106
|
-
`.hydra/config.yaml` is not found in the run's artifact directory.
|
107
|
-
|
108
|
-
Args:
|
109
|
-
run (Run): The Run instance for which to load the configuration.
|
110
|
-
|
111
|
-
Returns:
|
112
|
-
The loaded configuration as a DictConfig object. Returns an empty
|
113
|
-
DictConfig if the configuration file is not found.
|
114
|
-
"""
|
115
|
-
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
116
|
-
return OmegaConf.load(path) # type: ignore
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.17
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
|
|
17
17
|
Requires-Dist: hydra-core>=1.3
|
18
18
|
Requires-Dist: joblib
|
19
19
|
Requires-Dist: mlflow>=2.15
|
20
|
+
Requires-Dist: polars
|
20
21
|
Requires-Dist: rich
|
21
22
|
Requires-Dist: watchdog
|
22
23
|
Requires-Dist: watchfiles
|
@@ -0,0 +1,15 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=zlLTztJPXyBFJC5Z8G7_OnlfzAHJPRrfE1c2OoDvlTg,667
|
2
|
+
hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
|
3
|
+
hydraflow/config.py,sha256=Wx7jymwLVr5EfpzXBpvv3Ax3VhGhvWyA7Yy6EzsPYWk,2479
|
4
|
+
hydraflow/context.py,sha256=IaDy-ZCdCfWwv95S-gyQNp062oBdtSVaz6dxGmO6Y8w,8226
|
5
|
+
hydraflow/mlflow.py,sha256=GkOr_pXfpfY5USYBLrCigHcP13VgrAK_e9kheR1Wke4,8579
|
6
|
+
hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
|
7
|
+
hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
|
8
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
hydraflow/run_collection.py,sha256=Xv6-KD5ac-vv-4Q3PZrzJy1x84H_g7UoP7ZqZ8_DQeQ,24973
|
10
|
+
hydraflow/run_data.py,sha256=HgXGjV5oN6VxOAhrFRjubWz5ZiRqT1a2VdS5OcH2UQQ,1732
|
11
|
+
hydraflow/run_info.py,sha256=4QrTmyPEQ_PVn7JKXJIa9NkXGAdqh8k5Sue1ggQS5aQ,2678
|
12
|
+
hydraflow-0.3.0.dist-info/METADATA,sha256=DmC1Yjwuc3snUQiePCr5xvdtbfIevOapiA2sg8w6Aho,3840
|
13
|
+
hydraflow-0.3.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
14
|
+
hydraflow-0.3.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
15
|
+
hydraflow-0.3.0.dist-info/RECORD,,
|
@@ -1,14 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
-
hydraflow/asyncio.py,sha256=eFnDbNOQ5Hmjdforr8rTW6i_rr-zFIVY3xSQQ45gMPA,6511
|
3
|
-
hydraflow/config.py,sha256=YU6xYLinxq-Iqw1R3Zy7s3_u8nfpvnvXlGIkPXJTNLc,2116
|
4
|
-
hydraflow/context.py,sha256=4UDaWGoVmeF36UqsKoh6dd_cS_YVRfz80gFr28ouNlo,8040
|
5
|
-
hydraflow/info.py,sha256=7EsCMEH6LJZB3FZiQ3IpPFTD3Meaz7G3M-HvDQeo1rw,3466
|
6
|
-
hydraflow/mlflow.py,sha256=irD1INrVaI_1RIzUCjI36voBqgZszZ4dkSLo4aT1_FM,8271
|
7
|
-
hydraflow/param.py,sha256=W71zJH39s8cJcy3qV-PFQHJYyQnfa1GbnHOIqCMG3Jc,1573
|
8
|
-
hydraflow/progress.py,sha256=b5LvLm3d0eW3WsaidZAZotJNTTN3OwSY3XwxXXsJV9A,6561
|
9
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
hydraflow/run_collection.py,sha256=ym3M5ApEZVwJ1rYgOs4aYluTBfJeOECD6Z9SLFhv5O8,23260
|
11
|
-
hydraflow-0.2.17.dist-info/METADATA,sha256=uD6q000C_h2JsuFh0mkf1YmpTYxVDI1RLaAUKzZ6fDw,3819
|
12
|
-
hydraflow-0.2.17.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
13
|
-
hydraflow-0.2.17.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
14
|
-
hydraflow-0.2.17.dist-info/RECORD,,
|
File without changes
|
File without changes
|