hydraflow 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +4 -2
- hydraflow/config.py +3 -0
- hydraflow/runs.py +297 -113
- {hydraflow-0.2.1.dist-info → hydraflow-0.2.2.dist-info}/METADATA +15 -6
- hydraflow-0.2.2.dist-info/RECORD +9 -0
- hydraflow-0.2.1.dist-info/RECORD +0 -9
- {hydraflow-0.2.1.dist-info → hydraflow-0.2.2.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.1.dist-info → hydraflow-0.2.2.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -2,18 +2,19 @@ from .context import Info, chdir_artifact, log_run, watch
|
|
2
2
|
from .mlflow import set_experiment
|
3
3
|
from .runs import (
|
4
4
|
Run,
|
5
|
-
|
5
|
+
RunCollection,
|
6
6
|
filter_runs,
|
7
7
|
get_param_dict,
|
8
8
|
get_param_names,
|
9
9
|
get_run,
|
10
10
|
load_config,
|
11
|
+
search_runs,
|
11
12
|
)
|
12
13
|
|
13
14
|
__all__ = [
|
14
15
|
"Info",
|
15
16
|
"Run",
|
16
|
-
"Runs",
|
17
|
+
"RunCollection",
|
17
18
|
"chdir_artifact",
|
18
19
|
"filter_runs",
|
19
20
|
"get_param_dict",
|
@@ -21,6 +22,7 @@ __all__ = [
|
|
21
22
|
"get_run",
|
22
23
|
"load_config",
|
23
24
|
"log_run",
|
25
|
+
"search_runs",
|
24
26
|
"set_experiment",
|
25
27
|
"watch",
|
26
28
|
]
|
hydraflow/config.py
CHANGED
@@ -30,6 +30,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
30
30
|
Yields:
|
31
31
|
Key-value pairs representing the parameters in the configuration object.
|
32
32
|
"""
|
33
|
+
if config is None:
|
34
|
+
return
|
35
|
+
|
33
36
|
if not isinstance(config, (DictConfig, ListConfig)):
|
34
37
|
config = OmegaConf.create(config) # type: ignore
|
35
38
|
|
hydraflow/runs.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
-
It includes the `
|
4
|
-
log artifacts, and load configurations.
|
3
|
+
It includes the `RunCollection` class and various methods to filter runs,
|
4
|
+
retrieve run information, log artifacts, and load configurations.
|
5
5
|
"""
|
6
6
|
|
7
7
|
from __future__ import annotations
|
@@ -9,9 +9,10 @@ from __future__ import annotations
|
|
9
9
|
from dataclasses import dataclass
|
10
10
|
from functools import cache
|
11
11
|
from itertools import chain
|
12
|
-
from typing import TYPE_CHECKING, Any
|
12
|
+
from typing import TYPE_CHECKING, Any, TypeVar
|
13
13
|
|
14
14
|
import mlflow
|
15
|
+
from mlflow.artifacts import download_artifacts
|
15
16
|
from mlflow.entities import ViewType
|
16
17
|
from mlflow.entities.run import Run
|
17
18
|
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
|
@@ -20,6 +21,7 @@ from omegaconf import DictConfig, OmegaConf
|
|
20
21
|
from hydraflow.config import iter_params
|
21
22
|
|
22
23
|
if TYPE_CHECKING:
|
24
|
+
from collections.abc import Callable, Iterator
|
23
25
|
from typing import Any
|
24
26
|
|
25
27
|
|
@@ -31,14 +33,17 @@ def search_runs(
|
|
31
33
|
order_by: list[str] | None = None,
|
32
34
|
search_all_experiments: bool = False,
|
33
35
|
experiment_names: list[str] | None = None,
|
34
|
-
) -> Runs:
|
36
|
+
) -> RunCollection:
|
35
37
|
"""
|
36
38
|
Search for Runs that fit the specified criteria.
|
37
39
|
|
38
40
|
This function wraps the `mlflow.search_runs` function and returns the results
|
39
|
-
as a `
|
41
|
+
as a `RunCollection` object. It allows for flexible searching of MLflow runs based on
|
40
42
|
various criteria.
|
41
43
|
|
44
|
+
Note:
|
45
|
+
The returned runs are sorted by their start time in ascending order.
|
46
|
+
|
42
47
|
Args:
|
43
48
|
experiment_ids: List of experiment IDs. Search can work with experiment IDs or
|
44
49
|
experiment names, but not both in the same call. Values other than
|
@@ -53,9 +58,6 @@ def search_runs(
|
|
53
58
|
order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
|
54
59
|
can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
|
55
60
|
The default ordering is to sort by ``start_time DESC``, then ``run_id``.
|
56
|
-
output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
|
57
|
-
is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
|
58
|
-
is returned.
|
59
61
|
search_all_experiments: Boolean specifying whether all experiments should be searched.
|
60
62
|
Only honored if ``experiment_ids`` is ``[]`` or ``None``.
|
61
63
|
experiment_names: List of experiment names. Search can work with experiment IDs or
|
@@ -65,7 +67,7 @@ def search_runs(
|
|
65
67
|
experiment if ``experiment_ids`` is ``None`` or ``[]``.
|
66
68
|
|
67
69
|
Returns:
|
68
|
-
A `Runs` object containing the search results.
|
70
|
+
A `RunCollection` object containing the search results.
|
69
71
|
"""
|
70
72
|
runs = mlflow.search_runs(
|
71
73
|
experiment_ids=experiment_ids,
|
@@ -77,11 +79,44 @@ def search_runs(
|
|
77
79
|
search_all_experiments=search_all_experiments,
|
78
80
|
experiment_names=experiment_names,
|
79
81
|
)
|
80
|
-
|
82
|
+
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
83
|
+
return RunCollection(runs) # type: ignore
|
84
|
+
|
85
|
+
|
86
|
+
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
|
87
|
+
"""
|
88
|
+
List all runs for the specified experiments.
|
89
|
+
|
90
|
+
This function retrieves all runs for the given list of experiment names.
|
91
|
+
If no experiment names are provided (None), it defaults to searching all runs
|
92
|
+
for the currently active experiment. If an empty list is provided, the function
|
93
|
+
will search all runs for all experiments except the "Default" experiment.
|
94
|
+
The function returns the results as a `RunCollection` object.
|
95
|
+
|
96
|
+
Note:
|
97
|
+
The returned runs are sorted by their start time in ascending order.
|
98
|
+
|
99
|
+
Args:
|
100
|
+
experiment_names: List of experiment names to search for runs.
|
101
|
+
If None or an empty list is provided, the function will search
|
102
|
+
the currently active experiment or all experiments except the
|
103
|
+
"Default" experiment.
|
104
|
+
|
105
|
+
Returns:
|
106
|
+
A `RunCollection` object containing the runs for the specified experiments.
|
107
|
+
"""
|
108
|
+
if experiment_names == []:
|
109
|
+
experiments = mlflow.search_experiments()
|
110
|
+
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
111
|
+
|
112
|
+
return search_runs(experiment_names=experiment_names)
|
113
|
+
|
114
|
+
|
115
|
+
T = TypeVar("T")
|
81
116
|
|
82
117
|
|
83
118
|
@dataclass
|
84
|
-
class Runs:
|
119
|
+
class RunCollection:
|
85
120
|
"""
|
86
121
|
A class to represent a collection of MLflow runs.
|
87
122
|
|
@@ -89,83 +124,109 @@ class Runs:
|
|
89
124
|
retrieving specific runs, and accessing run information.
|
90
125
|
"""
|
91
126
|
|
92
|
-
|
127
|
+
_runs: list[Run]
|
128
|
+
"""A list of MLflow Run objects."""
|
93
129
|
|
94
130
|
def __repr__(self) -> str:
|
95
131
|
return f"{self.__class__.__name__}({len(self)})"
|
96
132
|
|
97
133
|
def __len__(self) -> int:
|
98
|
-
return len(self.
|
134
|
+
return len(self._runs)
|
99
135
|
|
100
|
-
def filter(self, config: object) ->
|
136
|
+
def filter(self, config: object | None = None, **kwargs) -> RunCollection:
|
101
137
|
"""
|
102
138
|
Filter the runs based on the provided configuration.
|
103
139
|
|
104
140
|
This method filters the runs in the collection according to the
|
105
|
-
specified configuration object
|
106
|
-
|
107
|
-
|
108
|
-
|
141
|
+
specified configuration object and additional key-value pairs.
|
142
|
+
The configuration object and key-value pairs should contain
|
143
|
+
key-value pairs that correspond to the parameters of the runs.
|
144
|
+
Only the runs that match all the specified parameters will be
|
145
|
+
included in the returned `RunCollection` object.
|
146
|
+
|
147
|
+
The filtering supports:
|
148
|
+
- Exact matches for single values.
|
149
|
+
- Membership checks for lists of values.
|
150
|
+
- Range checks for tuples of two values (inclusive of the lower bound and
|
151
|
+
exclusive of the upper bound).
|
109
152
|
|
110
153
|
Args:
|
111
|
-
config: The configuration object to filter the runs.
|
154
|
+
config: The configuration object to filter the runs. This can be any
|
155
|
+
object that provides key-value pairs through the `iter_params`
|
156
|
+
function.
|
157
|
+
**kwargs: Additional key-value pairs to filter the runs.
|
112
158
|
|
113
159
|
Returns:
|
114
|
-
A new `Runs` object containing the filtered runs.
|
160
|
+
A new `RunCollection` object containing the filtered runs.
|
115
161
|
"""
|
116
|
-
return
|
162
|
+
return RunCollection(filter_runs(self._runs, config, **kwargs))
|
117
163
|
|
118
|
-
def
|
164
|
+
def find(self, config: object | None = None, **kwargs) -> Run | None:
|
119
165
|
"""
|
120
|
-
|
166
|
+
Find the first run based on the provided configuration.
|
121
167
|
|
122
168
|
This method filters the runs in the collection according to the
|
123
|
-
specified configuration object and returns the run that matches
|
124
|
-
the provided parameters. If
|
125
|
-
a `ValueError` is raised.
|
169
|
+
specified configuration object and returns the first run that matches
|
170
|
+
the provided parameters. If no run matches the criteria, None is returned.
|
126
171
|
|
127
172
|
Args:
|
128
173
|
config: The configuration object to identify the run.
|
174
|
+
**kwargs: Additional key-value pairs to filter the runs.
|
129
175
|
|
130
176
|
Returns:
|
131
|
-
|
132
|
-
|
177
|
+
The first run object that matches the provided configuration, or None
|
178
|
+
if no runs match the criteria.
|
133
179
|
|
134
|
-
|
135
|
-
|
180
|
+
See Also:
|
181
|
+
RunCollection.filter: The method that performs the actual filtering logic.
|
136
182
|
"""
|
137
|
-
return
|
183
|
+
return find_run(self._runs, config, **kwargs)
|
138
184
|
|
139
|
-
def
|
185
|
+
def find_last(self, config: object | None = None, **kwargs) -> Run | None:
|
140
186
|
"""
|
141
|
-
|
187
|
+
Find the last run based on the provided configuration.
|
142
188
|
|
143
|
-
This method filters the runs
|
144
|
-
and returns the run
|
189
|
+
This method filters the runs in the collection according to the
|
190
|
+
specified configuration object and returns the last run that matches
|
191
|
+
the provided parameters. If no run matches the criteria, None is returned.
|
145
192
|
|
146
193
|
Args:
|
147
|
-
config: The configuration object to
|
148
|
-
If None, no filtering is applied.
|
194
|
+
config: The configuration object to identify the run.
|
149
195
|
**kwargs: Additional key-value pairs to filter the runs.
|
150
196
|
|
151
197
|
Returns:
|
152
|
-
The run
|
198
|
+
The last run object that matches the provided configuration, or None
|
199
|
+
if no runs match the criteria.
|
200
|
+
|
201
|
+
See Also:
|
202
|
+
RunCollection.filter: The method that performs the actual filtering logic.
|
153
203
|
"""
|
154
|
-
return
|
204
|
+
return find_last_run(self._runs, config, **kwargs)
|
155
205
|
|
156
|
-
def
|
206
|
+
def get(self, config: object | None = None, **kwargs) -> Run | None:
|
157
207
|
"""
|
158
|
-
|
208
|
+
Retrieve a specific run based on the provided configuration.
|
209
|
+
|
210
|
+
This method filters the runs in the collection according to the
|
211
|
+
specified configuration object and returns the run that matches
|
212
|
+
the provided parameters. If more than one run matches the criteria,
|
213
|
+
a `ValueError` is raised.
|
159
214
|
|
160
215
|
Args:
|
161
|
-
config: The configuration object to
|
162
|
-
If None, no filtering is applied.
|
216
|
+
config: The configuration object to identify the run.
|
163
217
|
**kwargs: Additional key-value pairs to filter the runs.
|
164
218
|
|
165
219
|
Returns:
|
166
|
-
The run
|
220
|
+
The run object that matches the provided configuration, or None
|
221
|
+
if no runs match the criteria.
|
222
|
+
|
223
|
+
Raises:
|
224
|
+
ValueError: If more than one run matches the criteria.
|
225
|
+
|
226
|
+
See Also:
|
227
|
+
RunCollection.filter: The method that performs the actual filtering logic.
|
167
228
|
"""
|
168
|
-
return
|
229
|
+
return get_run(self._runs, config, **kwargs)
|
169
230
|
|
170
231
|
def get_param_names(self) -> list[str]:
|
171
232
|
"""
|
@@ -178,7 +239,7 @@ class Runs:
|
|
178
239
|
Returns:
|
179
240
|
A list of unique parameter names.
|
180
241
|
"""
|
181
|
-
return get_param_names(self.
|
242
|
+
return get_param_names(self._runs)
|
182
243
|
|
183
244
|
def get_param_dict(self) -> dict[str, list[str]]:
|
184
245
|
"""
|
@@ -193,29 +254,164 @@ class Runs:
|
|
193
254
|
A dictionary where the keys are parameter names and the values are lists
|
194
255
|
of parameter values.
|
195
256
|
"""
|
196
|
-
return get_param_dict(self.
|
257
|
+
return get_param_dict(self._runs)
|
258
|
+
|
259
|
+
def first(self) -> Run | None:
|
260
|
+
"""
|
261
|
+
Return the first run in the collection.
|
262
|
+
|
263
|
+
Returns:
|
264
|
+
The first Run object if the collection is not empty, otherwise None.
|
265
|
+
"""
|
266
|
+
return self._runs[0] if self._runs else None
|
267
|
+
|
268
|
+
def last(self) -> Run | None:
|
269
|
+
"""
|
270
|
+
Return the last run in the collection.
|
271
|
+
|
272
|
+
Returns:
|
273
|
+
The last Run object if the collection is not empty, otherwise None.
|
274
|
+
"""
|
275
|
+
return self._runs[-1] if self._runs else None
|
276
|
+
|
277
|
+
def map(self, func: Callable[[Run], T]) -> Iterator[T]:
|
278
|
+
"""
|
279
|
+
Apply a function to each run in the collection and return an iterator of results.
|
280
|
+
|
281
|
+
Args:
|
282
|
+
func: A function that takes a Run object and returns a result.
|
283
|
+
|
284
|
+
Returns:
|
285
|
+
An iterator of results obtained by applying the function to each run
|
286
|
+
in the collection.
|
287
|
+
"""
|
288
|
+
return (func(run) for run in self._runs)
|
289
|
+
|
290
|
+
def map_run_id(self, func: Callable[[str], T]) -> Iterator[T]:
|
291
|
+
"""
|
292
|
+
Apply a function to each run id in the collection and return an iterator of results.
|
293
|
+
|
294
|
+
Args:
|
295
|
+
func: A function that takes a run id and returns a result.
|
296
|
+
|
297
|
+
Returns:
|
298
|
+
An iterator of results obtained by applying the function to each run id
|
299
|
+
in the collection.
|
300
|
+
"""
|
301
|
+
return (func(run.info.run_id) for run in self._runs)
|
302
|
+
|
303
|
+
def map_config(self, func: Callable[[DictConfig], T]) -> Iterator[T]:
|
304
|
+
"""
|
305
|
+
Apply a function to each run config in the collection and return an iterator of results.
|
306
|
+
|
307
|
+
Args:
|
308
|
+
func: A function that takes a run config and returns a result.
|
309
|
+
|
310
|
+
Returns:
|
311
|
+
An iterator of results obtained by applying the function to each run config
|
312
|
+
in the collection.
|
313
|
+
"""
|
314
|
+
return (func(load_config(run)) for run in self._runs)
|
315
|
+
|
316
|
+
def map_uri(self, func: Callable[[str | None], T]) -> Iterator[T]:
|
317
|
+
"""
|
318
|
+
Apply a function to each artifact URI in the collection and return an iterator of results.
|
319
|
+
|
320
|
+
This method iterates over each run in the collection, retrieves the artifact URI,
|
321
|
+
and applies the provided function to it. If a run does not have an artifact URI,
|
322
|
+
None is passed to the function.
|
323
|
+
|
324
|
+
Args:
|
325
|
+
func: A function that takes an artifact URI (string or None) and returns a result.
|
326
|
+
|
327
|
+
Yields:
|
328
|
+
The results obtained by applying the function to each artifact URI in the collection.
|
329
|
+
"""
|
330
|
+
return (func(run.info.artifact_uri) for run in self._runs)
|
331
|
+
|
332
|
+
def map_dir(self, func: Callable[[str], T]) -> Iterator[T]:
|
333
|
+
"""
|
334
|
+
Apply a function to each artifact directory in the collection and return an iterator of results.
|
335
|
+
|
336
|
+
This method iterates over each run in the collection, downloads the artifact directory,
|
337
|
+
and applies the provided function to the directory path.
|
338
|
+
|
339
|
+
Args:
|
340
|
+
func: A function that takes an artifact directory path (string) and returns a result.
|
341
|
+
|
342
|
+
Returns:
|
343
|
+
An iterator of results obtained by applying the function to each artifact directory
|
344
|
+
in the collection.
|
345
|
+
"""
|
346
|
+
return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
|
347
|
+
|
348
|
+
|
349
|
+
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
350
|
+
"""
|
351
|
+
Check if the run's parameter matches the specified key-value pair.
|
352
|
+
|
353
|
+
This function checks if the run's parameters contain the specified key-value pair.
|
354
|
+
It handles different types of values, including lists and tuples.
|
355
|
+
|
356
|
+
Args:
|
357
|
+
run: The run object to check.
|
358
|
+
key: The parameter key to check.
|
359
|
+
value: The parameter value to check.
|
360
|
+
|
361
|
+
Returns:
|
362
|
+
True if the run's parameter matches the specified key-value pair, False otherwise.
|
363
|
+
"""
|
364
|
+
param = run.data.params.get(key, value)
|
365
|
+
|
366
|
+
# FIXME: This is a workaround to handle the case where the parameter value is a list
|
367
|
+
# We need to improve the logic to handle different types of values
|
368
|
+
# For now, we assume that if the parameter is a list, we should check if it contains the value
|
369
|
+
# This is not ideal, but it works for the case where the parameter value is a list of strings
|
370
|
+
# We should improve the logic to handle different types of values in the future
|
371
|
+
|
372
|
+
if param is None:
|
373
|
+
return False
|
374
|
+
|
375
|
+
if param == "None":
|
376
|
+
return value is None
|
197
377
|
|
378
|
+
if isinstance(value, list) and value:
|
379
|
+
return type(value[0])(param) in value
|
198
380
|
|
199
|
-
|
381
|
+
if isinstance(value, tuple) and len(value) == 2:
|
382
|
+
return value[0] <= type(value[0])(param) < value[1]
|
383
|
+
|
384
|
+
return type(value)(param) == value
|
385
|
+
|
386
|
+
|
387
|
+
def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list[Run]:
|
200
388
|
"""
|
201
389
|
Filter the runs based on the provided configuration.
|
202
390
|
|
203
391
|
This method filters the runs in the collection according to the
|
204
|
-
specified configuration object
|
205
|
-
|
206
|
-
|
207
|
-
|
392
|
+
specified configuration object and additional key-value pairs.
|
393
|
+
The configuration object and key-value pairs should contain
|
394
|
+
key-value pairs that correspond to the parameters of the runs.
|
395
|
+
Only the runs that match all the specified parameters will be
|
396
|
+
included in the returned list of runs.
|
397
|
+
|
398
|
+
The filtering supports:
|
399
|
+
- Exact matches for single values.
|
400
|
+
- Membership checks for lists of values.
|
401
|
+
- Range checks for tuples of two values (inclusive of the lower bound and
|
402
|
+
exclusive of the upper bound).
|
208
403
|
|
209
404
|
Args:
|
210
|
-
runs: The runs to filter.
|
211
|
-
config: The configuration object to filter the runs.
|
405
|
+
runs: The list of runs to filter.
|
406
|
+
config: The configuration object to filter the runs. This can be any object that
|
407
|
+
provides key-value pairs through the `iter_params` function.
|
212
408
|
**kwargs: Additional key-value pairs to filter the runs.
|
213
409
|
|
214
410
|
Returns:
|
215
|
-
A
|
411
|
+
A list of runs that match the specified configuration and key-value pairs.
|
216
412
|
"""
|
217
413
|
for key, value in chain(iter_params(config), kwargs.items()):
|
218
|
-
runs = [run for run in runs if
|
414
|
+
runs = [run for run in runs if _param_matches(run, key, value)]
|
219
415
|
|
220
416
|
if len(runs) == 0:
|
221
417
|
return []
|
@@ -223,23 +419,13 @@ def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
|
|
223
419
|
return runs
|
224
420
|
|
225
421
|
|
226
|
-
def
|
227
|
-
param = run.data.params.get(key, value)
|
228
|
-
|
229
|
-
if param is None:
|
230
|
-
return False
|
231
|
-
|
232
|
-
return type(value)(param) == value
|
233
|
-
|
234
|
-
|
235
|
-
def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
|
422
|
+
def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
|
236
423
|
"""
|
237
|
-
|
424
|
+
Find the first run based on the provided configuration.
|
238
425
|
|
239
426
|
This method filters the runs in the collection according to the
|
240
|
-
specified configuration object and returns the run that matches
|
241
|
-
the provided parameters. If
|
242
|
-
a `ValueError` is raised.
|
427
|
+
specified configuration object and returns the first run that matches
|
428
|
+
the provided parameters. If no run matches the criteria, None is returned.
|
243
429
|
|
244
430
|
Args:
|
245
431
|
runs: The runs to filter.
|
@@ -247,66 +433,65 @@ def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
|
|
247
433
|
**kwargs: Additional key-value pairs to filter the runs.
|
248
434
|
|
249
435
|
Returns:
|
250
|
-
The run object that matches the provided configuration, or None
|
436
|
+
The first run object that matches the provided configuration, or None
|
251
437
|
if no runs match the criteria.
|
252
|
-
|
253
|
-
Raises:
|
254
|
-
ValueError: If more than one run matches the criteria.
|
255
438
|
"""
|
256
439
|
runs = filter_runs(runs, config, **kwargs)
|
440
|
+
return runs[0] if runs else None
|
257
441
|
|
258
|
-
if len(runs) == 0:
|
259
|
-
return None
|
260
|
-
|
261
|
-
if len(runs) == 1:
|
262
|
-
return runs[0]
|
263
|
-
|
264
|
-
msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
|
265
|
-
raise ValueError(msg)
|
266
442
|
|
267
|
-
|
268
|
-
def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
|
443
|
+
def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
|
269
444
|
"""
|
270
|
-
|
445
|
+
Find the last run based on the provided configuration.
|
271
446
|
|
272
|
-
This method filters the runs
|
273
|
-
and returns the run
|
447
|
+
This method filters the runs in the collection according to the
|
448
|
+
specified configuration object and returns the last run that matches
|
449
|
+
the provided parameters. If no run matches the criteria, None is returned.
|
274
450
|
|
275
451
|
Args:
|
276
|
-
runs: The
|
277
|
-
config: The configuration object to
|
278
|
-
If None, no filtering is applied.
|
452
|
+
runs: The runs to filter.
|
453
|
+
config: The configuration object to identify the run.
|
279
454
|
**kwargs: Additional key-value pairs to filter the runs.
|
280
455
|
|
281
456
|
Returns:
|
282
|
-
The run
|
457
|
+
The last run object that matches the provided configuration, or None
|
458
|
+
if no runs match the criteria.
|
283
459
|
"""
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
return min(runs, key=lambda run: run.info.start_time, default=None)
|
460
|
+
runs = filter_runs(runs, config, **kwargs)
|
461
|
+
return runs[-1] if runs else None
|
288
462
|
|
289
463
|
|
290
|
-
def
|
464
|
+
def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
|
291
465
|
"""
|
292
|
-
|
466
|
+
Retrieve a specific run based on the provided configuration.
|
293
467
|
|
294
|
-
This method filters the runs
|
295
|
-
and returns the run
|
468
|
+
This method filters the runs in the collection according to the
|
469
|
+
specified configuration object and returns the run that matches
|
470
|
+
the provided parameters. If more than one run matches the criteria,
|
471
|
+
a `ValueError` is raised.
|
296
472
|
|
297
473
|
Args:
|
298
|
-
runs: The
|
299
|
-
config: The configuration object to
|
300
|
-
If None, no filtering is applied.
|
474
|
+
runs: The runs to filter.
|
475
|
+
config: The configuration object to identify the run.
|
301
476
|
**kwargs: Additional key-value pairs to filter the runs.
|
302
477
|
|
303
478
|
Returns:
|
304
|
-
The run
|
479
|
+
The run object that matches the provided configuration, or None
|
480
|
+
if no runs match the criteria.
|
481
|
+
|
482
|
+
Raises:
|
483
|
+
ValueError: If more than one run matches the criteria.
|
305
484
|
"""
|
306
|
-
|
307
|
-
runs = filter_runs(runs, config or {}, **kwargs)
|
485
|
+
runs = filter_runs(runs, config, **kwargs)
|
308
486
|
|
309
|
-
|
487
|
+
if len(runs) == 0:
|
488
|
+
return None
|
489
|
+
|
490
|
+
if len(runs) == 1:
|
491
|
+
return runs[0]
|
492
|
+
|
493
|
+
msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
|
494
|
+
raise ValueError(msg)
|
310
495
|
|
311
496
|
|
312
497
|
def get_param_names(runs: list[Run]) -> list[str]:
|
@@ -363,13 +548,15 @@ def load_config(run: Run) -> DictConfig:
|
|
363
548
|
|
364
549
|
This function loads the configuration for the provided Run instance
|
365
550
|
by downloading the configuration file from the MLflow artifacts and
|
366
|
-
loading it using OmegaConf.
|
551
|
+
loading it using OmegaConf. It returns an empty config if
|
552
|
+
`.hydra/config.yaml` is not found in the run's artifact directory.
|
367
553
|
|
368
554
|
Args:
|
369
|
-
run: The Run instance to load the configuration
|
555
|
+
run: The Run instance for which to load the configuration.
|
370
556
|
|
371
557
|
Returns:
|
372
|
-
The loaded configuration.
|
558
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
559
|
+
DictConfig if the configuration file is not found.
|
373
560
|
"""
|
374
561
|
run_id = run.info.run_id
|
375
562
|
return _load_config(run_id)
|
@@ -378,10 +565,7 @@ def load_config(run: Run) -> DictConfig:
|
|
378
565
|
@cache
|
379
566
|
def _load_config(run_id: str) -> DictConfig:
|
380
567
|
try:
|
381
|
-
path =
|
382
|
-
run_id=run_id,
|
383
|
-
artifact_path=".hydra/config.yaml",
|
384
|
-
)
|
568
|
+
path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
|
385
569
|
except OSError:
|
386
570
|
return DictConfig({})
|
387
571
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.1
|
3
|
+
Version: 0.2.2
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -46,14 +46,23 @@ Description-Content-Type: text/markdown
|
|
46
46
|
|
47
47
|
## Overview
|
48
48
|
|
49
|
-
Hydraflow is a powerful library designed to seamlessly integrate
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate
|
50
|
+
[Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
|
51
|
+
manage and track machine learning experiments. By combining the flexibility of
|
52
|
+
Hydra's configuration management with the robust experiment tracking capabilities
|
53
|
+
of MLflow, Hydraflow provides a comprehensive solution for managing complex
|
54
|
+
machine learning workflows.
|
50
55
|
|
51
56
|
## Key Features
|
52
57
|
|
53
|
-
- **Configuration Management**: Utilize Hydra's advanced configuration management
|
54
|
-
|
55
|
-
- **
|
56
|
-
|
58
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management
|
59
|
+
to handle complex parameter sweeps and experiment setups.
|
60
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
|
61
|
+
metrics, and artifacts for each run.
|
62
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model
|
63
|
+
checkpoints and configuration files, with MLflow.
|
64
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
|
65
|
+
projects with minimal setup.
|
57
66
|
|
58
67
|
## Installation
|
59
68
|
|
@@ -0,0 +1,9 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=ht4I3q_Ronw2jzk_QRsV-IzObR31F_4Wy7Ve0syNm-8,496
|
2
|
+
hydraflow/config.py,sha256=FNTuCppjCMrZKVByJMrWKbgj3HeMWWwAmQNoyFe029Y,2087
|
3
|
+
hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
|
4
|
+
hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
|
5
|
+
hydraflow/runs.py,sha256=kO7Gl9CeS2HjB0y_emGXNMRJTxNoqXBEJ7Ggq96nhMg,22050
|
6
|
+
hydraflow-0.2.2.dist-info/METADATA,sha256=C2lfD6jTDdHyexxATZWfdRQHAUgSOHx7IgvmBUj4tTQ,4232
|
7
|
+
hydraflow-0.2.2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
8
|
+
hydraflow-0.2.2.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
9
|
+
hydraflow-0.2.2.dist-info/RECORD,,
|
hydraflow-0.2.1.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
|
2
|
-
hydraflow/config.py,sha256=wI8uNuD2D-hIf4BAhEYJaMC6EyO-erKopy_ia_b1pYA,2048
|
3
|
-
hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
|
4
|
-
hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
|
5
|
-
hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
|
6
|
-
hydraflow-0.2.1.dist-info/METADATA,sha256=4C_hnw1gMb8WUQXyqj4q8eA1IVbp0wZuLGGthIk1G7U,4224
|
7
|
-
hydraflow-0.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
8
|
-
hydraflow-0.2.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
9
|
-
hydraflow-0.2.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|