hydraflow 0.14.3__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +3 -13
- hydraflow/core/context.py +12 -32
- hydraflow/core/io.py +36 -115
- hydraflow/core/main.py +3 -3
- hydraflow/core/run.py +341 -0
- hydraflow/core/run_collection.py +525 -0
- hydraflow/core/run_info.py +84 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/METADATA +12 -10
- hydraflow-0.15.0.dist-info/RECORD +21 -0
- hydraflow/core/config.py +0 -122
- hydraflow/core/mlflow.py +0 -174
- hydraflow/core/param.py +0 -165
- hydraflow/entities/__init__.py +0 -0
- hydraflow/entities/run_collection.py +0 -583
- hydraflow/entities/run_data.py +0 -61
- hydraflow/entities/run_info.py +0 -36
- hydraflow-0.14.3.dist-info/RECORD +0 -25
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.14.3.dist-info → hydraflow-0.15.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,525 @@
|
|
1
|
+
"""RunCollection module for HydraFlow.
|
2
|
+
|
3
|
+
This module provides the RunCollection class, which represents a collection
|
4
|
+
of MLflow Runs in HydraFlow. RunCollection offers functionality for filtering,
|
5
|
+
sorting, grouping, and analyzing runs, as well as converting run data to
|
6
|
+
various formats such as DataFrames.
|
7
|
+
|
8
|
+
The RunCollection class implements the Sequence protocol, allowing it to be
|
9
|
+
used like a standard Python list while providing specialized methods for
|
10
|
+
working with Run instances.
|
11
|
+
|
12
|
+
Example:
|
13
|
+
```python
|
14
|
+
# Create a collection from a list of runs
|
15
|
+
runs = RunCollection([run1, run2, run3])
|
16
|
+
|
17
|
+
# Filter runs based on criteria
|
18
|
+
filtered = runs.filter(("metrics.accuracy", lambda acc: acc > 0.9))
|
19
|
+
|
20
|
+
# Sort runs by specific keys
|
21
|
+
sorted_runs = runs.sort("metrics.accuracy", reverse=True)
|
22
|
+
|
23
|
+
# Group runs by model type and compute aggregates
|
24
|
+
grouped = runs.group_by("model.type",
|
25
|
+
avg_acc=lambda rc: sum(r.get("metrics.accuracy")
|
26
|
+
for r in rc) / len(rc))
|
27
|
+
|
28
|
+
# Convert runs to a DataFrame for analysis
|
29
|
+
df = runs.to_frame("run_id", "model.type", "metrics.accuracy")
|
30
|
+
```
|
31
|
+
|
32
|
+
Note:
|
33
|
+
This module requires Polars and NumPy for DataFrame operations and
|
34
|
+
numerical computations.
|
35
|
+
|
36
|
+
"""
|
37
|
+
|
38
|
+
from __future__ import annotations
|
39
|
+
|
40
|
+
from collections.abc import Hashable, Iterable, Sequence
|
41
|
+
from typing import TYPE_CHECKING, overload
|
42
|
+
|
43
|
+
import numpy as np
|
44
|
+
import polars as pl
|
45
|
+
from omegaconf import OmegaConf
|
46
|
+
from polars import DataFrame
|
47
|
+
|
48
|
+
from .run import Run
|
49
|
+
|
50
|
+
if TYPE_CHECKING:
|
51
|
+
from collections.abc import Callable, Iterator
|
52
|
+
from typing import Any, Self
|
53
|
+
|
54
|
+
from numpy.typing import NDArray
|
55
|
+
|
56
|
+
|
57
|
+
class RunCollection[R: Run[Any, Any]](Sequence[R]):
|
58
|
+
"""A collection of Run instances that implements the Sequence protocol.
|
59
|
+
|
60
|
+
RunCollection provides methods for filtering, sorting, grouping, and analyzing
|
61
|
+
runs, as well as converting run data to various formats such as DataFrames.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
runs (Iterable[Run]): An iterable of Run instances to include in
|
65
|
+
the collection.
|
66
|
+
|
67
|
+
"""
|
68
|
+
|
69
|
+
runs: list[R]
|
70
|
+
"""A list containing the Run instances in this collection."""
|
71
|
+
|
72
|
+
def __init__(self, runs: Iterable[R]) -> None:
|
73
|
+
self.runs = list(runs)
|
74
|
+
|
75
|
+
def __repr__(self) -> str:
|
76
|
+
"""Return a string representation of the RunCollection."""
|
77
|
+
class_name = self.__class__.__name__
|
78
|
+
if not self:
|
79
|
+
return f"{class_name}(empty)"
|
80
|
+
|
81
|
+
type_name = repr(self[0])
|
82
|
+
if "(" in type_name:
|
83
|
+
type_name = type_name.split("(", 1)[0]
|
84
|
+
return f"{class_name}({type_name}, n={len(self)})"
|
85
|
+
|
86
|
+
def __len__(self) -> int:
|
87
|
+
"""Return the number of Run instances in the collection.
|
88
|
+
|
89
|
+
Returns:
|
90
|
+
int: The number of runs.
|
91
|
+
|
92
|
+
"""
|
93
|
+
return len(self.runs)
|
94
|
+
|
95
|
+
def __bool__(self) -> bool:
|
96
|
+
"""Return whether the collection contains any Run instances.
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
bool: True if the collection is not empty, False otherwise.
|
100
|
+
|
101
|
+
"""
|
102
|
+
return bool(self.runs)
|
103
|
+
|
104
|
+
@overload
|
105
|
+
def __getitem__(self, index: int) -> R: ...
|
106
|
+
|
107
|
+
@overload
|
108
|
+
def __getitem__(self, index: slice) -> Self: ...
|
109
|
+
|
110
|
+
@overload
|
111
|
+
def __getitem__(self, index: Iterable[int]) -> Self: ...
|
112
|
+
|
113
|
+
def __getitem__(self, index: int | slice | Iterable[int]) -> R | Self:
|
114
|
+
"""Get a Run or a new RunCollection based on the provided index.
|
115
|
+
|
116
|
+
Args:
|
117
|
+
index: Can be one of:
|
118
|
+
- An integer to get a single Run
|
119
|
+
- A slice to get a subrange of Runs
|
120
|
+
- An iterable of integers to get specific Runs
|
121
|
+
|
122
|
+
Returns:
|
123
|
+
R | Self: A single Run if index is an integer, or a new
|
124
|
+
RunCollection if index is a slice or iterable of integers.
|
125
|
+
|
126
|
+
"""
|
127
|
+
if isinstance(index, int):
|
128
|
+
return self.runs[index]
|
129
|
+
if isinstance(index, slice):
|
130
|
+
return self.__class__(self.runs[index])
|
131
|
+
return self.__class__([self.runs[i] for i in index])
|
132
|
+
|
133
|
+
def __iter__(self) -> Iterator[R]:
|
134
|
+
"""Return an iterator over the Runs in the collection.
|
135
|
+
|
136
|
+
Returns:
|
137
|
+
Iterator[R]: An iterator yielding Run instances.
|
138
|
+
|
139
|
+
"""
|
140
|
+
return iter(self.runs)
|
141
|
+
|
142
|
+
@overload
def update(
    self,
    key: str,
    value: Any | Callable[[R], Any],
    *,
    force: bool = False,
) -> None: ...

@overload
def update(
    self,
    key: tuple[str, ...],
    value: Iterable[Any] | Callable[[R], Iterable[Any]],
    *,
    force: bool = False,
) -> None: ...

def update(
    self,
    key: str | tuple[str, ...],
    value: Any | Callable[[R], Any],
    *,
    force: bool = False,
) -> None:
    """Apply the same update to every run in the collection.

    The arguments are forwarded unchanged to each run's own ``update``
    method, one run at a time, in iteration order.

    Args:
        key: A single configuration path, or a tuple of paths when
            several values are set at once.
        value: The value(s) to set, or a callable producing them.
        force: Forwarded to each run's ``update`` as the ``force``
            keyword argument.

    """
    for member in self:
        member.update(key, value, force=force)
|
180
|
+
|
181
|
+
def filter(
    self,
    *predicates: Callable[[R], bool] | tuple[str, Any],
    **kwargs: Any,
) -> Self:
    """Return the runs that satisfy every given condition.

    Conditions are applied one after another, each to the runs that
    survived the previous one. A condition is either:

    - a callable, called with the run and kept when the result is truthy
    - a tuple, unpacked into the run's ``predicate`` method
    - a keyword argument, passed to ``predicate`` as ``(key, value)``

    Positional conditions are applied first, then keyword conditions.

    Args:
        *predicates: Callables or tuples of ``predicate`` arguments.
        **kwargs: Key-value pairs checked through ``predicate``.

    Returns:
        Self: A new collection of the same class holding the runs that
        passed all conditions.

    """
    selected = self.runs

    # Keyword items are plain (key, value) tuples, so they take the same
    # non-callable path as positional tuples.
    for condition in (*predicates, *kwargs.items()):
        if callable(condition):
            selected = [run for run in selected if condition(run)]
        else:
            selected = [run for run in selected if run.predicate(*condition)]

    return self.__class__(selected)
|
217
|
+
|
218
|
+
def try_get(
    self,
    *predicates: Callable[[R], bool] | tuple[str, Any],
    **kwargs: Any,
) -> R | None:
    """Return the only run matching the criteria, if there is one.

    The criteria are passed to ``filter``; the outcome depends on how
    many runs remain.

    Args:
        *predicates: Callable predicates or (key, value) tuples
            for filtering.
        **kwargs: Additional key-value pairs for filtering.

    Returns:
        R | None: The single matching run, or None when nothing matches.

    Raises:
        ValueError: If more than one run matches the criteria.

    """
    matches = self.filter(*predicates, **kwargs)
    count = len(matches)

    if count > 1:
        msg = (
            f"Multiple Run ({count}) found matching the criteria, "
            "expected exactly one"
        )
        raise ValueError(msg)

    return matches[0] if count == 1 else None
|
254
|
+
|
255
|
+
def get(
    self,
    *predicates: Callable[[R], bool] | tuple[str, Any],
    **kwargs: Any,
) -> R:
    """Get a single run matching the specified criteria.

    This method delegates to ``try_get`` and turns its "no match"
    result (None) into an error.

    Args:
        *predicates: Callable predicates or (key, value) tuples
            for filtering.
        **kwargs: Additional key-value pairs for filtering.

    Returns:
        R: A single Run that matches the criteria.

    Raises:
        ValueError: If no runs match or if multiple runs match
            the criteria.

    """
    run = self.try_get(*predicates, **kwargs)

    # Compare against None explicitly: a run object that happens to be
    # falsy (e.g. defines __bool__ or __len__) is still a valid match.
    if run is None:
        raise _value_error()

    return run
|
282
|
+
|
283
|
+
def first(
    self,
    *predicates: Callable[[R], bool] | tuple[str, Any],
    **kwargs: Any,
) -> R:
    """Return the first run that matches the criteria.

    The criteria are passed to ``filter`` and the element at index 0 of
    the result is returned.

    Args:
        *predicates: Callable predicates or (key, value) tuples
            for filtering.
        **kwargs: Additional key-value pairs for filtering.

    Returns:
        R: The first matching Run.

    Raises:
        ValueError: If the filtered result is empty.

    """
    matches = self.filter(*predicates, **kwargs)
    if not matches:
        raise _value_error()

    return matches[0]
|
309
|
+
|
310
|
+
def last(
    self,
    *predicates: Callable[[R], bool] | tuple[str, Any],
    **kwargs: Any,
) -> R:
    """Return the last run that matches the criteria.

    The criteria are passed to ``filter`` and the element at index -1 of
    the result is returned.

    Args:
        *predicates: Callable predicates or (key, value) tuples
            for filtering.
        **kwargs: Additional key-value pairs for filtering.

    Returns:
        R: The last matching Run.

    Raises:
        ValueError: If the filtered result is empty.

    """
    matches = self.filter(*predicates, **kwargs)
    if not matches:
        raise _value_error()

    return matches[-1]
|
336
|
+
|
337
|
+
def to_list(self, key: str) -> list[Any]:
    """Collect one value per run for the given key.

    Args:
        key: The key passed to each run's ``get`` method.

    Returns:
        list[Any]: The values returned by ``run.get(key)``, in the
        collection's iteration order.

    """
    values: list[Any] = []
    for run in self:
        values.append(run.get(key))
    return values
|
349
|
+
|
350
|
+
def to_numpy(self, key: str) -> NDArray:
    """Collect one value per run for the given key as a NumPy array.

    Args:
        key: The key to extract from each run.

    Returns:
        NDArray: ``np.array`` applied to the result of ``to_list(key)``.

    """
    values = self.to_list(key)
    return np.array(values)
|
362
|
+
|
363
|
+
def unique(self, key: str) -> NDArray:
    """Return the distinct values of a key across all runs.

    Args:
        key: The key to extract unique values for.

    Returns:
        NDArray: The result of ``np.unique`` along axis 0 of the array
        produced by ``to_numpy(key)``.

    """
    values = self.to_numpy(key)
    return np.unique(values, axis=0)
|
375
|
+
|
376
|
+
def n_unique(self, key: str) -> int:
    """Count the distinct values of a key across all runs.

    Args:
        key: The key to count unique values for.

    Returns:
        int: The length of the array returned by ``unique(key)``.

    """
    distinct = self.unique(key)
    return len(distinct)
|
387
|
+
|
388
|
+
def sort(self, *keys: str, reverse: bool = False) -> Self:
    """Order the runs by one or more keys.

    Args:
        *keys: The keys to sort by; the first key has the highest
            priority.
        reverse: When True, the computed order is reversed.

    Returns:
        Self: The collection itself when no keys are given; otherwise
        the result of indexing the collection with the sorted order.

    """
    if len(keys) == 0:
        return self

    columns = [self.to_numpy(key) for key in keys]
    # np.lexsort treats the LAST sequence as the primary key, so the
    # columns are handed over in reverse priority order.
    columns.reverse()
    order = np.lexsort(columns)

    return self[order[::-1] if reverse else order]
|
411
|
+
|
412
|
+
def to_frame(self, *keys: str, **kwargs: Callable[[R], Any]) -> DataFrame:
    """Build a Polars DataFrame with one row per run.

    Args:
        *keys (str): Keys to use as columns; each column is filled from
            ``to_list(key)``. When no keys are given, the rows are the
            dictionaries returned by each run's ``to_dict()``.
        **kwargs (Callable[[R], Any]): Extra columns. Each callable is
            called once per run and its results form a column named
            after the keyword.

    Returns:
        DataFrame: The assembled Polars DataFrame.

    """
    if keys:
        data = {key: self.to_list(key) for key in keys}
        frame = DataFrame(data)
    else:
        frame = DataFrame(r.to_dict() for r in self)

    if kwargs:
        extra = [
            pl.Series(name, [func(run) for run in self])
            for name, func in kwargs.items()
        ]
        frame = frame.with_columns(*extra)

    return frame
|
437
|
+
|
438
|
+
def _group_by(self, *keys: str) -> dict[Any, Self]:
    """Partition the runs into collections keyed by their key values.

    Each run's values for ``keys`` are passed through ``to_hashable``.
    With exactly one key the group key is that single value; otherwise
    it is a tuple of the values. Groups appear in first-seen order.
    """
    groups: dict[Any, Self] = {}
    single = len(keys) == 1

    for run in self:
        parts = [to_hashable(run.get(name)) for name in keys]
        group_key = parts[0] if single else tuple(parts)

        bucket = groups.get(group_key)
        if bucket is None:
            # First run of this group: start an empty collection.
            bucket = groups[group_key] = self.__class__([])
        bucket.runs.append(run)

    return groups
|
450
|
+
|
451
|
+
@overload
def group_by(self, *keys: str) -> dict[Any, Self]: ...

@overload
def group_by(
    self,
    *keys: str,
    **kwargs: Callable[[Self | Sequence[R]], Any],
) -> DataFrame: ...

def group_by(
    self,
    *keys: str,
    **kwargs: Callable[[Self | Sequence[R]], Any],
) -> dict[Any, Self] | DataFrame:
    """Group runs by one or more keys, optionally aggregating each group.

    Without keyword arguments the grouping itself is returned. With
    keyword arguments, a DataFrame is built that has one row per group:
    the key column(s) first, then one column per aggregation.

    Args:
        *keys (str): The keys to group by.
        **kwargs (Callable[[Self | Sequence[R]], Any]): Aggregations.
            Each callable receives a group's RunCollection and returns
            the value for the column named after the keyword.

    Returns:
        dict[Any, Self] | DataFrame: A dictionary mapping group keys to
        RunCollections, or a Polars DataFrame of keys and aggregates.

    """
    groups = self._group_by(*keys)
    if not kwargs:
        return groups

    if len(keys) == 1:
        # Single key: group keys are plain values, one column suffices.
        (name,) = keys
        frame = DataFrame({name: list(groups)})
    else:
        # Otherwise group keys are tuples aligned with ``keys``.
        frame = DataFrame(
            dict(zip(keys, group_key, strict=True)) for group_key in groups
        )

    aggregated = [
        pl.Series(name, [func(members) for members in groups.values()])
        for name, func in kwargs.items()
    ]
    return frame.with_columns(*aggregated)
|
496
|
+
|
497
|
+
|
498
|
+
def to_hashable(value: Any) -> Hashable:
    """Convert a value to a hashable instance.

    The conversion rules are tried in this order:

    1. An OmegaConf list config becomes a tuple of its items.
    2. A value that is already ``Hashable`` is returned unchanged.
    3. A NumPy array is converted through ``tolist()`` and processed
       again, so 0-d arrays become scalars and multi-dimensional arrays
       become nested tuples.
    4. Any other iterable becomes a tuple; items that are not hashable
       themselves (e.g. inner lists) are converted recursively.
    5. Anything else falls back to its string representation.

    Args:
        value: The value to convert to a hashable instance.

    Returns:
        A hashable version of the input value.

    """
    if OmegaConf.is_list(value):  # Is ListConfig hashable?
        return tuple(value)
    if isinstance(value, Hashable):
        return value
    if isinstance(value, np.ndarray):
        # tolist() returns a scalar for 0-d arrays (tuple() of it would
        # raise) and nested lists for ndim > 1 (a tuple of lists is not
        # hashable), so hand the result back to the generic rules.
        return to_hashable(value.tolist())
    try:
        # Already-hashable items are kept as they are; only unhashable
        # ones are converted, so flat sequences give the same result
        # as a plain tuple().
        return tuple(
            item if isinstance(item, Hashable) else to_hashable(item)
            for item in value
        )
    except TypeError:
        return str(value)
|
521
|
+
|
522
|
+
|
523
|
+
def _value_error() -> ValueError:
|
524
|
+
msg = "No Run found matching the specified criteria"
|
525
|
+
return ValueError(msg)
|
@@ -0,0 +1,84 @@
|
|
1
|
+
"""RunInfo module for HydraFlow.
|
2
|
+
|
3
|
+
This module provides the RunInfo class, which represents an
|
4
|
+
MLflow Run in HydraFlow. RunInfo contains information about a run,
|
5
|
+
such as the run directory, run ID, and job name.
|
6
|
+
The job name is extracted from the Hydra configuration file and
|
7
|
+
represents the MLflow Experiment name that was used when the run
|
8
|
+
was created.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from __future__ import annotations
|
12
|
+
|
13
|
+
from dataclasses import dataclass
|
14
|
+
from functools import cached_property
|
15
|
+
from typing import TYPE_CHECKING
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
from pathlib import Path
|
19
|
+
from typing import Any
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass
class RunInfo:
    """Information about an MLflow Run in HydraFlow.

    A RunInfo is built from the run directory alone. The run ID and the
    job name are derived from that directory on first access and then
    cached on the instance.

    """

    run_dir: Path
    """The MLflow Run directory, which contains metrics, parameters, and artifacts."""

    @cached_property
    def run_id(self) -> str:
        """The MLflow run ID, taken from the name of the run directory."""
        directory = self.run_dir
        return directory.name

    @cached_property
    def job_name(self) -> str:
        """The Hydra job name, as returned by ``get_job_name(run_dir)``.

        An empty string if the job name cannot be extracted from the
        Hydra configuration file (e.g., if the file does not exist or does
        not contain the expected format).
        """
        name = get_job_name(self.run_dir)
        return name

    def to_dict(self) -> dict[str, Any]:
        """Return the run ID, POSIX-style run directory, and job name."""
        return dict(
            run_id=self.run_id,
            run_dir=self.run_dir.as_posix(),
            job_name=self.job_name,
        )
|
59
|
+
|
60
|
+
|
61
|
+
def get_job_name(run_dir: Path) -> str:
    """Extract the Hydra job name from the Hydra configuration file.

    The file ``artifacts/.hydra/hydra.yaml`` below ``run_dir`` is read as
    UTF-8 text and searched for an indented ``job:`` line directly
    followed by an indented ``name: <value>`` line. The amount of
    indentation is not significant.

    Return an empty string if the job name cannot be extracted from the
    Hydra configuration file (e.g., if the file does not exist, cannot be
    read, or does not contain the expected format).

    Args:
        run_dir (Path): The directory where the run artifacts are stored.

    Returns:
        str: The Hydra job name, which was used as the MLflow Experiment name.

    """
    import re

    hydra_file = run_dir / "artifacts/.hydra/hydra.yaml"

    # Read first and handle failure afterwards: an exists() check can be
    # outdated by the time the file is opened, and it is also true for a
    # directory, which cannot be read as text.
    try:
        text = hydra_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return ""

    # Only the first occurrence counts; the value runs to the end of its line.
    found = re.search(r"^[ \t]+job:\n[ \t]+name: (.*)$", text, re.MULTILINE)
    if found is None:
        return ""

    return found.group(1)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.14.3
|
4
|
-
Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management
|
3
|
+
Version: 0.15.0
|
4
|
+
Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
|
5
5
|
Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
7
|
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
@@ -36,40 +36,40 @@ Classifier: Intended Audience :: Science/Research
|
|
36
36
|
Classifier: License :: OSI Approved :: MIT License
|
37
37
|
Classifier: Operating System :: OS Independent
|
38
38
|
Classifier: Programming Language :: Python
|
39
|
-
Classifier: Programming Language :: Python :: 3.10
|
40
|
-
Classifier: Programming Language :: Python :: 3.11
|
41
|
-
Classifier: Programming Language :: Python :: 3.12
|
42
39
|
Classifier: Programming Language :: Python :: 3.13
|
43
40
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
44
41
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
45
|
-
Requires-Python: >=3.10
|
42
|
+
Requires-Python: >=3.13
|
46
43
|
Requires-Dist: hydra-core>=1.3
|
44
|
+
Requires-Dist: joblib>=1.4.0
|
47
45
|
Requires-Dist: mlflow>=2.15
|
48
46
|
Requires-Dist: omegaconf>=2.3
|
47
|
+
Requires-Dist: polars>=1.26
|
49
48
|
Requires-Dist: python-ulid>=3.0.0
|
50
49
|
Requires-Dist: rich>=13.9
|
50
|
+
Requires-Dist: ruff>=0.11
|
51
51
|
Requires-Dist: typer>=0.15
|
52
52
|
Description-Content-Type: text/markdown
|
53
53
|
|
54
54
|
# Hydraflow
|
55
55
|
|
56
56
|
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
57
|
-
[![Python Version][python-v-image]][python-v-link]
|
58
57
|
[![Build Status][GHAction-image]][GHAction-link]
|
59
58
|
[![Coverage Status][codecov-image]][codecov-link]
|
60
59
|
[![Documentation Status][docs-image]][docs-link]
|
60
|
+
[![Python Version][python-v-image]][python-v-link]
|
61
61
|
|
62
62
|
<!-- Badges -->
|
63
63
|
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
64
64
|
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
65
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
66
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
67
65
|
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
|
68
66
|
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
69
67
|
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
70
68
|
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
71
|
-
[docs-image]: https://
|
69
|
+
[docs-image]: https://img.shields.io/badge/docs-latest-blue.svg
|
72
70
|
[docs-link]: https://daizutabi.github.io/hydraflow/
|
71
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
72
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
73
73
|
|
74
74
|
## Overview
|
75
75
|
|
@@ -101,6 +101,8 @@ You can install Hydraflow via pip:
|
|
101
101
|
pip install hydraflow
|
102
102
|
```
|
103
103
|
|
104
|
+
**Requirements:** Python 3.13+
|
105
|
+
|
104
106
|
## Quick Start
|
105
107
|
|
106
108
|
Here is a simple example to get you started with Hydraflow:
|
@@ -0,0 +1,21 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
|
2
|
+
hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
|
3
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
|
6
|
+
hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
|
7
|
+
hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
|
8
|
+
hydraflow/core/run.py,sha256=9JNk3axDdKLpttGx-BC9aqw3d7rosygn2cIzL-fxVlM,11876
|
9
|
+
hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
|
10
|
+
hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
|
11
|
+
hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
|
13
|
+
hydraflow/executor/conf.py,sha256=icGbLDh86KgkyiGXwDoEkmZpgAP3X8Jmu_PYqJoTooY,423
|
14
|
+
hydraflow/executor/io.py,sha256=yZMcBVmAbPZZ82cAXhgiJfj9p8WvHmzOCMBg_vtEVek,1509
|
15
|
+
hydraflow/executor/job.py,sha256=JX6xX9ffvHB7IiAVIfzVRjjnWKaPDxBgqdZf4ZO14CY,4651
|
16
|
+
hydraflow/executor/parser.py,sha256=_Rfund3FDgrXitTt_znsTpgEtMDqZ_ICynaB_Zje14Q,14561
|
17
|
+
hydraflow-0.15.0.dist-info/METADATA,sha256=2OpqrXDfnVxQ_ZJkS5tEjQH0VTa3yx8jkfFOjbkCK50,7238
|
18
|
+
hydraflow-0.15.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
19
|
+
hydraflow-0.15.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
20
|
+
hydraflow-0.15.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
21
|
+
hydraflow-0.15.0.dist-info/RECORD,,
|