hydraflow 0.15.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +2 -0
- hydraflow/core/context.py +4 -4
- hydraflow/core/io.py +6 -0
- hydraflow/core/main.py +19 -11
- hydraflow/core/run.py +13 -3
- hydraflow/core/run_collection.py +119 -12
- hydraflow/core/run_info.py +16 -17
- {hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/METADATA +84 -75
- hydraflow-0.16.0.dist-info/RECORD +21 -0
- hydraflow-0.15.1.dist-info/RECORD +0 -21
- {hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -3,6 +3,7 @@
 from hydraflow.core.context import chdir_artifact, log_run, start_run
 from hydraflow.core.io import (
     get_artifact_dir,
+    get_experiment_names,
     iter_artifact_paths,
     iter_artifacts_dirs,
     iter_experiment_dirs,
@@ -17,6 +18,7 @@ __all__ = [
     "RunCollection",
     "chdir_artifact",
     "get_artifact_dir",
+    "get_experiment_names",
     "iter_artifact_paths",
     "iter_artifacts_dirs",
     "iter_experiment_dirs",
hydraflow/core/context.py
CHANGED
@@ -38,11 +38,11 @@ def log_run(run: Run) -> Iterator[None]:
     import mlflow

     hc = HydraConfig.get()
-
+    hydra_output_dir = Path(hc.runtime.output_dir)

     # Save '.hydra' config directory.
-
-    mlflow.log_artifacts(
+    hydra_dir = hydra_output_dir / (hc.output_subdir or "")
+    mlflow.log_artifacts(hydra_dir.as_posix(), ".hydra")

     try:
         yield
@@ -53,7 +53,7 @@ def log_run(run: Run) -> Iterator[None]:
         raise

     finally:
-        log_text(run,
+        log_text(run, hydra_output_dir)


 @contextmanager
hydraflow/core/io.py
CHANGED
@@ -107,6 +107,12 @@ def predicate_experiment_dir(
     return experiment_names(name)


+def get_experiment_names(tracking_dir: str | Path) -> list[str]:
+    """Get the experiment names from the tracking directory."""
+    names = [get_experiment_name(path) for path in Path(tracking_dir).iterdir()]
+    return [name for name in names if name is not None and name != "Default"]
+
+
 def iter_experiment_dirs(
     tracking_dir: str | Path,
     experiment_names: str | list[str] | Callable[[str], bool] | None = None,
hydraflow/core/main.py
CHANGED
@@ -36,7 +36,8 @@ Example:
 from __future__ import annotations

 from functools import wraps
-from
+from pathlib import Path
+from typing import TYPE_CHECKING

 import hydra
 from hydra.core.config_store import ConfigStore
@@ -48,23 +49,20 @@ from hydraflow.core.io import file_uri_to_path

 if TYPE_CHECKING:
     from collections.abc import Callable
-    from pathlib import Path
     from typing import Any

     from mlflow.entities import Run


-
-
-
-def main(
-    node: T | type[T],
+def main[C](
+    node: C | type[C],
     config_name: str = "config",
     *,
     chdir: bool = False,
     force_new_run: bool = False,
     match_overrides: bool = False,
     rerun_finished: bool = False,
+    update: Callable[[C], C | None] | None = None,
 ):
     """Decorator for configuring and running MLflow experiments with Hydra.

@@ -83,6 +81,8 @@ def main(
             instead of full config. Defaults to False.
         rerun_finished: If True, allows rerunning completed runs. Defaults to
             False.
+        update: A function that takes a configuration and returns a new
+            configuration. Defaults to None.

     """
     import mlflow
@@ -90,21 +90,29 @@ def main(

     finished = RunStatus.to_string(RunStatus.FINISHED)

-    def decorator(app: Callable[[Run,
+    def decorator(app: Callable[[Run, C], None]) -> Callable[[], None]:
         ConfigStore.instance().store(config_name, node)

         @hydra.main(config_name=config_name, version_base=None)
         @wraps(app)
-        def inner_decorator(
+        def inner_decorator(cfg: C) -> None:
             hc = HydraConfig.get()
             experiment = mlflow.set_experiment(hc.job.name)

+            if update:
+                if cfg_ := update(cfg):
+                    cfg = cfg_
+
+                hydra_dir = Path(hc.runtime.output_dir) / (hc.output_subdir or "")
+                cfg_path = hydra_dir.joinpath("config.yaml")
+                OmegaConf.save(cfg, cfg_path)
+
             if force_new_run:
                 run_id = None
             else:
                 uri = experiment.artifact_location
                 overrides = hc.overrides.task if match_overrides else None
-                run_id = get_run_id(uri,
+                run_id = get_run_id(uri, cfg, overrides)

             if run_id and not rerun_finished:
                 run = mlflow.get_run(run_id)
@@ -112,7 +120,7 @@ def main(
                 return

             with start_run(run_id=run_id, chdir=chdir) as run:
-                app(run,
+                app(run, cfg)

         return inner_decorator

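The new `update` argument of `hydraflow.main` runs after Hydra composes the configuration and before the MLflow run starts; if it returns a (possibly modified) configuration, that object replaces `cfg` and the saved `config.yaml` is rewritten to match. A minimal sketch with illustrative field names (`width`, `height`, `area`):

```python
from dataclasses import dataclass

from mlflow.entities import Run

import hydraflow


@dataclass
class Config:
    width: int = 1024
    height: int = 768
    area: int = 0  # derived below by `set_area`


def set_area(cfg: Config) -> Config:
    # Fill in a derived value; returning the config lets hydraflow
    # persist the updated values to the run's config.yaml.
    cfg.area = cfg.width * cfg.height
    return cfg


@hydraflow.main(Config, update=set_area)
def app(run: Run, cfg: Config) -> None:
    print(cfg.area)


if __name__ == "__main__":
    app()
```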
hydraflow/core/run.py
CHANGED
@@ -229,6 +229,8 @@ class Run[C, I = None]:
         cfg: DictConfig = self.cfg  # type: ignore

         if isinstance(key, str):
+            key = key.replace("__", ".")
+
             if force or OmegaConf.select(cfg, key, default=MISSING) is MISSING:
                 v = value(self) if callable(value) else value  # type: ignore
                 OmegaConf.update(cfg, key, v, force_add=True)
@@ -246,16 +248,19 @@ class Run[C, I = None]:
             raise TypeError(msg)

         for k, v in zip(key, value, strict=True):
-
-
+            k_ = k.replace("__", ".")
+            if force or OmegaConf.select(cfg, k_, default=MISSING) is MISSING:
+                OmegaConf.update(cfg, k_, v, force_add=True)

-    def get(self, key: str, default: Any = MISSING) -> Any:
+    def get(self, key: str, default: Any | Callable[[Self], Any] = MISSING) -> Any:
         """Get a value from the information or configuration.

         Args:
             key: The key to look for. Can use dot notation for
                 nested keys in configuration.
             default: Value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.
                 If not provided, AttributeError will be raised.

         Returns:
@@ -268,6 +273,8 @@ class Run[C, I = None]:
             no default is provided.

         """
+        key = key.replace("__", ".")
+
         value = OmegaConf.select(self.cfg, key, default=MISSING)  # type: ignore
         if value is not MISSING:
             return value
@@ -280,6 +287,9 @@ class Run[C, I = None]:
             return info[key]

         if default is not MISSING:
+            if callable(default):
+                return default(self)
+
             return default

         msg = f"No such key: {key}"
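With these changes, `Run.get` (and the setter above it) map double underscores in keys to dots, and `default` may be a callable that receives the `Run`. A short sketch, assuming `run` is a loaded `Run` whose configuration contains a nested `model.lr` field and an `epochs` field:

```python
lr = run.get("model.lr")    # dot notation for nested keys
lr = run.get("model__lr")   # double underscores are translated to dots

# A callable default is invoked with the Run to compute a fallback value.
steps = run.get("train.steps", default=lambda r: r.get("epochs") * 100)
```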
hydraflow/core/run_collection.py
CHANGED
@@ -38,12 +38,13 @@ Note:
 from __future__ import annotations

 from collections.abc import Hashable, Iterable, Sequence
+from dataclasses import MISSING
 from typing import TYPE_CHECKING, overload

 import numpy as np
 import polars as pl
 from omegaconf import OmegaConf
-from polars import DataFrame
+from polars import DataFrame, Series

 from .run import Run

@@ -139,6 +140,47 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
         """
         return iter(self.runs)

+    def preload(
+        self,
+        *,
+        n_jobs: int = 0,
+        cfg: bool = True,
+        impl: bool = True,
+    ) -> Self:
+        """Pre-load configuration and implementation objects for all runs in parallel.
+
+        This method eagerly evaluates the cfg and impl properties of all runs
+        in the collection, potentially in parallel using joblib. This can
+        significantly improve performance for subsequent operations that
+        access these properties, as they will be already loaded in memory.
+
+        Args:
+            cfg (bool): Whether to preload the configuration objects
+            impl (bool): Whether to preload the implementation objects
+            n_jobs (int): Number of parallel jobs to run
+                (-1 means using all processors)
+
+        Returns:
+            Self: The same RunCollection instance with preloaded
+                configuration and implementation objects.
+
+        """
+
+        def load(run: R) -> None:
+            _ = cfg and run.cfg
+            _ = impl and run.impl
+
+        if n_jobs == 0:
+            for run in self:
+                load(run)
+            return self
+
+        from joblib import Parallel, delayed
+
+        parallel = Parallel(backend="threading", n_jobs=n_jobs)
+        parallel(delayed(load)(run) for run in self)
+        return self
+
     @overload
     def update(
         self,
@@ -334,56 +376,107 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):

         raise _value_error()

-    def to_list(
+    def to_list(
+        self,
+        key: str,
+        default: Any | Callable[[R], Any] = MISSING,
+    ) -> list[Any]:
         """Extract a list of values for a specific key from all runs.

         Args:
             key: The key to extract from each run.
+            default: The default value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.

         Returns:
             list[Any]: A list containing the values for the
                 specified key from each run.

         """
-        return [run.get(key) for run in self]
+        return [run.get(key, default) for run in self]

-    def to_numpy(
+    def to_numpy(
+        self,
+        key: str,
+        default: Any | Callable[[R], Any] = MISSING,
+    ) -> NDArray:
         """Extract values for a specific key from all runs as a NumPy array.

         Args:
             key: The key to extract from each run.
+            default: The default value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.

         Returns:
             NDArray: A NumPy array containing the values for the
                 specified key from each run.

         """
-        return np.array(self.to_list(key))
+        return np.array(self.to_list(key, default))

-    def
+    def to_series(
+        self,
+        key: str,
+        default: Any | Callable[[R], Any] = MISSING,
+        *,
+        name: str | None = None,
+    ) -> Series:
+        """Extract values for a specific key from all runs as a Polars series.
+
+        Args:
+            key: The key to extract from each run.
+            default: The default value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.
+            name: The name of the series. If not provided, the key will be used.
+
+        Returns:
+            Series: A Polars series containing the values for the
+                specified key from each run.
+
+        """
+        return Series(name or key, self.to_list(key, default))
+
+    def unique(
+        self,
+        key: str,
+        default: Any | Callable[[R], Any] = MISSING,
+    ) -> NDArray:
         """Get the unique values for a specific key across all runs.

         Args:
             key: The key to extract unique values for.
+            default: The default value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.

         Returns:
             NDArray: A NumPy array containing the unique values for the
                 specified key.

         """
-        return np.unique(self.to_numpy(key), axis=0)
+        return np.unique(self.to_numpy(key, default), axis=0)

-    def n_unique(
+    def n_unique(
+        self,
+        key: str,
+        default: Any | Callable[[R], Any] = MISSING,
+    ) -> int:
         """Count the number of unique values for a specific key across all runs.

         Args:
             key: The key to count unique values for.
+            default: The default value to return if the key is not found.
+                If a callable, it will be called with the Run instance
+                and the value returned will be used as the default.

         Returns:
             int: The number of unique values for the specified key.

         """
-        return len(self.unique(key))
+        return len(self.unique(key, default))

     def sort(self, *keys: str, reverse: bool = False) -> Self:
         """Sort runs based on one or more keys.
@@ -409,13 +502,22 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):

         return self[index]

-    def to_frame(
+    def to_frame(
+        self,
+        *keys: str,
+        defaults: dict[str, Any | Callable[[R], Any]] | None = None,
+        **kwargs: Callable[[R], Any],
+    ) -> DataFrame:
         """Convert the collection to a Polars DataFrame.

         Args:
             *keys (str): The keys to include as columns in the DataFrame.
                 If not provided, all keys from each run's to_dict() method
                 will be used.
+            defaults (dict[str, Any | Callable[[R], Any]] | None): Default
+                values for the keys. If a callable, it will be called with
+                the Run instance and the value returned will be used as the
+                default.
             **kwargs (Callable[[R], Any]): Additional columns to compute
                 using callables that take a Run and return a value.

@@ -424,15 +526,20 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
             from the runs.

         """
+        if defaults is None:
+            defaults = {}
+
         if keys:
-            df = DataFrame(
+            df = DataFrame(
+                {key: self.to_list(key, defaults.get(key, MISSING)) for key in keys},
+            )
         else:
             df = DataFrame(r.to_dict() for r in self)

         if not kwargs:
             return df

-        columns = [
+        columns = [Series(k, [v(r) for r in self]) for k, v in kwargs.items()]
         return df.with_columns(*columns)

     def _group_by(self, *keys: str) -> dict[Any, Self]:
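Taken together, the collection gains eager preloading (serial by default, threaded via joblib when `n_jobs` is nonzero), per-key defaults for the extraction helpers, and a Polars `Series` export. A minimal sketch, assuming runs stored under `mlruns` whose configs contain `lr` and, for some runs, `seed`, and that loading multiple run directories yields a `RunCollection`:

```python
from hydraflow import Run, iter_run_dirs

# Load runs and eagerly evaluate cfg/impl using all processors (threading backend).
runs = Run.load(iter_run_dirs("mlruns")).preload(n_jobs=-1)

lrs = runs.to_series("lr", name="learning_rate")  # polars.Series named "learning_rate"
n_lr = runs.n_unique("lr")                        # number of distinct lr values

# Per-key defaults: a plain value or a callable receiving the Run.
df = runs.to_frame("lr", "seed", defaults={"seed": lambda r: -1})
```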
hydraflow/core/run_info.py
CHANGED
@@ -11,9 +11,12 @@ was created.
 from __future__ import annotations

 from dataclasses import dataclass
-from functools import cached_property
+from functools import cache, cached_property
+from pathlib import Path
 from typing import TYPE_CHECKING

+from omegaconf import OmegaConf
+
 if TYPE_CHECKING:
     from pathlib import Path
     from typing import Any
@@ -47,7 +50,7 @@ class RunInfo:
         Hydra configuration file (e.g., if the file does not exist or does not
         contain the expected format).
         """
-        return get_job_name(self.run_dir)
+        return get_job_name(self.run_dir.parent)

     def to_dict(self) -> dict[str, Any]:
         """Convert the RunInfo to a dictionary."""
@@ -58,27 +61,23 @@ class RunInfo:
         }


-
-
+@cache
+def get_job_name(experiment_dir: Path) -> str:
+    """Get the job name from an experiment directory.

-
-
-        contain the expected format).
+    Extracts the job name from the meta.yaml file. Returns an empty string
+    if the file does not exist or if the job name cannot be found.

     Args:
-
+        experiment_dir: Path to the experiment directory containing the meta.yaml file

     Returns:
-
+        The job name as a string, or an empty string if the file does not exist

     """
-
-
-    if not hydra_file.exists():
+    path = experiment_dir / "meta.yaml"
+    if not path.exists():
         return ""

-
-
-        return text.split(" job:\n name: ")[1].split("\n")[0]
-
-    return ""
+    meta = OmegaConf.load(experiment_dir / "meta.yaml")
+    return OmegaConf.select(meta, "name")
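The job name is now read from the experiment directory's `meta.yaml` (the parent of the run directory) with OmegaConf instead of string-splitting a Hydra file. A small sketch of the same lookup, assuming MLflow's usual `mlruns/<experiment_id>/meta.yaml` layout with a `name` entry:

```python
from pathlib import Path

from omegaconf import OmegaConf

experiment_dir = Path("mlruns/123456789012345678")  # illustrative experiment id
meta = OmegaConf.load(experiment_dir / "meta.yaml")
print(OmegaConf.select(meta, "name"))  # the experiment / Hydra job name
```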
{hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hydraflow
-Version: 0.15.1
+Version: 0.16.0
 Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
 Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
 Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -51,7 +51,7 @@ Requires-Dist: ruff>=0.11
 Requires-Dist: typer>=0.15
 Description-Content-Type: text/markdown

-#
+# HydraFlow

 [![PyPI Version][pypi-v-image]][pypi-v-link]
 [![Build Status][GHAction-image]][GHAction-link]
@@ -60,6 +60,7 @@ Description-Content-Type: text/markdown
 [![Python Version][python-v-image]][python-v-link]

 <!-- Badges -->
+
 [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
 [pypi-v-link]: https://pypi.org/project/hydraflow/
 [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yaml/badge.svg?branch=main&event=push
@@ -73,117 +74,125 @@ Description-Content-Type: text/markdown

 ## Overview

-
-
-
-
-
-
+HydraFlow seamlessly integrates [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/) to streamline machine learning experiment workflows. By combining Hydra's powerful configuration management with MLflow's robust experiment tracking, HydraFlow provides a comprehensive solution for defining, executing, and analyzing machine learning experiments.
+
+## Design Principles
+
+HydraFlow is built on the following design principles:
+
+1. **Type Safety** - Utilizing Python dataclasses for configuration type checking and IDE support
+2. **Reproducibility** - Automatically tracking all experiment configurations for fully reproducible experiments
+3. **Analysis Capabilities** - Providing powerful APIs for easily analyzing experiment results
+4. **Workflow Integration** - Creating a cohesive workflow by integrating Hydra's configuration management with MLflow's experiment tracking

 ## Key Features

-- **Configuration Management
-
-- **
-
-- **
-
-- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
-  projects with minimal setup.
-- **Rich CLI Interface**: Command-line tools for managing experiments and viewing results.
-- **Cross-Platform Support**: Works consistently across different operating systems.
+- **Type-safe Configuration Management** - Define experiment parameters using Python dataclasses with full IDE support and validation
+- **Seamless Hydra-MLflow Integration** - Automatically register configurations with Hydra and track experiments with MLflow
+- **Advanced Parameter Sweeps** - Define complex parameter spaces using extended sweep syntax for numerical ranges, combinations, and SI prefixes
+- **Workflow Automation** - Create reusable experiment workflows with YAML-based job definitions
+- **Powerful Analysis Tools** - Filter, group, and analyze experiment results with type-aware APIs
+- **Custom Implementation Support** - Extend experiment analysis with domain-specific functionality

 ## Installation

-You can install Hydraflow via pip:
-
 ```bash
 pip install hydraflow
 ```

 **Requirements:** Python 3.13+

-## Quick
-
-Here is a simple example to get you started with Hydraflow:
+## Quick Example

 ```python
-from __future__ import annotations
-
 from dataclasses import dataclass
-from
-
+from mlflow.entities import Run
 import hydraflow
-import mlflow

-
-
+@dataclass
+class Config:
+    width: int = 1024
+    height: int = 768

+@hydraflow.main(Config)
+def app(run: Run, cfg: Config) -> None:
+    # Your experiment code here
+    print(f"Running with width={cfg.width}, height={cfg.height}")
+
+    # Log metrics
+    hydraflow.log_metric("area", cfg.width * cfg.height)

+if __name__ == "__main__":
+    app()
+```
+
+Execute a parameter sweep with:
+
+```bash
+python app.py -m width=800,1200 height=600,900
+```
+
+## Core Components
+
+HydraFlow consists of the following key components:
+
+### Configuration Management
+
+Define type-safe configurations using Python dataclasses:
+
+```python
 @dataclass
 class Config:
-    """Configuration for the ML training experiment."""
-    # Training hyperparameters
     learning_rate: float = 0.001
     batch_size: int = 32
     epochs: int = 10
+```

-
-    hidden_size: int = 128
-    dropout: float = 0.1
-
-    # Dataset parameters
-    train_size: float = 0.8
-    random_seed: int = 42
+### Main Decorator

+The `@hydraflow.main` decorator integrates Hydra and MLflow:

+```python
 @hydraflow.main(Config)
-def
-
-
-This example demonstrates how to:
+def train(run: Run, cfg: Config) -> None:
+    # Your experiment code
+```

-
-2. Use Hydraflow to integrate with MLflow
-3. Track metrics and parameters automatically
+### Workflow Automation

-
-    run: MLflow run for the experiment corresponding to the Hydra app.
-        This `Run` instance is automatically created by Hydraflow.
-    cfg: Configuration for the experiment's run.
-        This `Config` instance is originally defined by Hydra, and then
-        automatically passed to the app by Hydraflow.
-    """
-    # Training loop
-    for epoch in range(cfg.epochs):
-        # Simulate training and validation
-        train_loss = 1.0 / (epoch + 1)
-        val_loss = 1.1 / (epoch + 1)
+Define reusable experiment workflows in YAML:

-
-
-
-
-
+```yaml
+jobs:
+  train_models:
+    run: python train.py
+    sets:
+      - each: model=small,medium,large
+        all: learning_rate=0.001,0.01,0.1
+```

-
+### Analysis Tools

+Analyze experiment results with powerful APIs:

-
-
-```
+```python
+from hydraflow import Run, iter_run_dirs

-
+# Load runs
+runs = Run.load(iter_run_dirs("mlruns"))

-
-
-
-- Type-safe configuration with dataclasses
+# Filter and analyze
+best_runs = runs.filter(model_type="transformer").to_frame("learning_rate", "accuracy")
+```

 ## Documentation

-For detailed documentation,
-
+For detailed documentation, visit our [documentation site](https://daizutabi.github.io/hydraflow/):
+
+- [Getting Started](https://daizutabi.github.io/hydraflow/getting-started/) - Installation and core concepts
+- [Practical Tutorials](https://daizutabi.github.io/hydraflow/practical-tutorials/) - Learn through hands-on examples
+- [User Guide](https://daizutabi.github.io/hydraflow/part1-applications/) - Detailed documentation of HydraFlow's capabilities
+- [API Reference](https://daizutabi.github.io/hydraflow/api/hydraflow/) - Complete API documentation

 ## Contributing

@@ -191,4 +200,4 @@ We welcome contributions! Please see our [contributing guide](CONTRIBUTING.md) f

 ## License

-This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
hydraflow-0.16.0.dist-info/RECORD
@@ -0,0 +1,21 @@
+hydraflow/__init__.py,sha256=8UraqH00Qp0In301ZUmQBRTIGbV1L5zSZACOUlIRPn8,727
+hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
+hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/core/context.py,sha256=igE17oQESGjH-sBnICI8HkZbngY_crkHTgx2E-YkmEo,4155
+hydraflow/core/io.py,sha256=gIH3-Lzs4d5TL3b9y-Nb064Aya7cXQHAuc7EjgKzxII,4694
+hydraflow/core/main.py,sha256=mnYcm1SaCaJwpMCKLEm337LcjW6P5G5LMUjOf78ejkk,5574
+hydraflow/core/run.py,sha256=SugX6JLdBqsfz3JTrB66I3muo03rrmwDvITVZQaF48w,12685
+hydraflow/core/run_collection.py,sha256=cbaJO68WzE-QNlTc8NhOyQ1pHDNberJs-31qTY7P9Fo,19495
+hydraflow/core/run_info.py,sha256=DTuT2eYhOj1WEeIsesOLjY0yltCw6f3Y-5hhvIbDROQ,2518
+hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
+hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
+hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
+hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
+hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
+hydraflow-0.16.0.dist-info/METADATA,sha256=g8PnKA-cAU6P0YCPg-hU9E-hpvljNk4v9tOgV3bT_dw,7691
+hydraflow-0.16.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hydraflow-0.16.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
+hydraflow-0.16.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
+hydraflow-0.16.0.dist-info/RECORD,,
hydraflow-0.15.1.dist-info/RECORD
@@ -1,21 +0,0 @@
-hydraflow/__init__.py,sha256=5ByA9ogtS5ZfIYIUSMUjMwAIpr6xGXEXmcABOu4O8RA,673
-hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
-hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydraflow/core/context.py,sha256=LFPNJxmuJQ2VUt-WBU07MC3ySbjlY8rRZ8VxuAih4o4,4148
-hydraflow/core/io.py,sha256=ZBXIL_jlBUiCI0L_J6S5S4OwtBMvdVVMXnekzMuC_JA,4404
-hydraflow/core/main.py,sha256=b9o6Rpn3uoXfDB8o0XZdl-g1yX2SKkOT12-H7lB8Les,5158
-hydraflow/core/run.py,sha256=KqaMdRUBOzOU4vkrRUczCrPCsVx30-XUQ_e78B78BSU,12330
-hydraflow/core/run_collection.py,sha256=pV3N83uBhmda9OeaNz1jqpF9z6A9j3jfUHtqy-uxCs4,15671
-hydraflow/core/run_info.py,sha256=3dW9GgWnZZNwbXwMrw-85AqQ956zlQddUi9irSNLR5g,2550
-hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
-hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
-hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
-hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
-hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
-hydraflow-0.15.1.dist-info/METADATA,sha256=oC-UgH0sZKw2Ry1kBiMPpNobxzlLhmhQgS8W3TIvGJI,7238
-hydraflow-0.15.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hydraflow-0.15.1.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
-hydraflow-0.15.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
-hydraflow-0.15.1.dist-info/RECORD,,
{hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/WHEEL
File without changes

{hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/entry_points.txt
File without changes

{hydraflow-0.15.1.dist-info → hydraflow-0.16.0.dist-info}/licenses/LICENSE
File without changes