hydraflow 0.2.18__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +6 -7
- hydraflow/config.py +24 -2
- hydraflow/context.py +20 -1
- hydraflow/mlflow.py +8 -2
- hydraflow/run_collection.py +28 -5
- hydraflow/run_data.py +34 -0
- hydraflow/run_info.py +34 -0
- hydraflow/{info.py → utils.py} +2 -40
- {hydraflow-0.2.18.dist-info → hydraflow-0.3.1.dist-info}/METADATA +2 -1
- hydraflow-0.3.1.dist-info/RECORD +16 -0
- hydraflow-0.2.18.dist-info/RECORD +0 -14
- {hydraflow-0.2.18.dist-info → hydraflow-0.3.1.dist-info}/WHEEL +0 -0
- {hydraflow-0.2.18.dist-info → hydraflow-0.3.1.dist-info}/licenses/LICENSE +0 -0
hydraflow/__init__.py
CHANGED
@@ -1,16 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
from .
|
4
|
-
|
5
|
-
search_runs,
|
6
|
-
set_experiment,
|
7
|
-
)
|
1
|
+
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
|
+
|
3
|
+
from .context import chdir_artifact, chdir_hydra, log_run, start_run, watch
|
4
|
+
from .mlflow import list_runs, search_runs, set_experiment
|
8
5
|
from .progress import multi_tasks_progress, parallel_progress
|
9
6
|
from .run_collection import RunCollection
|
7
|
+
from .utils import get_artifact_dir, get_hydra_output_dir, load_config
|
10
8
|
|
11
9
|
__all__ = [
|
12
10
|
"RunCollection",
|
13
11
|
"chdir_artifact",
|
12
|
+
"chdir_hydra",
|
14
13
|
"get_artifact_dir",
|
15
14
|
"get_hydra_output_dir",
|
16
15
|
"list_runs",
|
hydraflow/config.py
CHANGED
@@ -11,6 +11,20 @@ if TYPE_CHECKING:
|
|
11
11
|
from typing import Any
|
12
12
|
|
13
13
|
|
14
|
+
def collect_params(config: object) -> dict[str, Any]:
|
15
|
+
"""Iterate over parameters and collect them into a dictionary.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
config (object): The configuration object to iterate over.
|
19
|
+
prefix (str): The prefix to prepend to the parameter keys.
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
dict[str, Any]: A dictionary of collected parameters.
|
23
|
+
|
24
|
+
"""
|
25
|
+
return dict(iter_params(config))
|
26
|
+
|
27
|
+
|
14
28
|
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
15
29
|
"""Recursively iterate over the parameters in the given configuration object.
|
16
30
|
|
@@ -40,7 +54,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
40
54
|
if isinstance(config, DictConfig):
|
41
55
|
for key, value in config.items():
|
42
56
|
if _is_param(value):
|
43
|
-
yield f"{prefix}{key}", value
|
57
|
+
yield f"{prefix}{key}", _convert(value)
|
44
58
|
|
45
59
|
else:
|
46
60
|
yield from _iter_params(value, f"{prefix}{key}.")
|
@@ -48,7 +62,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
48
62
|
elif isinstance(config, ListConfig):
|
49
63
|
for index, value in enumerate(config):
|
50
64
|
if _is_param(value):
|
51
|
-
yield f"{prefix}{index}", value
|
65
|
+
yield f"{prefix}{index}", _convert(value)
|
52
66
|
|
53
67
|
else:
|
54
68
|
yield from _iter_params(value, f"{prefix}{index}.")
|
@@ -64,3 +78,11 @@ def _is_param(value: object) -> bool:
|
|
64
78
|
return False
|
65
79
|
|
66
80
|
return True
|
81
|
+
|
82
|
+
|
83
|
+
def _convert(value: Any) -> Any:
|
84
|
+
"""Convert the given value to a Python object."""
|
85
|
+
if isinstance(value, ListConfig):
|
86
|
+
return list(value)
|
87
|
+
|
88
|
+
return value
|
hydraflow/context.py
CHANGED
@@ -14,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
|
|
14
14
|
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
15
15
|
from watchdog.observers import Observer
|
16
16
|
|
17
|
-
from hydraflow.info import get_artifact_dir
|
18
17
|
from hydraflow.mlflow import log_params
|
18
|
+
from hydraflow.run_info import get_artifact_dir
|
19
19
|
|
20
20
|
if TYPE_CHECKING:
|
21
21
|
from collections.abc import Callable, Iterator
|
@@ -238,6 +238,25 @@ class Handler(PatternMatchingEventHandler):
|
|
238
238
|
self.func(file)
|
239
239
|
|
240
240
|
|
241
|
+
@contextmanager
|
242
|
+
def chdir_hydra() -> Iterator[Path]:
|
243
|
+
"""Change the current working directory to the hydra output directory.
|
244
|
+
|
245
|
+
This context manager changes the current working directory to the hydra output
|
246
|
+
directory. It ensures that the directory is changed back to the original
|
247
|
+
directory after the context is exited.
|
248
|
+
"""
|
249
|
+
curdir = Path.cwd()
|
250
|
+
path = HydraConfig.get().runtime.output_dir
|
251
|
+
|
252
|
+
os.chdir(path)
|
253
|
+
try:
|
254
|
+
yield Path(path)
|
255
|
+
|
256
|
+
finally:
|
257
|
+
os.chdir(curdir)
|
258
|
+
|
259
|
+
|
241
260
|
@contextmanager
|
242
261
|
def chdir_artifact(
|
243
262
|
run: Run,
|
hydraflow/mlflow.py
CHANGED
@@ -207,8 +207,14 @@ def _list_runs(
|
|
207
207
|
if experiment := mlflow.get_experiment_by_name(name):
|
208
208
|
loc = experiment.artifact_location
|
209
209
|
|
210
|
-
if isinstance(loc, str)
|
211
|
-
|
210
|
+
if isinstance(loc, str):
|
211
|
+
if loc.startswith("file://"):
|
212
|
+
path = Path(mlflow.artifacts.download_artifacts(loc))
|
213
|
+
elif Path(loc).is_dir():
|
214
|
+
path = Path(loc)
|
215
|
+
else:
|
216
|
+
continue
|
217
|
+
|
212
218
|
run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
|
213
219
|
|
214
220
|
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
hydraflow/run_collection.py
CHANGED
@@ -24,10 +24,12 @@ from itertools import chain
|
|
24
24
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
|
25
25
|
|
26
26
|
from mlflow.entities import RunStatus
|
27
|
+
from polars.dataframe import DataFrame
|
27
28
|
|
28
29
|
import hydraflow.param
|
29
|
-
from hydraflow.config import iter_params
|
30
|
-
from hydraflow.info import RunCollectionInfo
|
30
|
+
from hydraflow.config import collect_params, iter_params
|
31
|
+
from hydraflow.run_data import RunCollectionData
|
32
|
+
from hydraflow.run_info import RunCollectionInfo
|
31
33
|
|
32
34
|
if TYPE_CHECKING:
|
33
35
|
from collections.abc import Callable, Iterator
|
@@ -61,8 +63,12 @@ class RunCollection:
|
|
61
63
|
_info: RunCollectionInfo = field(init=False)
|
62
64
|
"""An instance of `RunCollectionInfo`."""
|
63
65
|
|
66
|
+
_data: RunCollectionData = field(init=False)
|
67
|
+
"""An instance of `RunCollectionData`."""
|
68
|
+
|
64
69
|
def __post_init__(self) -> None:
|
65
70
|
self._info = RunCollectionInfo(self)
|
71
|
+
self._data = RunCollectionData(self)
|
66
72
|
|
67
73
|
def __repr__(self) -> str:
|
68
74
|
return f"{self.__class__.__name__}({len(self)})"
|
@@ -101,6 +107,11 @@ class RunCollection:
|
|
101
107
|
"""An instance of `RunCollectionInfo`."""
|
102
108
|
return self._info
|
103
109
|
|
110
|
+
@property
|
111
|
+
def data(self) -> RunCollectionData:
|
112
|
+
"""An instance of `RunCollectionData`."""
|
113
|
+
return self._data
|
114
|
+
|
104
115
|
def take(self, n: int) -> RunCollection:
|
105
116
|
"""Take the first n runs from the collection.
|
106
117
|
|
@@ -371,7 +382,7 @@ class RunCollection:
|
|
371
382
|
raise ValueError(msg)
|
372
383
|
|
373
384
|
def try_get(self, config: object | None = None, **kwargs) -> Run | None:
|
374
|
-
"""Try to
|
385
|
+
"""Try to get a specific `Run` instance based on the provided configuration.
|
375
386
|
|
376
387
|
This method filters the runs in the collection according to the
|
377
388
|
specified configuration object and returns the run that matches the
|
@@ -505,7 +516,7 @@ class RunCollection:
|
|
505
516
|
in the collection.
|
506
517
|
|
507
518
|
"""
|
508
|
-
return (func(config, *args, **kwargs) for config in self.info.config)
|
519
|
+
return (func(config, *args, **kwargs) for config in self.data.config)
|
509
520
|
|
510
521
|
def map_uri(
|
511
522
|
self,
|
@@ -584,6 +595,16 @@ class RunCollection:
|
|
584
595
|
|
585
596
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
586
597
|
|
598
|
+
@property
|
599
|
+
def config(self) -> DataFrame:
|
600
|
+
"""Get the runs' configurations as a polars DataFrame.
|
601
|
+
|
602
|
+
Returns:
|
603
|
+
A polars DataFrame containing the runs' configurations.
|
604
|
+
|
605
|
+
"""
|
606
|
+
return DataFrame(self.map_config(collect_params))
|
607
|
+
|
587
608
|
|
588
609
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
589
610
|
params = run.data.params
|
@@ -634,8 +655,10 @@ def filter_runs(
|
|
634
655
|
"""
|
635
656
|
for key, value in chain(iter_params(config), kwargs.items()):
|
636
657
|
runs = [run for run in runs if _param_matches(run, key, value)]
|
658
|
+
if not runs:
|
659
|
+
return []
|
637
660
|
|
638
|
-
if
|
661
|
+
if status is None:
|
639
662
|
return runs
|
640
663
|
|
641
664
|
return filter_runs_by_status(runs, status)
|
hydraflow/run_data.py
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide data about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import load_config
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from omegaconf import DictConfig
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionData:
|
16
|
+
"""Provide data about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def params(self) -> list[dict[str, str]]:
|
23
|
+
"""Get the parameters for each run in the collection."""
|
24
|
+
return [run.data.params for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def metrics(self) -> list[dict[str, float]]:
|
28
|
+
"""Get the metrics for each run in the collection."""
|
29
|
+
return [run.data.metrics for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def config(self) -> list[DictConfig]:
|
33
|
+
"""Get the configuration for each run in the collection."""
|
34
|
+
return [load_config(run) for run in self._runs]
|
hydraflow/run_info.py
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide information about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import get_artifact_dir
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from pathlib import Path
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionInfo:
|
16
|
+
"""Provide information about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def run_id(self) -> list[str]:
|
23
|
+
"""Get the run ID for each run in the collection."""
|
24
|
+
return [run.info.run_id for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def artifact_uri(self) -> list[str | None]:
|
28
|
+
"""Get the artifact URI for each run in the collection."""
|
29
|
+
return [run.info.artifact_uri for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def artifact_dir(self) -> list[Path]:
|
33
|
+
"""Get the artifact directory for each run in the collection."""
|
34
|
+
return [get_artifact_dir(run) for run in self._runs]
|
hydraflow/{info.py → utils.py}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""Provide
|
1
|
+
"""Provide utility functions for HydraFlow."""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
@@ -7,51 +7,13 @@ from typing import TYPE_CHECKING
|
|
7
7
|
|
8
8
|
import mlflow
|
9
9
|
from hydra.core.hydra_config import HydraConfig
|
10
|
+
from mlflow.entities import Run
|
10
11
|
from mlflow.tracking import artifact_utils
|
11
12
|
from omegaconf import DictConfig, OmegaConf
|
12
13
|
|
13
14
|
if TYPE_CHECKING:
|
14
15
|
from mlflow.entities import Run
|
15
16
|
|
16
|
-
from hydraflow.run_collection import RunCollection
|
17
|
-
|
18
|
-
|
19
|
-
class RunCollectionInfo:
|
20
|
-
"""Provide information about MLflow runs."""
|
21
|
-
|
22
|
-
def __init__(self, runs: RunCollection) -> None:
|
23
|
-
self._runs = runs
|
24
|
-
|
25
|
-
@property
|
26
|
-
def run_id(self) -> list[str]:
|
27
|
-
"""Get the run ID for each run in the collection."""
|
28
|
-
return [run.info.run_id for run in self._runs]
|
29
|
-
|
30
|
-
@property
|
31
|
-
def params(self) -> list[dict[str, str]]:
|
32
|
-
"""Get the parameters for each run in the collection."""
|
33
|
-
return [run.data.params for run in self._runs]
|
34
|
-
|
35
|
-
@property
|
36
|
-
def metrics(self) -> list[dict[str, float]]:
|
37
|
-
"""Get the metrics for each run in the collection."""
|
38
|
-
return [run.data.metrics for run in self._runs]
|
39
|
-
|
40
|
-
@property
|
41
|
-
def artifact_uri(self) -> list[str | None]:
|
42
|
-
"""Get the artifact URI for each run in the collection."""
|
43
|
-
return [run.info.artifact_uri for run in self._runs]
|
44
|
-
|
45
|
-
@property
|
46
|
-
def artifact_dir(self) -> list[Path]:
|
47
|
-
"""Get the artifact directory for each run in the collection."""
|
48
|
-
return [get_artifact_dir(run) for run in self._runs]
|
49
|
-
|
50
|
-
@property
|
51
|
-
def config(self) -> list[DictConfig]:
|
52
|
-
"""Get the configuration for each run in the collection."""
|
53
|
-
return [load_config(run) for run in self._runs]
|
54
|
-
|
55
17
|
|
56
18
|
def get_artifact_dir(run: Run | None = None) -> Path:
|
57
19
|
"""Retrieve the artifact directory for the given run.
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.18
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
|
|
17
17
|
Requires-Dist: hydra-core>=1.3
|
18
18
|
Requires-Dist: joblib
|
19
19
|
Requires-Dist: mlflow>=2.15
|
20
|
+
Requires-Dist: polars
|
20
21
|
Requires-Dist: rich
|
21
22
|
Requires-Dist: watchdog
|
22
23
|
Requires-Dist: watchfiles
|
@@ -0,0 +1,16 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=6sfM1ashUkfrNf7lOR7raFYhG8YdOAJR-JgRNL_IVo8,698
|
2
|
+
hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
|
3
|
+
hydraflow/config.py,sha256=6V5omJ3-h9-ZwVpM5rTA4FqE_mu8urTy9OqV4zG79gw,2671
|
4
|
+
hydraflow/context.py,sha256=412884e84qIEYtbxJT4roYsKfldGaTKzgo6Q1FAsT5U,8733
|
5
|
+
hydraflow/mlflow.py,sha256=JELqXFCJ9MsEJaQWT5dyleTFk8BHL7cQwW_gzhkPoIg,8729
|
6
|
+
hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
|
7
|
+
hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
|
8
|
+
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
hydraflow/run_collection.py,sha256=Xv6-KD5ac-vv-4Q3PZrzJy1x84H_g7UoP7ZqZ8_DQeQ,24973
|
10
|
+
hydraflow/run_data.py,sha256=ZXVr0PHyufH9wwyQYWtpE4_MheAC2ArTW_J1TTMQ4iI,983
|
11
|
+
hydraflow/run_info.py,sha256=sMXOo20ClaRIommMEzuAbO_OrcXx7M1Yt4FMV7spxz0,998
|
12
|
+
hydraflow/utils.py,sha256=aRdBdToKfvHhN2qFiRzPHIdQxS7cTpZREQeP8HreAfI,2676
|
13
|
+
hydraflow-0.3.1.dist-info/METADATA,sha256=W38pNcCNy7Kmx1t9dwFoANsRjCk40-KBJUWux_BvHqA,3840
|
14
|
+
hydraflow-0.3.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
15
|
+
hydraflow-0.3.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
16
|
+
hydraflow-0.3.1.dist-info/RECORD,,
|
@@ -1,14 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
|
2
|
-
hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
|
3
|
-
hydraflow/config.py,sha256=sBaEYPMAGSIOc_wdDsWm0k4y3AZyWIET8gqa_o95SDA,2089
|
4
|
-
hydraflow/context.py,sha256=ih_jnexaHoToNq1dZ6sBzhJWFluPiQluOlYTYOzNEgk,8222
|
5
|
-
hydraflow/info.py,sha256=Vzyz9dEWcU9ovRG3JWshxIazzod1cZoHF74bHhHL3AI,3946
|
6
|
-
hydraflow/mlflow.py,sha256=GkOr_pXfpfY5USYBLrCigHcP13VgrAK_e9kheR1Wke4,8579
|
7
|
-
hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
|
8
|
-
hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
|
9
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
-
hydraflow/run_collection.py,sha256=gsseBQ6a2YolNanISgEgkjei7o9U6ZGV-Tk50UYH850,24295
|
11
|
-
hydraflow-0.2.18.dist-info/METADATA,sha256=roL3lGtlIibF6rHbCp4aXrCphhq-OkNe0JwLxM1xtBY,3819
|
12
|
-
hydraflow-0.2.18.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
13
|
-
hydraflow-0.2.18.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
14
|
-
hydraflow-0.2.18.dist-info/RECORD,,
|
File without changes
|
File without changes
|