hydraflow 0.2.18__tar.gz → 0.3.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {hydraflow-0.2.18 → hydraflow-0.3.1}/.devcontainer/devcontainer.json +0 -1
- {hydraflow-0.2.18 → hydraflow-0.3.1}/.gitignore +4 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/PKG-INFO +2 -1
- hydraflow-0.3.1/apps/hello.py +31 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/mkdocs.yml +3 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/pyproject.toml +9 -2
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/__init__.py +6 -7
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/config.py +24 -2
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/context.py +20 -1
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/mlflow.py +8 -2
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/run_collection.py +28 -5
- hydraflow-0.3.1/src/hydraflow/run_data.py +34 -0
- hydraflow-0.3.1/src/hydraflow/run_info.py +34 -0
- hydraflow-0.2.18/src/hydraflow/info.py → hydraflow-0.3.1/src/hydraflow/utils.py +2 -40
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/scripts/app.py +5 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_app.py +35 -13
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_config.py +8 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_log_run.py +1 -1
- hydraflow-0.3.1/tests/test_run_data.py +43 -0
- hydraflow-0.2.18/tests/test_info.py → hydraflow-0.3.1/tests/test_run_info.py +1 -17
- {hydraflow-0.2.18 → hydraflow-0.3.1}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/.gitattributes +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/LICENSE +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/README.md +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/asyncio.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/param.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/progress.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/src/hydraflow/py.typed +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/__init__.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/scripts/progress.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/scripts/watch.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_asyncio.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_context.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_mlflow.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_param.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_progress.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_run_collection.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_version.py +0 -0
- {hydraflow-0.2.18 → hydraflow-0.3.1}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.2.18
|
3
|
+
Version: 0.3.1
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
|
|
17
17
|
Requires-Dist: hydra-core>=1.3
|
18
18
|
Requires-Dist: joblib
|
19
19
|
Requires-Dist: mlflow>=2.15
|
20
|
+
Requires-Dist: polars
|
20
21
|
Requires-Dist: rich
|
21
22
|
Requires-Dist: watchdog
|
22
23
|
Requires-Dist: watchfiles
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import logging
|
2
|
+
from dataclasses import dataclass
|
3
|
+
|
4
|
+
import hydra
|
5
|
+
from hydra.core.config_store import ConfigStore
|
6
|
+
|
7
|
+
import hydraflow
|
8
|
+
|
9
|
+
log = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
|
13
|
+
class Config:
|
14
|
+
width: int = 1024
|
15
|
+
height: int = 768
|
16
|
+
|
17
|
+
|
18
|
+
cs = ConfigStore.instance()
|
19
|
+
cs.store(name="config", node=Config)
|
20
|
+
|
21
|
+
|
22
|
+
@hydra.main(version_base=None, config_name="config")
|
23
|
+
def app(cfg: Config) -> None:
|
24
|
+
hydraflow.set_experiment()
|
25
|
+
|
26
|
+
with hydraflow.start_run(cfg):
|
27
|
+
log.info(f"{cfg.width=}, {cfg.height=}")
|
28
|
+
|
29
|
+
|
30
|
+
if __name__ == "__main__":
|
31
|
+
app()
|
@@ -38,6 +38,7 @@ theme:
|
|
38
38
|
- navigation.tracking
|
39
39
|
plugins:
|
40
40
|
- search
|
41
|
+
- markdown-exec
|
41
42
|
- mkapi
|
42
43
|
markdown_extensions:
|
43
44
|
- pymdownx.magiclink
|
@@ -50,4 +51,6 @@ markdown_extensions:
|
|
50
51
|
alternate_style: true
|
51
52
|
nav:
|
52
53
|
- Home: index.md
|
54
|
+
- Usage:
|
55
|
+
- Hydra application: usage/hydra.md
|
53
56
|
- Reference: $api/hydraflow.**
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "hydraflow"
|
7
|
-
version = "0.2.18"
|
7
|
+
version = "0.3.1"
|
8
8
|
description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
|
9
9
|
readme = "README.md"
|
10
10
|
license = "MIT"
|
@@ -21,6 +21,7 @@ dependencies = [
|
|
21
21
|
"hydra-core>=1.3",
|
22
22
|
"joblib",
|
23
23
|
"mlflow>=2.15",
|
24
|
+
"polars",
|
24
25
|
"rich",
|
25
26
|
"watchdog",
|
26
27
|
"watchfiles",
|
@@ -33,6 +34,10 @@ Issues = "https://github.com/daizutabi/hydraflow/issues"
|
|
33
34
|
|
34
35
|
[tool.uv]
|
35
36
|
dev-dependencies = [
|
37
|
+
"markdown-exec[ansi]",
|
38
|
+
"mkapi",
|
39
|
+
"mkdocs-material",
|
40
|
+
"mkdocs>=1.6",
|
36
41
|
"pytest-asyncio",
|
37
42
|
"pytest-clarity",
|
38
43
|
"pytest-cov",
|
@@ -80,7 +85,7 @@ ignore = [
|
|
80
85
|
"PGH003",
|
81
86
|
"TRY003",
|
82
87
|
]
|
83
|
-
exclude = ["tests/scripts/*.py", "src/hydraflow/__init__.py"]
|
88
|
+
exclude = ["tests/scripts/*.py"]
|
84
89
|
|
85
90
|
[tool.ruff.lint.per-file-ignores]
|
86
91
|
"tests/*" = [
|
@@ -89,9 +94,11 @@ exclude = ["tests/scripts/*.py", "src/hydraflow/__init__.py"]
|
|
89
94
|
"ARG",
|
90
95
|
"D",
|
91
96
|
"FBT",
|
97
|
+
"PD",
|
92
98
|
"PLR",
|
93
99
|
"PT",
|
94
100
|
"S",
|
95
101
|
"SIM117",
|
96
102
|
"SLF",
|
97
103
|
]
|
104
|
+
"apps/*.py" = ["INP", "D", "G", "T"]
|
@@ -1,16 +1,15 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
from .
|
4
|
-
|
5
|
-
search_runs,
|
6
|
-
set_experiment,
|
7
|
-
)
|
1
|
+
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
|
+
|
3
|
+
from .context import chdir_artifact, chdir_hydra, log_run, start_run, watch
|
4
|
+
from .mlflow import list_runs, search_runs, set_experiment
|
8
5
|
from .progress import multi_tasks_progress, parallel_progress
|
9
6
|
from .run_collection import RunCollection
|
7
|
+
from .utils import get_artifact_dir, get_hydra_output_dir, load_config
|
10
8
|
|
11
9
|
__all__ = [
|
12
10
|
"RunCollection",
|
13
11
|
"chdir_artifact",
|
12
|
+
"chdir_hydra",
|
14
13
|
"get_artifact_dir",
|
15
14
|
"get_hydra_output_dir",
|
16
15
|
"list_runs",
|
@@ -11,6 +11,20 @@ if TYPE_CHECKING:
|
|
11
11
|
from typing import Any
|
12
12
|
|
13
13
|
|
14
|
+
def collect_params(config: object) -> dict[str, Any]:
|
15
|
+
"""Iterate over parameters and collect them into a dictionary.
|
16
|
+
|
17
|
+
Args:
|
18
|
+
config (object): The configuration object to iterate over.
|
19
|
+
prefix (str): The prefix to prepend to the parameter keys.
|
20
|
+
|
21
|
+
Returns:
|
22
|
+
dict[str, Any]: A dictionary of collected parameters.
|
23
|
+
|
24
|
+
"""
|
25
|
+
return dict(iter_params(config))
|
26
|
+
|
27
|
+
|
14
28
|
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
15
29
|
"""Recursively iterate over the parameters in the given configuration object.
|
16
30
|
|
@@ -40,7 +54,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
40
54
|
if isinstance(config, DictConfig):
|
41
55
|
for key, value in config.items():
|
42
56
|
if _is_param(value):
|
43
|
-
yield f"{prefix}{key}", value
|
57
|
+
yield f"{prefix}{key}", _convert(value)
|
44
58
|
|
45
59
|
else:
|
46
60
|
yield from _iter_params(value, f"{prefix}{key}.")
|
@@ -48,7 +62,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
48
62
|
elif isinstance(config, ListConfig):
|
49
63
|
for index, value in enumerate(config):
|
50
64
|
if _is_param(value):
|
51
|
-
yield f"{prefix}{index}", value
|
65
|
+
yield f"{prefix}{index}", _convert(value)
|
52
66
|
|
53
67
|
else:
|
54
68
|
yield from _iter_params(value, f"{prefix}{index}.")
|
@@ -64,3 +78,11 @@ def _is_param(value: object) -> bool:
|
|
64
78
|
return False
|
65
79
|
|
66
80
|
return True
|
81
|
+
|
82
|
+
|
83
|
+
def _convert(value: Any) -> Any:
|
84
|
+
"""Convert the given value to a Python object."""
|
85
|
+
if isinstance(value, ListConfig):
|
86
|
+
return list(value)
|
87
|
+
|
88
|
+
return value
|
@@ -14,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
|
|
14
14
|
from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
|
15
15
|
from watchdog.observers import Observer
|
16
16
|
|
17
|
-
from hydraflow.info import get_artifact_dir
|
18
17
|
from hydraflow.mlflow import log_params
|
18
|
+
from hydraflow.run_info import get_artifact_dir
|
19
19
|
|
20
20
|
if TYPE_CHECKING:
|
21
21
|
from collections.abc import Callable, Iterator
|
@@ -238,6 +238,25 @@ class Handler(PatternMatchingEventHandler):
|
|
238
238
|
self.func(file)
|
239
239
|
|
240
240
|
|
241
|
+
@contextmanager
|
242
|
+
def chdir_hydra() -> Iterator[Path]:
|
243
|
+
"""Change the current working directory to the hydra output directory.
|
244
|
+
|
245
|
+
This context manager changes the current working directory to the hydra output
|
246
|
+
directory. It ensures that the directory is changed back to the original
|
247
|
+
directory after the context is exited.
|
248
|
+
"""
|
249
|
+
curdir = Path.cwd()
|
250
|
+
path = HydraConfig.get().runtime.output_dir
|
251
|
+
|
252
|
+
os.chdir(path)
|
253
|
+
try:
|
254
|
+
yield Path(path)
|
255
|
+
|
256
|
+
finally:
|
257
|
+
os.chdir(curdir)
|
258
|
+
|
259
|
+
|
241
260
|
@contextmanager
|
242
261
|
def chdir_artifact(
|
243
262
|
run: Run,
|
@@ -207,8 +207,14 @@ def _list_runs(
|
|
207
207
|
if experiment := mlflow.get_experiment_by_name(name):
|
208
208
|
loc = experiment.artifact_location
|
209
209
|
|
210
|
-
if isinstance(loc, str)
|
211
|
-
|
210
|
+
if isinstance(loc, str):
|
211
|
+
if loc.startswith("file://"):
|
212
|
+
path = Path(mlflow.artifacts.download_artifacts(loc))
|
213
|
+
elif Path(loc).is_dir():
|
214
|
+
path = Path(loc)
|
215
|
+
else:
|
216
|
+
continue
|
217
|
+
|
212
218
|
run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
|
213
219
|
|
214
220
|
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
@@ -24,10 +24,12 @@ from itertools import chain
|
|
24
24
|
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
|
25
25
|
|
26
26
|
from mlflow.entities import RunStatus
|
27
|
+
from polars.dataframe import DataFrame
|
27
28
|
|
28
29
|
import hydraflow.param
|
29
|
-
from hydraflow.config import iter_params
|
30
|
-
from hydraflow.info import RunCollectionInfo
|
30
|
+
from hydraflow.config import collect_params, iter_params
|
31
|
+
from hydraflow.run_data import RunCollectionData
|
32
|
+
from hydraflow.run_info import RunCollectionInfo
|
31
33
|
|
32
34
|
if TYPE_CHECKING:
|
33
35
|
from collections.abc import Callable, Iterator
|
@@ -61,8 +63,12 @@ class RunCollection:
|
|
61
63
|
_info: RunCollectionInfo = field(init=False)
|
62
64
|
"""An instance of `RunCollectionInfo`."""
|
63
65
|
|
66
|
+
_data: RunCollectionData = field(init=False)
|
67
|
+
"""An instance of `RunCollectionData`."""
|
68
|
+
|
64
69
|
def __post_init__(self) -> None:
|
65
70
|
self._info = RunCollectionInfo(self)
|
71
|
+
self._data = RunCollectionData(self)
|
66
72
|
|
67
73
|
def __repr__(self) -> str:
|
68
74
|
return f"{self.__class__.__name__}({len(self)})"
|
@@ -101,6 +107,11 @@ class RunCollection:
|
|
101
107
|
"""An instance of `RunCollectionInfo`."""
|
102
108
|
return self._info
|
103
109
|
|
110
|
+
@property
|
111
|
+
def data(self) -> RunCollectionData:
|
112
|
+
"""An instance of `RunCollectionData`."""
|
113
|
+
return self._data
|
114
|
+
|
104
115
|
def take(self, n: int) -> RunCollection:
|
105
116
|
"""Take the first n runs from the collection.
|
106
117
|
|
@@ -371,7 +382,7 @@ class RunCollection:
|
|
371
382
|
raise ValueError(msg)
|
372
383
|
|
373
384
|
def try_get(self, config: object | None = None, **kwargs) -> Run | None:
|
374
|
-
"""Try to
|
385
|
+
"""Try to get a specific `Run` instance based on the provided configuration.
|
375
386
|
|
376
387
|
This method filters the runs in the collection according to the
|
377
388
|
specified configuration object and returns the run that matches the
|
@@ -505,7 +516,7 @@ class RunCollection:
|
|
505
516
|
in the collection.
|
506
517
|
|
507
518
|
"""
|
508
|
-
return (func(config, *args, **kwargs) for config in self.info.config)
|
519
|
+
return (func(config, *args, **kwargs) for config in self.data.config)
|
509
520
|
|
510
521
|
def map_uri(
|
511
522
|
self,
|
@@ -584,6 +595,16 @@ class RunCollection:
|
|
584
595
|
|
585
596
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
586
597
|
|
598
|
+
@property
|
599
|
+
def config(self) -> DataFrame:
|
600
|
+
"""Get the runs' configurations as a polars DataFrame.
|
601
|
+
|
602
|
+
Returns:
|
603
|
+
A polars DataFrame containing the runs' configurations.
|
604
|
+
|
605
|
+
"""
|
606
|
+
return DataFrame(self.map_config(collect_params))
|
607
|
+
|
587
608
|
|
588
609
|
def _param_matches(run: Run, key: str, value: Any) -> bool:
|
589
610
|
params = run.data.params
|
@@ -634,8 +655,10 @@ def filter_runs(
|
|
634
655
|
"""
|
635
656
|
for key, value in chain(iter_params(config), kwargs.items()):
|
636
657
|
runs = [run for run in runs if _param_matches(run, key, value)]
|
658
|
+
if not runs:
|
659
|
+
return []
|
637
660
|
|
638
|
-
if
|
661
|
+
if status is None:
|
639
662
|
return runs
|
640
663
|
|
641
664
|
return filter_runs_by_status(runs, status)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide data about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import load_config
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from omegaconf import DictConfig
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionData:
|
16
|
+
"""Provide data about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def params(self) -> list[dict[str, str]]:
|
23
|
+
"""Get the parameters for each run in the collection."""
|
24
|
+
return [run.data.params for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def metrics(self) -> list[dict[str, float]]:
|
28
|
+
"""Get the metrics for each run in the collection."""
|
29
|
+
return [run.data.metrics for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def config(self) -> list[DictConfig]:
|
33
|
+
"""Get the configuration for each run in the collection."""
|
34
|
+
return [load_config(run) for run in self._runs]
|
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide information about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import get_artifact_dir
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from pathlib import Path
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionInfo:
|
16
|
+
"""Provide information about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def run_id(self) -> list[str]:
|
23
|
+
"""Get the run ID for each run in the collection."""
|
24
|
+
return [run.info.run_id for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def artifact_uri(self) -> list[str | None]:
|
28
|
+
"""Get the artifact URI for each run in the collection."""
|
29
|
+
return [run.info.artifact_uri for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def artifact_dir(self) -> list[Path]:
|
33
|
+
"""Get the artifact directory for each run in the collection."""
|
34
|
+
return [get_artifact_dir(run) for run in self._runs]
|
@@ -1,4 +1,4 @@
|
|
1
|
-
"""Provide
|
1
|
+
"""Provide utility functions for HydraFlow."""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
@@ -7,51 +7,13 @@ from typing import TYPE_CHECKING
|
|
7
7
|
|
8
8
|
import mlflow
|
9
9
|
from hydra.core.hydra_config import HydraConfig
|
10
|
+
from mlflow.entities import Run
|
10
11
|
from mlflow.tracking import artifact_utils
|
11
12
|
from omegaconf import DictConfig, OmegaConf
|
12
13
|
|
13
14
|
if TYPE_CHECKING:
|
14
15
|
from mlflow.entities import Run
|
15
16
|
|
16
|
-
from hydraflow.run_collection import RunCollection
|
17
|
-
|
18
|
-
|
19
|
-
class RunCollectionInfo:
|
20
|
-
"""Provide information about MLflow runs."""
|
21
|
-
|
22
|
-
def __init__(self, runs: RunCollection) -> None:
|
23
|
-
self._runs = runs
|
24
|
-
|
25
|
-
@property
|
26
|
-
def run_id(self) -> list[str]:
|
27
|
-
"""Get the run ID for each run in the collection."""
|
28
|
-
return [run.info.run_id for run in self._runs]
|
29
|
-
|
30
|
-
@property
|
31
|
-
def params(self) -> list[dict[str, str]]:
|
32
|
-
"""Get the parameters for each run in the collection."""
|
33
|
-
return [run.data.params for run in self._runs]
|
34
|
-
|
35
|
-
@property
|
36
|
-
def metrics(self) -> list[dict[str, float]]:
|
37
|
-
"""Get the metrics for each run in the collection."""
|
38
|
-
return [run.data.metrics for run in self._runs]
|
39
|
-
|
40
|
-
@property
|
41
|
-
def artifact_uri(self) -> list[str | None]:
|
42
|
-
"""Get the artifact URI for each run in the collection."""
|
43
|
-
return [run.info.artifact_uri for run in self._runs]
|
44
|
-
|
45
|
-
@property
|
46
|
-
def artifact_dir(self) -> list[Path]:
|
47
|
-
"""Get the artifact directory for each run in the collection."""
|
48
|
-
return [get_artifact_dir(run) for run in self._runs]
|
49
|
-
|
50
|
-
@property
|
51
|
-
def config(self) -> list[DictConfig]:
|
52
|
-
"""Get the configuration for each run in the collection."""
|
53
|
-
return [load_config(run) for run in self._runs]
|
54
|
-
|
55
17
|
|
56
18
|
def get_artifact_dir(run: Run | None = None) -> Path:
|
57
19
|
"""Retrieve the artifact directory for the given run.
|
@@ -27,6 +27,9 @@ cs.store(name="config", node=MySQLConfig)
|
|
27
27
|
|
28
28
|
@hydra.main(version_base=None, config_name="config")
|
29
29
|
def app(cfg: MySQLConfig):
|
30
|
+
with hydraflow.chdir_hydra() as path:
|
31
|
+
Path("chdir_hydra.txt").write_text(path.as_posix())
|
32
|
+
|
30
33
|
hydraflow.set_experiment(prefix="_", suffix="_")
|
31
34
|
with hydraflow.start_run(cfg):
|
32
35
|
log.info(f"START, {cfg.host}, {cfg.port} ")
|
@@ -34,6 +37,8 @@ def app(cfg: MySQLConfig):
|
|
34
37
|
artifact_dir = hydraflow.get_artifact_dir()
|
35
38
|
output_dir = hydraflow.get_hydra_output_dir()
|
36
39
|
|
40
|
+
assert (output_dir / "chdir_hydra.txt").exists()
|
41
|
+
|
37
42
|
mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
|
38
43
|
mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
|
39
44
|
|
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING
|
|
8
8
|
import mlflow
|
9
9
|
import pytest
|
10
10
|
from mlflow.entities import RunStatus
|
11
|
-
from omegaconf import OmegaConf
|
11
|
+
from omegaconf import ListConfig, OmegaConf
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from omegaconf import DictConfig
|
@@ -90,30 +90,37 @@ def test_app_info_run_id(rc: RunCollection):
|
|
90
90
|
assert len(rc.info.run_id) == 4
|
91
91
|
|
92
92
|
|
93
|
-
def
|
94
|
-
params = rc.info.params
|
93
|
+
def test_app_data_params(rc: RunCollection):
|
94
|
+
params = rc.data.params
|
95
95
|
assert params[0] == {"port": "1", "host": "x", "values": "[1, 2, 3]"}
|
96
96
|
assert params[1] == {"port": "2", "host": "x", "values": "[1, 2, 3]"}
|
97
97
|
assert params[2] == {"port": "1", "host": "y", "values": "[1, 2, 3]"}
|
98
98
|
assert params[3] == {"port": "2", "host": "y", "values": "[1, 2, 3]"}
|
99
99
|
|
100
100
|
|
101
|
-
def
|
102
|
-
metrics = rc.info.metrics
|
101
|
+
def test_app_data_metrics(rc: RunCollection):
|
102
|
+
metrics = rc.data.metrics
|
103
103
|
assert metrics[0] == {"m": 11, "watch": 3}
|
104
104
|
assert metrics[1] == {"m": 12, "watch": 3}
|
105
105
|
assert metrics[2] == {"m": 2, "watch": 3}
|
106
106
|
assert metrics[3] == {"m": 3, "watch": 3}
|
107
107
|
|
108
108
|
|
109
|
-
def
|
110
|
-
config = rc.info.config
|
109
|
+
def test_app_data_config(rc: RunCollection):
|
110
|
+
config = rc.data.config
|
111
111
|
assert config[0].port == 1
|
112
112
|
assert config[1].port == 2
|
113
113
|
assert config[2].host == "y"
|
114
114
|
assert config[3].host == "y"
|
115
115
|
|
116
116
|
|
117
|
+
def test_app_data_config_list(rc: RunCollection):
|
118
|
+
config = rc.data.config
|
119
|
+
assert isinstance(config[0]["values"], ListConfig)
|
120
|
+
assert not isinstance(config[0]["values"], list)
|
121
|
+
assert config[0]["values"] == [1, 2, 3]
|
122
|
+
|
123
|
+
|
117
124
|
def test_app_info_artifact_uri(rc: RunCollection):
|
118
125
|
uris = rc.info.artifact_uri
|
119
126
|
assert all(uri.startswith("file://") for uri in uris) # type: ignore
|
@@ -122,14 +129,14 @@ def test_app_info_artifact_uri(rc: RunCollection):
|
|
122
129
|
|
123
130
|
|
124
131
|
def test_app_info_artifact_dir(rc: RunCollection):
|
125
|
-
from hydraflow.info import get_artifact_dir
|
132
|
+
from hydraflow.utils import get_artifact_dir
|
126
133
|
|
127
134
|
dirs = list(rc.map(get_artifact_dir))
|
128
135
|
assert rc.info.artifact_dir == dirs
|
129
136
|
|
130
137
|
|
131
138
|
def test_app_hydra_output_dir(rc: RunCollection):
|
132
|
-
from hydraflow.info import get_hydra_output_dir
|
139
|
+
from hydraflow.utils import get_hydra_output_dir
|
133
140
|
|
134
141
|
dirs = list(rc.map(get_hydra_output_dir))
|
135
142
|
assert dirs[0].stem == "0"
|
@@ -154,13 +161,13 @@ def test_app_group_by(rc: RunCollection):
|
|
154
161
|
grouped = rc.group_by("host")
|
155
162
|
assert len(grouped) == 2
|
156
163
|
x = {"port": "1", "host": "x", "values": "[1, 2, 3]"}
|
157
|
-
assert grouped[("x",)].info.params[0] == x
|
164
|
+
assert grouped[("x",)].data.params[0] == x
|
158
165
|
x = {"port": "2", "host": "x", "values": "[1, 2, 3]"}
|
159
|
-
assert grouped[("x",)].info.params[1] == x
|
166
|
+
assert grouped[("x",)].data.params[1] == x
|
160
167
|
x = {"port": "1", "host": "y", "values": "[1, 2, 3]"}
|
161
|
-
assert grouped[("y",)].info.params[0] == x
|
168
|
+
assert grouped[("y",)].data.params[0] == x
|
162
169
|
x = {"port": "2", "host": "y", "values": "[1, 2, 3]"}
|
163
|
-
assert grouped[("y",)].info.params[1] == x
|
170
|
+
assert grouped[("y",)].data.params[1] == x
|
164
171
|
|
165
172
|
|
166
173
|
def test_app_filter_list(rc: RunCollection):
|
@@ -170,3 +177,18 @@ def test_app_filter_list(rc: RunCollection):
|
|
170
177
|
assert len(filtered) == 4
|
171
178
|
filtered = rc.filter(values=[1])
|
172
179
|
assert not filtered
|
180
|
+
|
181
|
+
|
182
|
+
def test_config(rc: RunCollection):
|
183
|
+
df = rc.config
|
184
|
+
assert df.columns == ["host", "port", "values"]
|
185
|
+
assert df.shape == (4, 3)
|
186
|
+
assert df.select("host").to_series().to_list() == ["x", "x", "y", "y"]
|
187
|
+
assert df.select("port").to_series().to_list() == [1, 2, 1, 2]
|
188
|
+
assert str(df.select("values").dtypes) == "[List(Int64)]"
|
189
|
+
assert df.select("values").to_series().to_list() == [
|
190
|
+
[1, 2, 3],
|
191
|
+
[1, 2, 3],
|
192
|
+
[1, 2, 3],
|
193
|
+
[1, 2, 3],
|
194
|
+
]
|
@@ -87,6 +87,14 @@ def test_iter_params():
|
|
87
87
|
assert next(it) == ("l.1.3", "c")
|
88
88
|
|
89
89
|
|
90
|
+
def test_collect_params():
|
91
|
+
from hydraflow.config import collect_params
|
92
|
+
|
93
|
+
conf = OmegaConf.create({"k": "v", "l": [1, {"a": "1", "b": "2", 3: "c"}]})
|
94
|
+
params = collect_params(conf)
|
95
|
+
assert params == {"k": "v", "l.0": 1, "l.1.a": "1", "l.1.b": "2", "l.1.3": "c"}
|
96
|
+
|
97
|
+
|
90
98
|
@dataclass
|
91
99
|
class Size:
|
92
100
|
x: int = 1
|
@@ -0,0 +1,43 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import mlflow
|
4
|
+
import pytest
|
5
|
+
|
6
|
+
from hydraflow.run_collection import RunCollection
|
7
|
+
|
8
|
+
|
9
|
+
@pytest.fixture
|
10
|
+
def runs(monkeypatch, tmp_path):
|
11
|
+
from hydraflow.mlflow import search_runs
|
12
|
+
|
13
|
+
monkeypatch.chdir(tmp_path)
|
14
|
+
|
15
|
+
mlflow.set_experiment("test_info")
|
16
|
+
|
17
|
+
for x in range(3):
|
18
|
+
with mlflow.start_run(run_name=f"{x}"):
|
19
|
+
mlflow.log_param("p", x)
|
20
|
+
mlflow.log_metric("metric1", x + 1)
|
21
|
+
mlflow.log_metric("metric2", x + 2)
|
22
|
+
|
23
|
+
x = search_runs()
|
24
|
+
assert isinstance(x, RunCollection)
|
25
|
+
return x
|
26
|
+
|
27
|
+
|
28
|
+
def test_data_params(runs: RunCollection):
|
29
|
+
assert runs.data.params == [{"p": "0"}, {"p": "1"}, {"p": "2"}]
|
30
|
+
|
31
|
+
|
32
|
+
def test_data_metrics(runs: RunCollection):
|
33
|
+
m = runs.data.metrics
|
34
|
+
assert m[0] == {"metric1": 1, "metric2": 2}
|
35
|
+
assert m[1] == {"metric1": 2, "metric2": 3}
|
36
|
+
assert m[2] == {"metric1": 3, "metric2": 4}
|
37
|
+
|
38
|
+
|
39
|
+
def test_data_empty_run_collection():
|
40
|
+
rc = RunCollection([])
|
41
|
+
assert rc.data.params == []
|
42
|
+
assert rc.data.metrics == []
|
43
|
+
assert rc.data.config == []
|
@@ -18,9 +18,7 @@ def runs(monkeypatch, tmp_path):
|
|
18
18
|
|
19
19
|
for x in range(3):
|
20
20
|
with mlflow.start_run(run_name=f"{x}"):
|
21
|
-
|
22
|
-
mlflow.log_metric("metric1", x + 1)
|
23
|
-
mlflow.log_metric("metric2", x + 2)
|
21
|
+
pass
|
24
22
|
|
25
23
|
x = search_runs()
|
26
24
|
assert isinstance(x, RunCollection)
|
@@ -31,17 +29,6 @@ def test_info_run_id(runs: RunCollection):
|
|
31
29
|
assert len(runs.info.run_id) == 3
|
32
30
|
|
33
31
|
|
34
|
-
def test_info_params(runs: RunCollection):
|
35
|
-
assert runs.info.params == [{"p": "0"}, {"p": "1"}, {"p": "2"}]
|
36
|
-
|
37
|
-
|
38
|
-
def test_info_metrics(runs: RunCollection):
|
39
|
-
m = runs.info.metrics
|
40
|
-
assert m[0] == {"metric1": 1, "metric2": 2}
|
41
|
-
assert m[1] == {"metric1": 2, "metric2": 3}
|
42
|
-
assert m[2] == {"metric1": 3, "metric2": 4}
|
43
|
-
|
44
|
-
|
45
32
|
def test_info_artifact_uri(runs: RunCollection):
|
46
33
|
uri = runs.info.artifact_uri
|
47
34
|
assert all(u.startswith("file://") for u in uri) # type: ignore
|
@@ -57,8 +44,5 @@ def test_info_artifact_dir(runs: RunCollection):
|
|
57
44
|
def test_info_empty_run_collection():
|
58
45
|
rc = RunCollection([])
|
59
46
|
assert rc.info.run_id == []
|
60
|
-
assert rc.info.params == []
|
61
|
-
assert rc.info.metrics == []
|
62
47
|
assert rc.info.artifact_uri == []
|
63
48
|
assert rc.info.artifact_dir == []
|
64
|
-
assert rc.info.config == []
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|