hydraflow 0.3.0__tar.gz → 0.3.2__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/devcontainer.json +0 -1
- {hydraflow-0.3.0 → hydraflow-0.3.2}/.gitignore +4 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/PKG-INFO +1 -1
- hydraflow-0.3.2/apps/quickstart.py +31 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/mkdocs.yml +3 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/pyproject.toml +6 -1
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/__init__.py +5 -9
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/config.py +10 -2
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/context.py +19 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/mlflow.py +8 -2
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/param.py +1 -1
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/run_collection.py +17 -13
- hydraflow-0.3.2/src/hydraflow/run_data.py +34 -0
- hydraflow-0.3.2/src/hydraflow/run_info.py +34 -0
- hydraflow-0.3.0/src/hydraflow/run_info.py → hydraflow-0.3.2/src/hydraflow/utils.py +23 -26
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/app.py +5 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_app.py +28 -7
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_collection.py +39 -39
- hydraflow-0.3.0/src/hydraflow/run_data.py +0 -56
- {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/postCreate.sh +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/starship.toml +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/.gitattributes +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/LICENSE +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/README.md +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/asyncio.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/progress.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/py.typed +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/__init__.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/__init__.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/progress.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/watch.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_asyncio.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_config.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_context.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_log_run.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_mlflow.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_param.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_progress.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_data.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_info.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_version.py +0 -0
- {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: hydraflow
|
3
|
-
Version: 0.3.0
|
3
|
+
Version: 0.3.2
|
4
4
|
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
5
|
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
6
|
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import logging
|
2
|
+
from dataclasses import dataclass
|
3
|
+
|
4
|
+
import hydra
|
5
|
+
from hydra.core.config_store import ConfigStore
|
6
|
+
|
7
|
+
import hydraflow
|
8
|
+
|
9
|
+
log = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
|
13
|
+
class Config:
|
14
|
+
width: int = 1024
|
15
|
+
height: int = 768
|
16
|
+
|
17
|
+
|
18
|
+
cs = ConfigStore.instance()
|
19
|
+
cs.store(name="config", node=Config)
|
20
|
+
|
21
|
+
|
22
|
+
@hydra.main(version_base=None, config_name="config")
|
23
|
+
def app(cfg: Config) -> None:
|
24
|
+
hydraflow.set_experiment()
|
25
|
+
|
26
|
+
with hydraflow.start_run(cfg):
|
27
|
+
log.info(f"{cfg.width=}, {cfg.height=}")
|
28
|
+
|
29
|
+
|
30
|
+
if __name__ == "__main__":
|
31
|
+
app()
|
@@ -38,6 +38,7 @@ theme:
|
|
38
38
|
- navigation.tracking
|
39
39
|
plugins:
|
40
40
|
- search
|
41
|
+
- markdown-exec
|
41
42
|
- mkapi
|
42
43
|
markdown_extensions:
|
43
44
|
- pymdownx.magiclink
|
@@ -50,4 +51,6 @@ markdown_extensions:
|
|
50
51
|
alternate_style: true
|
51
52
|
nav:
|
52
53
|
- Home: index.md
|
54
|
+
- Usage:
|
55
|
+
- usage/quickstart.md
|
53
56
|
- Reference: $api/hydraflow.**
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "hydraflow"
|
7
|
-
version = "0.3.0"
|
7
|
+
version = "0.3.2"
|
8
8
|
description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
|
9
9
|
readme = "README.md"
|
10
10
|
license = "MIT"
|
@@ -34,6 +34,10 @@ Issues = "https://github.com/daizutabi/hydraflow/issues"
|
|
34
34
|
|
35
35
|
[tool.uv]
|
36
36
|
dev-dependencies = [
|
37
|
+
"markdown-exec[ansi]",
|
38
|
+
"mkapi",
|
39
|
+
"mkdocs-material",
|
40
|
+
"mkdocs>=1.6",
|
37
41
|
"pytest-asyncio",
|
38
42
|
"pytest-clarity",
|
39
43
|
"pytest-cov",
|
@@ -97,3 +101,4 @@ exclude = ["tests/scripts/*.py"]
|
|
97
101
|
"SIM117",
|
98
102
|
"SLF",
|
99
103
|
]
|
104
|
+
"apps/*.py" = ["INP", "D", "G", "T"]
|
@@ -1,19 +1,15 @@
|
|
1
|
-
"""
|
1
|
+
"""Integrate Hydra and MLflow to manage and track machine learning experiments."""
|
2
2
|
|
3
|
-
from .context import chdir_artifact, log_run, start_run, watch
|
4
|
-
from .mlflow import (
|
5
|
-
list_runs,
|
6
|
-
search_runs,
|
7
|
-
set_experiment,
|
8
|
-
)
|
3
|
+
from .context import chdir_artifact, chdir_hydra, log_run, start_run, watch
|
4
|
+
from .mlflow import list_runs, search_runs, set_experiment
|
9
5
|
from .progress import multi_tasks_progress, parallel_progress
|
10
6
|
from .run_collection import RunCollection
|
11
|
-
from .
|
12
|
-
from .run_info import get_artifact_dir, get_hydra_output_dir
|
7
|
+
from .utils import get_artifact_dir, get_hydra_output_dir, load_config
|
13
8
|
|
14
9
|
__all__ = [
|
15
10
|
"RunCollection",
|
16
11
|
"chdir_artifact",
|
12
|
+
"chdir_hydra",
|
17
13
|
"get_artifact_dir",
|
18
14
|
"get_hydra_output_dir",
|
19
15
|
"list_runs",
|
@@ -54,7 +54,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
54
54
|
if isinstance(config, DictConfig):
|
55
55
|
for key, value in config.items():
|
56
56
|
if _is_param(value):
|
57
|
-
yield f"{prefix}{key}", value
|
57
|
+
yield f"{prefix}{key}", _convert(value)
|
58
58
|
|
59
59
|
else:
|
60
60
|
yield from _iter_params(value, f"{prefix}{key}.")
|
@@ -62,7 +62,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
|
62
62
|
elif isinstance(config, ListConfig):
|
63
63
|
for index, value in enumerate(config):
|
64
64
|
if _is_param(value):
|
65
|
-
yield f"{prefix}{index}", value
|
65
|
+
yield f"{prefix}{index}", _convert(value)
|
66
66
|
|
67
67
|
else:
|
68
68
|
yield from _iter_params(value, f"{prefix}{index}.")
|
@@ -78,3 +78,11 @@ def _is_param(value: object) -> bool:
|
|
78
78
|
return False
|
79
79
|
|
80
80
|
return True
|
81
|
+
|
82
|
+
|
83
|
+
def _convert(value: Any) -> Any:
|
84
|
+
"""Convert the given value to a Python object."""
|
85
|
+
if isinstance(value, ListConfig):
|
86
|
+
return list(value)
|
87
|
+
|
88
|
+
return value
|
@@ -238,6 +238,25 @@ class Handler(PatternMatchingEventHandler):
|
|
238
238
|
self.func(file)
|
239
239
|
|
240
240
|
|
241
|
+
@contextmanager
|
242
|
+
def chdir_hydra() -> Iterator[Path]:
|
243
|
+
"""Change the current working directory to the hydra output directory.
|
244
|
+
|
245
|
+
This context manager changes the current working directory to the hydra output
|
246
|
+
directory. It ensures that the directory is changed back to the original
|
247
|
+
directory after the context is exited.
|
248
|
+
"""
|
249
|
+
curdir = Path.cwd()
|
250
|
+
path = HydraConfig.get().runtime.output_dir
|
251
|
+
|
252
|
+
os.chdir(path)
|
253
|
+
try:
|
254
|
+
yield Path(path)
|
255
|
+
|
256
|
+
finally:
|
257
|
+
os.chdir(curdir)
|
258
|
+
|
259
|
+
|
241
260
|
@contextmanager
|
242
261
|
def chdir_artifact(
|
243
262
|
run: Run,
|
@@ -207,8 +207,14 @@ def _list_runs(
|
|
207
207
|
if experiment := mlflow.get_experiment_by_name(name):
|
208
208
|
loc = experiment.artifact_location
|
209
209
|
|
210
|
-
if isinstance(loc, str)
|
211
|
-
|
210
|
+
if isinstance(loc, str):
|
211
|
+
if loc.startswith("file://"):
|
212
|
+
path = Path(mlflow.artifacts.download_artifacts(loc))
|
213
|
+
elif Path(loc).is_dir():
|
214
|
+
path = Path(loc)
|
215
|
+
else:
|
216
|
+
continue
|
217
|
+
|
212
218
|
run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
|
213
219
|
|
214
220
|
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
@@ -239,8 +239,8 @@ class RunCollection:
|
|
239
239
|
The filtering supports:
|
240
240
|
- Exact matches for single values.
|
241
241
|
- Membership checks for lists of values.
|
242
|
-
- Range checks for tuples of two values (inclusive of the lower
|
243
|
-
and
|
242
|
+
- Range checks for tuples of two values (inclusive of both the lower
|
243
|
+
and upper bound).
|
244
244
|
|
245
245
|
Args:
|
246
246
|
config (object | None): The configuration object to filter the runs.
|
@@ -476,7 +476,7 @@ class RunCollection:
|
|
476
476
|
"""
|
477
477
|
return (func(run, *args, **kwargs) for run in self)
|
478
478
|
|
479
|
-
def
|
479
|
+
def map_id(
|
480
480
|
self,
|
481
481
|
func: Callable[Concatenate[str, P], T],
|
482
482
|
*args: P.args,
|
@@ -569,8 +569,8 @@ class RunCollection:
|
|
569
569
|
|
570
570
|
def group_by(
|
571
571
|
self,
|
572
|
-
|
573
|
-
) -> dict[tuple[str | None, ...], RunCollection]:
|
572
|
+
names: str | list[str],
|
573
|
+
) -> dict[str | None | tuple[str | None, ...], RunCollection]:
|
574
574
|
"""Group runs by specified parameter names.
|
575
575
|
|
576
576
|
Group the runs in the collection based on the values of the
|
@@ -578,19 +578,23 @@ class RunCollection:
|
|
578
578
|
form a key in the returned dictionary.
|
579
579
|
|
580
580
|
Args:
|
581
|
-
|
581
|
+
names (str | list[str]): The names of the parameters to group by.
|
582
582
|
This can be a single parameter name or multiple names provided
|
583
583
|
as separate arguments or as a list.
|
584
584
|
|
585
585
|
Returns:
|
586
|
-
dict[tuple[str | None, ...], RunCollection]: A
|
587
|
-
are tuples of parameter values and the
|
588
|
-
containing the runs that match
|
586
|
+
dict[str | None | tuple[str | None, ...], RunCollection]: A
|
587
|
+
dictionary where the keys are tuples of parameter values and the
|
588
|
+
values are `RunCollection` objects containing the runs that match
|
589
|
+
those parameter values.
|
589
590
|
|
590
591
|
"""
|
591
|
-
grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
|
592
|
+
grouped_runs: dict[str | None | tuple[str | None, ...], list[Run]] = {}
|
593
|
+
is_list = isinstance(names, list)
|
592
594
|
for run in self._runs:
|
593
|
-
key = get_params(run,
|
595
|
+
key = get_params(run, names)
|
596
|
+
if not is_list:
|
597
|
+
key = key[0]
|
594
598
|
grouped_runs.setdefault(key, []).append(run)
|
595
599
|
|
596
600
|
return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
|
@@ -637,8 +641,8 @@ def filter_runs(
|
|
637
641
|
The filtering supports:
|
638
642
|
- Exact matches for single values.
|
639
643
|
- Membership checks for lists of values.
|
640
|
-
- Range checks for tuples of two values (inclusive of the lower
|
641
|
-
|
644
|
+
- Range checks for tuples of two values (inclusive of both the lower and
|
645
|
+
upper bound).
|
642
646
|
|
643
647
|
Args:
|
644
648
|
runs (list[Run]): The list of runs to filter.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide data about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import load_config
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from omegaconf import DictConfig
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionData:
|
16
|
+
"""Provide data about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def params(self) -> list[dict[str, str]]:
|
23
|
+
"""Get the parameters for each run in the collection."""
|
24
|
+
return [run.data.params for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def metrics(self) -> list[dict[str, float]]:
|
28
|
+
"""Get the metrics for each run in the collection."""
|
29
|
+
return [run.data.metrics for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def config(self) -> list[DictConfig]:
|
33
|
+
"""Get the configuration for each run in the collection."""
|
34
|
+
return [load_config(run) for run in self._runs]
|
@@ -0,0 +1,34 @@
|
|
1
|
+
"""Provide information about `RunCollection` instances."""
|
2
|
+
|
3
|
+
from __future__ import annotations
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING
|
6
|
+
|
7
|
+
from hydraflow.utils import get_artifact_dir
|
8
|
+
|
9
|
+
if TYPE_CHECKING:
|
10
|
+
from pathlib import Path
|
11
|
+
|
12
|
+
from hydraflow.run_collection import RunCollection
|
13
|
+
|
14
|
+
|
15
|
+
class RunCollectionInfo:
|
16
|
+
"""Provide information about a `RunCollection` instance."""
|
17
|
+
|
18
|
+
def __init__(self, runs: RunCollection) -> None:
|
19
|
+
self._runs = runs
|
20
|
+
|
21
|
+
@property
|
22
|
+
def run_id(self) -> list[str]:
|
23
|
+
"""Get the run ID for each run in the collection."""
|
24
|
+
return [run.info.run_id for run in self._runs]
|
25
|
+
|
26
|
+
@property
|
27
|
+
def artifact_uri(self) -> list[str | None]:
|
28
|
+
"""Get the artifact URI for each run in the collection."""
|
29
|
+
return [run.info.artifact_uri for run in self._runs]
|
30
|
+
|
31
|
+
@property
|
32
|
+
def artifact_dir(self) -> list[Path]:
|
33
|
+
"""Get the artifact directory for each run in the collection."""
|
34
|
+
return [get_artifact_dir(run) for run in self._runs]
|
@@ -1,4 +1,4 @@
|
|
1
|
-
"""Provide
|
1
|
+
"""Provide utility functions for HydraFlow."""
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
@@ -7,36 +7,13 @@ from typing import TYPE_CHECKING
|
|
7
7
|
|
8
8
|
import mlflow
|
9
9
|
from hydra.core.hydra_config import HydraConfig
|
10
|
+
from mlflow.entities import Run
|
10
11
|
from mlflow.tracking import artifact_utils
|
11
|
-
from omegaconf import OmegaConf
|
12
|
+
from omegaconf import DictConfig, OmegaConf
|
12
13
|
|
13
14
|
if TYPE_CHECKING:
|
14
15
|
from mlflow.entities import Run
|
15
16
|
|
16
|
-
from hydraflow.run_collection import RunCollection
|
17
|
-
|
18
|
-
|
19
|
-
class RunCollectionInfo:
|
20
|
-
"""Provide information about MLflow runs."""
|
21
|
-
|
22
|
-
def __init__(self, runs: RunCollection) -> None:
|
23
|
-
self._runs = runs
|
24
|
-
|
25
|
-
@property
|
26
|
-
def run_id(self) -> list[str]:
|
27
|
-
"""Get the run ID for each run in the collection."""
|
28
|
-
return [run.info.run_id for run in self._runs]
|
29
|
-
|
30
|
-
@property
|
31
|
-
def artifact_uri(self) -> list[str | None]:
|
32
|
-
"""Get the artifact URI for each run in the collection."""
|
33
|
-
return [run.info.artifact_uri for run in self._runs]
|
34
|
-
|
35
|
-
@property
|
36
|
-
def artifact_dir(self) -> list[Path]:
|
37
|
-
"""Get the artifact directory for each run in the collection."""
|
38
|
-
return [get_artifact_dir(run) for run in self._runs]
|
39
|
-
|
40
17
|
|
41
18
|
def get_artifact_dir(run: Run | None = None) -> Path:
|
42
19
|
"""Retrieve the artifact directory for the given run.
|
@@ -89,3 +66,23 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
|
|
89
66
|
return Path(hc.hydra.runtime.output_dir)
|
90
67
|
|
91
68
|
raise FileNotFoundError
|
69
|
+
|
70
|
+
|
71
|
+
def load_config(run: Run) -> DictConfig:
|
72
|
+
"""Load the configuration for a given run.
|
73
|
+
|
74
|
+
This function loads the configuration for the provided Run instance
|
75
|
+
by downloading the configuration file from the MLflow artifacts and
|
76
|
+
loading it using OmegaConf. It returns an empty config if
|
77
|
+
`.hydra/config.yaml` is not found in the run's artifact directory.
|
78
|
+
|
79
|
+
Args:
|
80
|
+
run (Run): The Run instance for which to load the configuration.
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
The loaded configuration as a DictConfig object. Returns an empty
|
84
|
+
DictConfig if the configuration file is not found.
|
85
|
+
|
86
|
+
"""
|
87
|
+
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
88
|
+
return OmegaConf.load(path) # type: ignore
|
@@ -27,6 +27,9 @@ cs.store(name="config", node=MySQLConfig)
|
|
27
27
|
|
28
28
|
@hydra.main(version_base=None, config_name="config")
|
29
29
|
def app(cfg: MySQLConfig):
|
30
|
+
with hydraflow.chdir_hydra() as path:
|
31
|
+
Path("chdir_hydra.txt").write_text(path.as_posix())
|
32
|
+
|
30
33
|
hydraflow.set_experiment(prefix="_", suffix="_")
|
31
34
|
with hydraflow.start_run(cfg):
|
32
35
|
log.info(f"START, {cfg.host}, {cfg.port} ")
|
@@ -34,6 +37,8 @@ def app(cfg: MySQLConfig):
|
|
34
37
|
artifact_dir = hydraflow.get_artifact_dir()
|
35
38
|
output_dir = hydraflow.get_hydra_output_dir()
|
36
39
|
|
40
|
+
assert (output_dir / "chdir_hydra.txt").exists()
|
41
|
+
|
37
42
|
mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
|
38
43
|
mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
|
39
44
|
|
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING
|
|
8
8
|
import mlflow
|
9
9
|
import pytest
|
10
10
|
from mlflow.entities import RunStatus
|
11
|
-
from omegaconf import OmegaConf
|
11
|
+
from omegaconf import ListConfig, OmegaConf
|
12
12
|
|
13
13
|
if TYPE_CHECKING:
|
14
14
|
from omegaconf import DictConfig
|
@@ -114,6 +114,13 @@ def test_app_data_config(rc: RunCollection):
|
|
114
114
|
assert config[3].host == "y"
|
115
115
|
|
116
116
|
|
117
|
+
def test_app_data_config_list(rc: RunCollection):
|
118
|
+
config = rc.data.config
|
119
|
+
assert isinstance(config[0]["values"], ListConfig)
|
120
|
+
assert not isinstance(config[0]["values"], list)
|
121
|
+
assert config[0]["values"] == [1, 2, 3]
|
122
|
+
|
123
|
+
|
117
124
|
def test_app_info_artifact_uri(rc: RunCollection):
|
118
125
|
uris = rc.info.artifact_uri
|
119
126
|
assert all(uri.startswith("file://") for uri in uris) # type: ignore
|
@@ -122,14 +129,14 @@ def test_app_info_artifact_uri(rc: RunCollection):
|
|
122
129
|
|
123
130
|
|
124
131
|
def test_app_info_artifact_dir(rc: RunCollection):
|
125
|
-
from hydraflow.
|
132
|
+
from hydraflow.utils import get_artifact_dir
|
126
133
|
|
127
134
|
dirs = list(rc.map(get_artifact_dir))
|
128
135
|
assert rc.info.artifact_dir == dirs
|
129
136
|
|
130
137
|
|
131
138
|
def test_app_hydra_output_dir(rc: RunCollection):
|
132
|
-
from hydraflow.
|
139
|
+
from hydraflow.utils import get_hydra_output_dir
|
133
140
|
|
134
141
|
dirs = list(rc.map(get_hydra_output_dir))
|
135
142
|
assert dirs[0].stem == "0"
|
@@ -154,13 +161,20 @@ def test_app_group_by(rc: RunCollection):
|
|
154
161
|
grouped = rc.group_by("host")
|
155
162
|
assert len(grouped) == 2
|
156
163
|
x = {"port": "1", "host": "x", "values": "[1, 2, 3]"}
|
157
|
-
assert grouped[
|
164
|
+
assert grouped["x"].data.params[0] == x
|
158
165
|
x = {"port": "2", "host": "x", "values": "[1, 2, 3]"}
|
159
|
-
assert grouped[
|
166
|
+
assert grouped["x"].data.params[1] == x
|
160
167
|
x = {"port": "1", "host": "y", "values": "[1, 2, 3]"}
|
161
|
-
assert grouped[
|
168
|
+
assert grouped["y"].data.params[0] == x
|
162
169
|
x = {"port": "2", "host": "y", "values": "[1, 2, 3]"}
|
163
|
-
assert grouped[
|
170
|
+
assert grouped["y"].data.params[1] == x
|
171
|
+
|
172
|
+
|
173
|
+
def test_app_group_by_list(rc: RunCollection):
|
174
|
+
grouped = rc.group_by(["host"])
|
175
|
+
assert len(grouped) == 2
|
176
|
+
assert ("x",) in grouped
|
177
|
+
assert ("y",) in grouped
|
164
178
|
|
165
179
|
|
166
180
|
def test_app_filter_list(rc: RunCollection):
|
@@ -178,3 +192,10 @@ def test_config(rc: RunCollection):
|
|
178
192
|
assert df.shape == (4, 3)
|
179
193
|
assert df.select("host").to_series().to_list() == ["x", "x", "y", "y"]
|
180
194
|
assert df.select("port").to_series().to_list() == [1, 2, 1, 2]
|
195
|
+
assert str(df.select("values").dtypes) == "[List(Int64)]"
|
196
|
+
assert df.select("values").to_series().to_list() == [
|
197
|
+
[1, 2, 3],
|
198
|
+
[1, 2, 3],
|
199
|
+
[1, 2, 3],
|
200
|
+
[1, 2, 3],
|
201
|
+
]
|
@@ -28,12 +28,12 @@ def rc(monkeypatch, tmp_path):
|
|
28
28
|
return x
|
29
29
|
|
30
30
|
|
31
|
-
def
|
31
|
+
def test_bool_false():
|
32
32
|
assert not RunCollection([])
|
33
33
|
assert bool(RunCollection.from_list([])) is False
|
34
34
|
|
35
35
|
|
36
|
-
def
|
36
|
+
def test_bool_true(rc: RunCollection):
|
37
37
|
assert rc
|
38
38
|
assert bool(rc) is True
|
39
39
|
|
@@ -90,7 +90,7 @@ def test_filter_tuple(run_list: list[Run]):
|
|
90
90
|
from hydraflow.run_collection import filter_runs
|
91
91
|
|
92
92
|
x = filter_runs(run_list, p=(1, 3))
|
93
|
-
assert len(x) ==
|
93
|
+
assert len(x) == 3
|
94
94
|
|
95
95
|
|
96
96
|
def test_filter_invalid_param(run_list: list[Run]):
|
@@ -139,45 +139,45 @@ def test_chdir_artifact_list(i: int, run_list: list[Run]):
|
|
139
139
|
assert not Path("abc.txt").exists()
|
140
140
|
|
141
141
|
|
142
|
-
def
|
142
|
+
def test_repr(rc: RunCollection):
|
143
143
|
assert repr(rc) == "RunCollection(6)"
|
144
144
|
|
145
145
|
|
146
|
-
def
|
146
|
+
def test_first(rc: RunCollection):
|
147
147
|
run = rc.first()
|
148
148
|
assert isinstance(run, Run)
|
149
149
|
assert run.data.params["p"] == "0"
|
150
150
|
|
151
151
|
|
152
|
-
def
|
152
|
+
def test_first_empty(rc: RunCollection):
|
153
153
|
rc._runs = []
|
154
154
|
with pytest.raises(ValueError):
|
155
155
|
rc.first()
|
156
156
|
|
157
157
|
|
158
|
-
def
|
158
|
+
def test_try_first_none(rc: RunCollection):
|
159
159
|
rc._runs = []
|
160
160
|
assert rc.try_first() is None
|
161
161
|
|
162
162
|
|
163
|
-
def
|
163
|
+
def test_last(rc: RunCollection):
|
164
164
|
run = rc.last()
|
165
165
|
assert isinstance(run, Run)
|
166
166
|
assert run.data.params["p"] == "5"
|
167
167
|
|
168
168
|
|
169
|
-
def
|
169
|
+
def test_last_empty(rc: RunCollection):
|
170
170
|
rc._runs = []
|
171
171
|
with pytest.raises(ValueError):
|
172
172
|
rc.last()
|
173
173
|
|
174
174
|
|
175
|
-
def
|
175
|
+
def test_try_last_none(rc: RunCollection):
|
176
176
|
rc._runs = []
|
177
177
|
assert rc.try_last() is None
|
178
178
|
|
179
179
|
|
180
|
-
def
|
180
|
+
def test_filter(rc: RunCollection):
|
181
181
|
assert len(rc.filter()) == 6
|
182
182
|
assert len(rc.filter({})) == 6
|
183
183
|
assert len(rc.filter({"p": 1})) == 1
|
@@ -192,14 +192,14 @@ def test_runs_filter(rc: RunCollection):
|
|
192
192
|
assert len(rc.filter(r=0)) == 2
|
193
193
|
|
194
194
|
|
195
|
-
def
|
195
|
+
def test_get(rc: RunCollection):
|
196
196
|
run = rc.get({"p": 4})
|
197
197
|
assert isinstance(run, Run)
|
198
198
|
run = rc.get(p=2)
|
199
199
|
assert isinstance(run, Run)
|
200
200
|
|
201
201
|
|
202
|
-
def
|
202
|
+
def test_try_get(rc: RunCollection):
|
203
203
|
run = rc.try_get({"p": 5})
|
204
204
|
assert isinstance(run, Run)
|
205
205
|
run = rc.try_get(p=1)
|
@@ -208,7 +208,7 @@ def test_runs_try_get(rc: RunCollection):
|
|
208
208
|
assert run is None
|
209
209
|
|
210
210
|
|
211
|
-
def
|
211
|
+
def test_get_param_names(rc: RunCollection):
|
212
212
|
names = rc.get_param_names()
|
213
213
|
assert len(names) == 3
|
214
214
|
assert "p" in names
|
@@ -216,14 +216,14 @@ def test_runs_get_params_names(rc: RunCollection):
|
|
216
216
|
assert "r" in names
|
217
217
|
|
218
218
|
|
219
|
-
def
|
219
|
+
def test_get_param_dict(rc: RunCollection):
|
220
220
|
params = rc.get_param_dict()
|
221
221
|
assert params["p"] == ["0", "1", "2", "3", "4", "5"]
|
222
222
|
assert params["q"] == ["0", "None"]
|
223
223
|
assert params["r"] == ["0", "1", "2"]
|
224
224
|
|
225
225
|
|
226
|
-
def
|
226
|
+
def test_get_param_dict_drop_const(rc: RunCollection):
|
227
227
|
rc_ = rc.filter(q=0)
|
228
228
|
params = rc_.get_param_dict(drop_const=True)
|
229
229
|
assert len(params) == 2
|
@@ -232,7 +232,7 @@ def test_runs_get_params_dict_drop_const(rc: RunCollection):
|
|
232
232
|
assert "r" in params
|
233
233
|
|
234
234
|
|
235
|
-
def
|
235
|
+
def test_find(rc: RunCollection):
|
236
236
|
run = rc.find({"r": 0})
|
237
237
|
assert isinstance(run, Run)
|
238
238
|
assert run.data.params["p"] == "0"
|
@@ -241,17 +241,17 @@ def test_runs_find(rc: RunCollection):
|
|
241
241
|
assert run.data.params["p"] == "2"
|
242
242
|
|
243
243
|
|
244
|
-
def
|
244
|
+
def test_find_none(rc: RunCollection):
|
245
245
|
with pytest.raises(ValueError):
|
246
246
|
rc.find({"r": 10})
|
247
247
|
|
248
248
|
|
249
|
-
def
|
249
|
+
def test_try_find_none(rc: RunCollection):
|
250
250
|
run = rc.try_find({"r": 10})
|
251
251
|
assert run is None
|
252
252
|
|
253
253
|
|
254
|
-
def
|
254
|
+
def test_find_last(rc: RunCollection):
|
255
255
|
run = rc.find_last({"r": 0})
|
256
256
|
assert isinstance(run, Run)
|
257
257
|
assert run.data.params["p"] == "3"
|
@@ -260,12 +260,12 @@ def test_runs_find_last(rc: RunCollection):
|
|
260
260
|
assert run.data.params["p"] == "5"
|
261
261
|
|
262
262
|
|
263
|
-
def
|
263
|
+
def test_find_last_none(rc: RunCollection):
|
264
264
|
with pytest.raises(ValueError):
|
265
265
|
rc.find_last({"p": 10})
|
266
266
|
|
267
267
|
|
268
|
-
def
|
268
|
+
def test_try_find_last_none(rc: RunCollection):
|
269
269
|
run = rc.try_find_last({"p": 10})
|
270
270
|
assert run is None
|
271
271
|
|
@@ -313,42 +313,42 @@ def test_list_runs_none(rc, runs2):
|
|
313
313
|
assert not no_runs
|
314
314
|
|
315
315
|
|
316
|
-
def
|
316
|
+
def test_map(rc: RunCollection):
|
317
317
|
results = list(rc.map(lambda run: run.info.run_id))
|
318
318
|
assert len(results) == len(rc._runs)
|
319
319
|
assert all(isinstance(run_id, str) for run_id in results)
|
320
320
|
|
321
321
|
|
322
|
-
def
|
322
|
+
def test_map_args(rc: RunCollection):
|
323
323
|
results = list(rc.map(lambda run, x: run.info.run_id + x, "test"))
|
324
324
|
assert all(x.endswith("test") for x in results)
|
325
325
|
|
326
326
|
|
327
|
-
def
|
328
|
-
results = list(rc.
|
327
|
+
def test_map_id(rc: RunCollection):
|
328
|
+
results = list(rc.map_id(lambda run_id: run_id))
|
329
329
|
assert len(results) == len(rc._runs)
|
330
330
|
assert all(isinstance(run_id, str) for run_id in results)
|
331
331
|
|
332
332
|
|
333
|
-
def
|
334
|
-
results = list(rc.
|
333
|
+
def test_map_id_kwargs(rc: RunCollection):
|
334
|
+
results = list(rc.map_id(lambda run_id, x: x + run_id, x="test"))
|
335
335
|
assert all(x.startswith("test") for x in results)
|
336
336
|
|
337
337
|
|
338
|
-
def
|
338
|
+
def test_map_uri(rc: RunCollection):
|
339
339
|
results = list(rc.map_uri(lambda uri: uri))
|
340
340
|
assert len(results) == len(rc._runs)
|
341
341
|
assert all(isinstance(uri, str | type(None)) for uri in results)
|
342
342
|
|
343
343
|
|
344
|
-
def
|
344
|
+
def test_map_dir(rc: RunCollection):
|
345
345
|
results = list(rc.map_dir(lambda dir_path, x: dir_path / x, "a.csv"))
|
346
346
|
assert len(results) == len(rc._runs)
|
347
347
|
assert all(isinstance(dir_path, Path) for dir_path in results)
|
348
348
|
assert all(dir_path.stem == "a" for dir_path in results)
|
349
349
|
|
350
350
|
|
351
|
-
def
|
351
|
+
def test_sort(rc: RunCollection):
|
352
352
|
rc.sort(key=lambda x: x.data.params["p"])
|
353
353
|
assert [run.data.params["p"] for run in rc] == ["0", "1", "2", "3", "4", "5"]
|
354
354
|
|
@@ -356,7 +356,7 @@ def test_run_collection_sort(rc: RunCollection):
|
|
356
356
|
assert [run.data.params["p"] for run in rc] == ["5", "4", "3", "2", "1", "0"]
|
357
357
|
|
358
358
|
|
359
|
-
def
|
359
|
+
def test_iter(rc: RunCollection):
|
360
360
|
assert list(rc) == rc._runs
|
361
361
|
|
362
362
|
|
@@ -366,39 +366,39 @@ def test_run_collection_getitem(rc: RunCollection, i: int):
|
|
366
366
|
|
367
367
|
|
368
368
|
@pytest.mark.parametrize("i", range(6))
|
369
|
-
def
|
369
|
+
def test_getitem_slice(rc: RunCollection, i: int):
|
370
370
|
assert rc[i : i + 2]._runs == rc._runs[i : i + 2]
|
371
371
|
|
372
372
|
|
373
373
|
@pytest.mark.parametrize("i", range(6))
|
374
|
-
def
|
374
|
+
def test_getitem_slice_step(rc: RunCollection, i: int):
|
375
375
|
assert rc[i::2]._runs == rc._runs[i::2]
|
376
376
|
|
377
377
|
|
378
378
|
@pytest.mark.parametrize("i", range(6))
|
379
|
-
def
|
379
|
+
def test_getitem_slice_step_neg(rc: RunCollection, i: int):
|
380
380
|
assert rc[i::-2]._runs == rc._runs[i::-2]
|
381
381
|
|
382
382
|
|
383
|
-
def
|
383
|
+
def test_take(rc: RunCollection):
|
384
384
|
assert rc.take(3)._runs == rc._runs[:3]
|
385
385
|
assert len(rc.take(4)) == 4
|
386
386
|
assert rc.take(10)._runs == rc._runs
|
387
387
|
|
388
388
|
|
389
|
-
def
|
389
|
+
def test_take_neg(rc: RunCollection):
|
390
390
|
assert rc.take(-3)._runs == rc._runs[-3:]
|
391
391
|
assert len(rc.take(-4)) == 4
|
392
392
|
assert rc.take(-10)._runs == rc._runs
|
393
393
|
|
394
394
|
|
395
395
|
@pytest.mark.parametrize("i", range(6))
|
396
|
-
def
|
396
|
+
def test_contains(rc: RunCollection, i: int):
|
397
397
|
assert rc[i] in rc
|
398
398
|
assert rc._runs[i] in rc
|
399
399
|
|
400
400
|
|
401
|
-
def
|
401
|
+
def test_group_by(rc: RunCollection):
|
402
402
|
grouped = rc.group_by(["p"])
|
403
403
|
assert len(grouped) == 6
|
404
404
|
assert all(isinstance(group, RunCollection) for group in grouped.values())
|
@@ -1,56 +0,0 @@
|
|
1
|
-
"""Provide information about MLflow runs."""
|
2
|
-
|
3
|
-
from __future__ import annotations
|
4
|
-
|
5
|
-
from typing import TYPE_CHECKING
|
6
|
-
|
7
|
-
from omegaconf import DictConfig, OmegaConf
|
8
|
-
|
9
|
-
from hydraflow.run_info import get_artifact_dir
|
10
|
-
|
11
|
-
if TYPE_CHECKING:
|
12
|
-
from mlflow.entities import Run
|
13
|
-
|
14
|
-
from hydraflow.run_collection import RunCollection
|
15
|
-
|
16
|
-
|
17
|
-
class RunCollectionData:
|
18
|
-
"""Provide information about MLflow runs."""
|
19
|
-
|
20
|
-
def __init__(self, runs: RunCollection) -> None:
|
21
|
-
self._runs = runs
|
22
|
-
|
23
|
-
@property
|
24
|
-
def params(self) -> list[dict[str, str]]:
|
25
|
-
"""Get the parameters for each run in the collection."""
|
26
|
-
return [run.data.params for run in self._runs]
|
27
|
-
|
28
|
-
@property
|
29
|
-
def metrics(self) -> list[dict[str, float]]:
|
30
|
-
"""Get the metrics for each run in the collection."""
|
31
|
-
return [run.data.metrics for run in self._runs]
|
32
|
-
|
33
|
-
@property
|
34
|
-
def config(self) -> list[DictConfig]:
|
35
|
-
"""Get the configuration for each run in the collection."""
|
36
|
-
return [load_config(run) for run in self._runs]
|
37
|
-
|
38
|
-
|
39
|
-
def load_config(run: Run) -> DictConfig:
|
40
|
-
"""Load the configuration for a given run.
|
41
|
-
|
42
|
-
This function loads the configuration for the provided Run instance
|
43
|
-
by downloading the configuration file from the MLflow artifacts and
|
44
|
-
loading it using OmegaConf. It returns an empty config if
|
45
|
-
`.hydra/config.yaml` is not found in the run's artifact directory.
|
46
|
-
|
47
|
-
Args:
|
48
|
-
run (Run): The Run instance for which to load the configuration.
|
49
|
-
|
50
|
-
Returns:
|
51
|
-
The loaded configuration as a DictConfig object. Returns an empty
|
52
|
-
DictConfig if the configuration file is not found.
|
53
|
-
|
54
|
-
"""
|
55
|
-
path = get_artifact_dir(run) / ".hydra/config.yaml"
|
56
|
-
return OmegaConf.load(path) # type: ignore
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|