hydraflow 0.3.0__tar.gz → 0.3.2__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/devcontainer.json +0 -1
  2. {hydraflow-0.3.0 → hydraflow-0.3.2}/.gitignore +4 -0
  3. {hydraflow-0.3.0 → hydraflow-0.3.2}/PKG-INFO +1 -1
  4. hydraflow-0.3.2/apps/quickstart.py +31 -0
  5. {hydraflow-0.3.0 → hydraflow-0.3.2}/mkdocs.yml +3 -0
  6. {hydraflow-0.3.0 → hydraflow-0.3.2}/pyproject.toml +6 -1
  7. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/__init__.py +5 -9
  8. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/config.py +10 -2
  9. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/context.py +19 -0
  10. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/mlflow.py +8 -2
  11. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/param.py +1 -1
  12. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/run_collection.py +17 -13
  13. hydraflow-0.3.2/src/hydraflow/run_data.py +34 -0
  14. hydraflow-0.3.2/src/hydraflow/run_info.py +34 -0
  15. hydraflow-0.3.0/src/hydraflow/run_info.py → hydraflow-0.3.2/src/hydraflow/utils.py +23 -26
  16. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/app.py +5 -0
  17. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_app.py +28 -7
  18. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_collection.py +39 -39
  19. hydraflow-0.3.0/src/hydraflow/run_data.py +0 -56
  20. {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/postCreate.sh +0 -0
  21. {hydraflow-0.3.0 → hydraflow-0.3.2}/.devcontainer/starship.toml +0 -0
  22. {hydraflow-0.3.0 → hydraflow-0.3.2}/.gitattributes +0 -0
  23. {hydraflow-0.3.0 → hydraflow-0.3.2}/LICENSE +0 -0
  24. {hydraflow-0.3.0 → hydraflow-0.3.2}/README.md +0 -0
  25. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/asyncio.py +0 -0
  26. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/progress.py +0 -0
  27. {hydraflow-0.3.0 → hydraflow-0.3.2}/src/hydraflow/py.typed +0 -0
  28. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/__init__.py +0 -0
  29. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/__init__.py +0 -0
  30. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/progress.py +0 -0
  31. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/scripts/watch.py +0 -0
  32. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_asyncio.py +0 -0
  33. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_config.py +0 -0
  34. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_context.py +0 -0
  35. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_log_run.py +0 -0
  36. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_mlflow.py +0 -0
  37. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_param.py +0 -0
  38. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_progress.py +0 -0
  39. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_data.py +0 -0
  40. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_run_info.py +0 -0
  41. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_version.py +0 -0
  42. {hydraflow-0.3.0 → hydraflow-0.3.2}/tests/test_watch.py +0 -0
@@ -1,5 +1,4 @@
1
1
  {
2
- "name": "hydraflow",
3
2
  "image": "mcr.microsoft.com/vscode/devcontainers/python:3.12",
4
3
  "features": {
5
4
  "ghcr.io/devcontainers-contrib/features/starship:1": {},
@@ -1,7 +1,11 @@
1
+ *.db
1
2
  .coverage
2
3
  .env
3
4
  .venv/
4
5
  __pycache__/
5
6
  dist/
6
7
  lcov.info
8
+ mlruns/
9
+ multirun/
10
+ outputs/
7
11
  uv.lock
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -0,0 +1,31 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+
4
+ import hydra
5
+ from hydra.core.config_store import ConfigStore
6
+
7
+ import hydraflow
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
+
12
+ @dataclass
13
+ class Config:
14
+ width: int = 1024
15
+ height: int = 768
16
+
17
+
18
+ cs = ConfigStore.instance()
19
+ cs.store(name="config", node=Config)
20
+
21
+
22
+ @hydra.main(version_base=None, config_name="config")
23
+ def app(cfg: Config) -> None:
24
+ hydraflow.set_experiment()
25
+
26
+ with hydraflow.start_run(cfg):
27
+ log.info(f"{cfg.width=}, {cfg.height=}")
28
+
29
+
30
+ if __name__ == "__main__":
31
+ app()
@@ -38,6 +38,7 @@ theme:
38
38
  - navigation.tracking
39
39
  plugins:
40
40
  - search
41
+ - markdown-exec
41
42
  - mkapi
42
43
  markdown_extensions:
43
44
  - pymdownx.magiclink
@@ -50,4 +51,6 @@ markdown_extensions:
50
51
  alternate_style: true
51
52
  nav:
52
53
  - Home: index.md
54
+ - Usage:
55
+ - usage/quickstart.md
53
56
  - Reference: $api/hydraflow.**
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -34,6 +34,10 @@ Issues = "https://github.com/daizutabi/hydraflow/issues"
34
34
 
35
35
  [tool.uv]
36
36
  dev-dependencies = [
37
+ "markdown-exec[ansi]",
38
+ "mkapi",
39
+ "mkdocs-material",
40
+ "mkdocs>=1.6",
37
41
  "pytest-asyncio",
38
42
  "pytest-clarity",
39
43
  "pytest-cov",
@@ -97,3 +101,4 @@ exclude = ["tests/scripts/*.py"]
97
101
  "SIM117",
98
102
  "SLF",
99
103
  ]
104
+ "apps/*.py" = ["INP", "D", "G", "T"]
@@ -1,19 +1,15 @@
1
- """Provide a collection of MLflow runs."""
1
+ """Integrate Hydra and MLflow to manage and track machine learning experiments."""
2
2
 
3
- from .context import chdir_artifact, log_run, start_run, watch
4
- from .mlflow import (
5
- list_runs,
6
- search_runs,
7
- set_experiment,
8
- )
3
+ from .context import chdir_artifact, chdir_hydra, log_run, start_run, watch
4
+ from .mlflow import list_runs, search_runs, set_experiment
9
5
  from .progress import multi_tasks_progress, parallel_progress
10
6
  from .run_collection import RunCollection
11
- from .run_data import load_config
12
- from .run_info import get_artifact_dir, get_hydra_output_dir
7
+ from .utils import get_artifact_dir, get_hydra_output_dir, load_config
13
8
 
14
9
  __all__ = [
15
10
  "RunCollection",
16
11
  "chdir_artifact",
12
+ "chdir_hydra",
17
13
  "get_artifact_dir",
18
14
  "get_hydra_output_dir",
19
15
  "list_runs",
@@ -54,7 +54,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
54
54
  if isinstance(config, DictConfig):
55
55
  for key, value in config.items():
56
56
  if _is_param(value):
57
- yield f"{prefix}{key}", value
57
+ yield f"{prefix}{key}", _convert(value)
58
58
 
59
59
  else:
60
60
  yield from _iter_params(value, f"{prefix}{key}.")
@@ -62,7 +62,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
62
62
  elif isinstance(config, ListConfig):
63
63
  for index, value in enumerate(config):
64
64
  if _is_param(value):
65
- yield f"{prefix}{index}", value
65
+ yield f"{prefix}{index}", _convert(value)
66
66
 
67
67
  else:
68
68
  yield from _iter_params(value, f"{prefix}{index}.")
@@ -78,3 +78,11 @@ def _is_param(value: object) -> bool:
78
78
  return False
79
79
 
80
80
  return True
81
+
82
+
83
+ def _convert(value: Any) -> Any:
84
+ """Convert the given value to a Python object."""
85
+ if isinstance(value, ListConfig):
86
+ return list(value)
87
+
88
+ return value
@@ -238,6 +238,25 @@ class Handler(PatternMatchingEventHandler):
238
238
  self.func(file)
239
239
 
240
240
 
241
+ @contextmanager
242
+ def chdir_hydra() -> Iterator[Path]:
243
+ """Change the current working directory to the hydra output directory.
244
+
245
+ This context manager changes the current working directory to the hydra output
246
+ directory. It ensures that the directory is changed back to the original
247
+ directory after the context is exited.
248
+ """
249
+ curdir = Path.cwd()
250
+ path = HydraConfig.get().runtime.output_dir
251
+
252
+ os.chdir(path)
253
+ try:
254
+ yield Path(path)
255
+
256
+ finally:
257
+ os.chdir(curdir)
258
+
259
+
241
260
  @contextmanager
242
261
  def chdir_artifact(
243
262
  run: Run,
@@ -207,8 +207,14 @@ def _list_runs(
207
207
  if experiment := mlflow.get_experiment_by_name(name):
208
208
  loc = experiment.artifact_location
209
209
 
210
- if isinstance(loc, str) and loc.startswith("file://"):
211
- path = Path(mlflow.artifacts.download_artifacts(loc))
210
+ if isinstance(loc, str):
211
+ if loc.startswith("file://"):
212
+ path = Path(mlflow.artifacts.download_artifacts(loc))
213
+ elif Path(loc).is_dir():
214
+ path = Path(loc)
215
+ else:
216
+ continue
217
+
212
218
  run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
213
219
 
214
220
  it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
@@ -72,4 +72,4 @@ def _match_tuple(param: str, value: tuple) -> bool | None:
72
72
  if type(value[0]) is not type(value[1]):
73
73
  return None
74
74
 
75
- return value[0] <= type(value[0])(param) < value[1] # type: ignore
75
+ return value[0] <= type(value[0])(param) <= value[1] # type: ignore
@@ -239,8 +239,8 @@ class RunCollection:
239
239
  The filtering supports:
240
240
  - Exact matches for single values.
241
241
  - Membership checks for lists of values.
242
- - Range checks for tuples of two values (inclusive of the lower bound
243
- and exclusive of the upper bound).
242
+ - Range checks for tuples of two values (inclusive of both the lower
243
+ and upper bound).
244
244
 
245
245
  Args:
246
246
  config (object | None): The configuration object to filter the runs.
@@ -476,7 +476,7 @@ class RunCollection:
476
476
  """
477
477
  return (func(run, *args, **kwargs) for run in self)
478
478
 
479
- def map_run_id(
479
+ def map_id(
480
480
  self,
481
481
  func: Callable[Concatenate[str, P], T],
482
482
  *args: P.args,
@@ -569,8 +569,8 @@ class RunCollection:
569
569
 
570
570
  def group_by(
571
571
  self,
572
- *names: str | list[str],
573
- ) -> dict[tuple[str | None, ...], RunCollection]:
572
+ names: str | list[str],
573
+ ) -> dict[str | None | tuple[str | None, ...], RunCollection]:
574
574
  """Group runs by specified parameter names.
575
575
 
576
576
  Group the runs in the collection based on the values of the
@@ -578,19 +578,23 @@ class RunCollection:
578
578
  form a key in the returned dictionary.
579
579
 
580
580
  Args:
581
- *names (str | list[str]): The names of the parameters to group by.
581
+ names (str | list[str]): The names of the parameters to group by.
582
582
  This can be a single parameter name or multiple names provided
583
583
  as a list.
584
584
 
585
585
  Returns:
586
- dict[tuple[str | None, ...], RunCollection]: A dictionary where the keys
587
- are tuples of parameter values and the values are RunCollection objects
588
- containing the runs that match those parameter values.
586
+ dict[str | None | tuple[str | None, ...], RunCollection]: A
587
+ dictionary where the keys are parameter values (a single value if
588
+ `names` is a string, a tuple of values if it is a list) and the
589
+ values are `RunCollection` objects containing the matching runs.
589
590
 
590
591
  """
591
- grouped_runs: dict[tuple[str | None, ...], list[Run]] = {}
592
+ grouped_runs: dict[str | None | tuple[str | None, ...], list[Run]] = {}
593
+ is_list = isinstance(names, list)
592
594
  for run in self._runs:
593
- key = get_params(run, *names)
595
+ key = get_params(run, names)
596
+ if not is_list:
597
+ key = key[0]
594
598
  grouped_runs.setdefault(key, []).append(run)
595
599
 
596
600
  return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
@@ -637,8 +641,8 @@ def filter_runs(
637
641
  The filtering supports:
638
642
  - Exact matches for single values.
639
643
  - Membership checks for lists of values.
640
- - Range checks for tuples of two values (inclusive of the lower bound and
641
- exclusive of the upper bound).
644
+ - Range checks for tuples of two values (inclusive of both the lower and
645
+ upper bound).
642
646
 
643
647
  Args:
644
648
  runs (list[Run]): The list of runs to filter.
@@ -0,0 +1,34 @@
1
+ """Provide data about `RunCollection` instances."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from hydraflow.utils import load_config
8
+
9
+ if TYPE_CHECKING:
10
+ from omegaconf import DictConfig
11
+
12
+ from hydraflow.run_collection import RunCollection
13
+
14
+
15
+ class RunCollectionData:
16
+ """Provide data about a `RunCollection` instance."""
17
+
18
+ def __init__(self, runs: RunCollection) -> None:
19
+ self._runs = runs
20
+
21
+ @property
22
+ def params(self) -> list[dict[str, str]]:
23
+ """Get the parameters for each run in the collection."""
24
+ return [run.data.params for run in self._runs]
25
+
26
+ @property
27
+ def metrics(self) -> list[dict[str, float]]:
28
+ """Get the metrics for each run in the collection."""
29
+ return [run.data.metrics for run in self._runs]
30
+
31
+ @property
32
+ def config(self) -> list[DictConfig]:
33
+ """Get the configuration for each run in the collection."""
34
+ return [load_config(run) for run in self._runs]
@@ -0,0 +1,34 @@
1
+ """Provide information about `RunCollection` instances."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from hydraflow.utils import get_artifact_dir
8
+
9
+ if TYPE_CHECKING:
10
+ from pathlib import Path
11
+
12
+ from hydraflow.run_collection import RunCollection
13
+
14
+
15
+ class RunCollectionInfo:
16
+ """Provide information about a `RunCollection` instance."""
17
+
18
+ def __init__(self, runs: RunCollection) -> None:
19
+ self._runs = runs
20
+
21
+ @property
22
+ def run_id(self) -> list[str]:
23
+ """Get the run ID for each run in the collection."""
24
+ return [run.info.run_id for run in self._runs]
25
+
26
+ @property
27
+ def artifact_uri(self) -> list[str | None]:
28
+ """Get the artifact URI for each run in the collection."""
29
+ return [run.info.artifact_uri for run in self._runs]
30
+
31
+ @property
32
+ def artifact_dir(self) -> list[Path]:
33
+ """Get the artifact directory for each run in the collection."""
34
+ return [get_artifact_dir(run) for run in self._runs]
@@ -1,4 +1,4 @@
1
- """Provide information about MLflow runs."""
1
+ """Provide utility functions for HydraFlow."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -7,36 +7,13 @@ from typing import TYPE_CHECKING
7
7
 
8
8
  import mlflow
9
9
  from hydra.core.hydra_config import HydraConfig
10
+ from mlflow.entities import Run
10
11
  from mlflow.tracking import artifact_utils
11
- from omegaconf import OmegaConf
12
+ from omegaconf import DictConfig, OmegaConf
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from mlflow.entities import Run
15
16
 
16
- from hydraflow.run_collection import RunCollection
17
-
18
-
19
- class RunCollectionInfo:
20
- """Provide information about MLflow runs."""
21
-
22
- def __init__(self, runs: RunCollection) -> None:
23
- self._runs = runs
24
-
25
- @property
26
- def run_id(self) -> list[str]:
27
- """Get the run ID for each run in the collection."""
28
- return [run.info.run_id for run in self._runs]
29
-
30
- @property
31
- def artifact_uri(self) -> list[str | None]:
32
- """Get the artifact URI for each run in the collection."""
33
- return [run.info.artifact_uri for run in self._runs]
34
-
35
- @property
36
- def artifact_dir(self) -> list[Path]:
37
- """Get the artifact directory for each run in the collection."""
38
- return [get_artifact_dir(run) for run in self._runs]
39
-
40
17
 
41
18
  def get_artifact_dir(run: Run | None = None) -> Path:
42
19
  """Retrieve the artifact directory for the given run.
@@ -89,3 +66,23 @@ def get_hydra_output_dir(run: Run | None = None) -> Path:
89
66
  return Path(hc.hydra.runtime.output_dir)
90
67
 
91
68
  raise FileNotFoundError
69
+
70
+
71
+ def load_config(run: Run) -> DictConfig:
72
+ """Load the configuration for a given run.
73
+
74
+ This function loads the configuration for the provided Run instance
75
+ by downloading the configuration file from the MLflow artifacts and
76
+ loading it using OmegaConf. The file is expected at
77
+ `.hydra/config.yaml` in the run's artifact directory.
78
+
79
+ Args:
80
+ run (Run): The Run instance for which to load the configuration.
81
+
82
+ Returns:
83
+ The loaded configuration as a DictConfig object. No fallback is
84
+ applied here if the configuration file is missing.
85
+
86
+ """
87
+ path = get_artifact_dir(run) / ".hydra/config.yaml"
88
+ return OmegaConf.load(path) # type: ignore
@@ -27,6 +27,9 @@ cs.store(name="config", node=MySQLConfig)
27
27
 
28
28
  @hydra.main(version_base=None, config_name="config")
29
29
  def app(cfg: MySQLConfig):
30
+ with hydraflow.chdir_hydra() as path:
31
+ Path("chdir_hydra.txt").write_text(path.as_posix())
32
+
30
33
  hydraflow.set_experiment(prefix="_", suffix="_")
31
34
  with hydraflow.start_run(cfg):
32
35
  log.info(f"START, {cfg.host}, {cfg.port} ")
@@ -34,6 +37,8 @@ def app(cfg: MySQLConfig):
34
37
  artifact_dir = hydraflow.get_artifact_dir()
35
38
  output_dir = hydraflow.get_hydra_output_dir()
36
39
 
40
+ assert (output_dir / "chdir_hydra.txt").exists()
41
+
37
42
  mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
38
43
  mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
39
44
 
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING
8
8
  import mlflow
9
9
  import pytest
10
10
  from mlflow.entities import RunStatus
11
- from omegaconf import OmegaConf
11
+ from omegaconf import ListConfig, OmegaConf
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from omegaconf import DictConfig
@@ -114,6 +114,13 @@ def test_app_data_config(rc: RunCollection):
114
114
  assert config[3].host == "y"
115
115
 
116
116
 
117
+ def test_app_data_config_list(rc: RunCollection):
118
+ config = rc.data.config
119
+ assert isinstance(config[0]["values"], ListConfig)
120
+ assert not isinstance(config[0]["values"], list)
121
+ assert config[0]["values"] == [1, 2, 3]
122
+
123
+
117
124
  def test_app_info_artifact_uri(rc: RunCollection):
118
125
  uris = rc.info.artifact_uri
119
126
  assert all(uri.startswith("file://") for uri in uris) # type: ignore
@@ -122,14 +129,14 @@ def test_app_info_artifact_uri(rc: RunCollection):
122
129
 
123
130
 
124
131
  def test_app_info_artifact_dir(rc: RunCollection):
125
- from hydraflow.run_info import get_artifact_dir
132
+ from hydraflow.utils import get_artifact_dir
126
133
 
127
134
  dirs = list(rc.map(get_artifact_dir))
128
135
  assert rc.info.artifact_dir == dirs
129
136
 
130
137
 
131
138
  def test_app_hydra_output_dir(rc: RunCollection):
132
- from hydraflow.run_info import get_hydra_output_dir
139
+ from hydraflow.utils import get_hydra_output_dir
133
140
 
134
141
  dirs = list(rc.map(get_hydra_output_dir))
135
142
  assert dirs[0].stem == "0"
@@ -154,13 +161,20 @@ def test_app_group_by(rc: RunCollection):
154
161
  grouped = rc.group_by("host")
155
162
  assert len(grouped) == 2
156
163
  x = {"port": "1", "host": "x", "values": "[1, 2, 3]"}
157
- assert grouped[("x",)].data.params[0] == x
164
+ assert grouped["x"].data.params[0] == x
158
165
  x = {"port": "2", "host": "x", "values": "[1, 2, 3]"}
159
- assert grouped[("x",)].data.params[1] == x
166
+ assert grouped["x"].data.params[1] == x
160
167
  x = {"port": "1", "host": "y", "values": "[1, 2, 3]"}
161
- assert grouped[("y",)].data.params[0] == x
168
+ assert grouped["y"].data.params[0] == x
162
169
  x = {"port": "2", "host": "y", "values": "[1, 2, 3]"}
163
- assert grouped[("y",)].data.params[1] == x
170
+ assert grouped["y"].data.params[1] == x
171
+
172
+
173
+ def test_app_group_by_list(rc: RunCollection):
174
+ grouped = rc.group_by(["host"])
175
+ assert len(grouped) == 2
176
+ assert ("x",) in grouped
177
+ assert ("y",) in grouped
164
178
 
165
179
 
166
180
  def test_app_filter_list(rc: RunCollection):
@@ -178,3 +192,10 @@ def test_config(rc: RunCollection):
178
192
  assert df.shape == (4, 3)
179
193
  assert df.select("host").to_series().to_list() == ["x", "x", "y", "y"]
180
194
  assert df.select("port").to_series().to_list() == [1, 2, 1, 2]
195
+ assert str(df.select("values").dtypes) == "[List(Int64)]"
196
+ assert df.select("values").to_series().to_list() == [
197
+ [1, 2, 3],
198
+ [1, 2, 3],
199
+ [1, 2, 3],
200
+ [1, 2, 3],
201
+ ]
@@ -28,12 +28,12 @@ def rc(monkeypatch, tmp_path):
28
28
  return x
29
29
 
30
30
 
31
- def test_run_collection_bool_false():
31
+ def test_bool_false():
32
32
  assert not RunCollection([])
33
33
  assert bool(RunCollection.from_list([])) is False
34
34
 
35
35
 
36
- def test_run_collection_bool_true(rc: RunCollection):
36
+ def test_bool_true(rc: RunCollection):
37
37
  assert rc
38
38
  assert bool(rc) is True
39
39
 
@@ -90,7 +90,7 @@ def test_filter_tuple(run_list: list[Run]):
90
90
  from hydraflow.run_collection import filter_runs
91
91
 
92
92
  x = filter_runs(run_list, p=(1, 3))
93
- assert len(x) == 2
93
+ assert len(x) == 3
94
94
 
95
95
 
96
96
  def test_filter_invalid_param(run_list: list[Run]):
@@ -139,45 +139,45 @@ def test_chdir_artifact_list(i: int, run_list: list[Run]):
139
139
  assert not Path("abc.txt").exists()
140
140
 
141
141
 
142
- def test_runs_repr(rc: RunCollection):
142
+ def test_repr(rc: RunCollection):
143
143
  assert repr(rc) == "RunCollection(6)"
144
144
 
145
145
 
146
- def test_runs_first(rc: RunCollection):
146
+ def test_first(rc: RunCollection):
147
147
  run = rc.first()
148
148
  assert isinstance(run, Run)
149
149
  assert run.data.params["p"] == "0"
150
150
 
151
151
 
152
- def test_runs_first_empty(rc: RunCollection):
152
+ def test_first_empty(rc: RunCollection):
153
153
  rc._runs = []
154
154
  with pytest.raises(ValueError):
155
155
  rc.first()
156
156
 
157
157
 
158
- def test_runs_try_first_none(rc: RunCollection):
158
+ def test_try_first_none(rc: RunCollection):
159
159
  rc._runs = []
160
160
  assert rc.try_first() is None
161
161
 
162
162
 
163
- def test_runs_last(rc: RunCollection):
163
+ def test_last(rc: RunCollection):
164
164
  run = rc.last()
165
165
  assert isinstance(run, Run)
166
166
  assert run.data.params["p"] == "5"
167
167
 
168
168
 
169
- def test_runs_last_empty(rc: RunCollection):
169
+ def test_last_empty(rc: RunCollection):
170
170
  rc._runs = []
171
171
  with pytest.raises(ValueError):
172
172
  rc.last()
173
173
 
174
174
 
175
- def test_runs_try_last_none(rc: RunCollection):
175
+ def test_try_last_none(rc: RunCollection):
176
176
  rc._runs = []
177
177
  assert rc.try_last() is None
178
178
 
179
179
 
180
- def test_runs_filter(rc: RunCollection):
180
+ def test_filter(rc: RunCollection):
181
181
  assert len(rc.filter()) == 6
182
182
  assert len(rc.filter({})) == 6
183
183
  assert len(rc.filter({"p": 1})) == 1
@@ -192,14 +192,14 @@ def test_runs_filter(rc: RunCollection):
192
192
  assert len(rc.filter(r=0)) == 2
193
193
 
194
194
 
195
- def test_runs_get(rc: RunCollection):
195
+ def test_get(rc: RunCollection):
196
196
  run = rc.get({"p": 4})
197
197
  assert isinstance(run, Run)
198
198
  run = rc.get(p=2)
199
199
  assert isinstance(run, Run)
200
200
 
201
201
 
202
- def test_runs_try_get(rc: RunCollection):
202
+ def test_try_get(rc: RunCollection):
203
203
  run = rc.try_get({"p": 5})
204
204
  assert isinstance(run, Run)
205
205
  run = rc.try_get(p=1)
@@ -208,7 +208,7 @@ def test_runs_try_get(rc: RunCollection):
208
208
  assert run is None
209
209
 
210
210
 
211
- def test_runs_get_params_names(rc: RunCollection):
211
+ def test_get_param_names(rc: RunCollection):
212
212
  names = rc.get_param_names()
213
213
  assert len(names) == 3
214
214
  assert "p" in names
@@ -216,14 +216,14 @@ def test_runs_get_params_names(rc: RunCollection):
216
216
  assert "r" in names
217
217
 
218
218
 
219
- def test_runs_get_params_dict(rc: RunCollection):
219
+ def test_get_param_dict(rc: RunCollection):
220
220
  params = rc.get_param_dict()
221
221
  assert params["p"] == ["0", "1", "2", "3", "4", "5"]
222
222
  assert params["q"] == ["0", "None"]
223
223
  assert params["r"] == ["0", "1", "2"]
224
224
 
225
225
 
226
- def test_runs_get_params_dict_drop_const(rc: RunCollection):
226
+ def test_get_param_dict_drop_const(rc: RunCollection):
227
227
  rc_ = rc.filter(q=0)
228
228
  params = rc_.get_param_dict(drop_const=True)
229
229
  assert len(params) == 2
@@ -232,7 +232,7 @@ def test_runs_get_params_dict_drop_const(rc: RunCollection):
232
232
  assert "r" in params
233
233
 
234
234
 
235
- def test_runs_find(rc: RunCollection):
235
+ def test_find(rc: RunCollection):
236
236
  run = rc.find({"r": 0})
237
237
  assert isinstance(run, Run)
238
238
  assert run.data.params["p"] == "0"
@@ -241,17 +241,17 @@ def test_runs_find(rc: RunCollection):
241
241
  assert run.data.params["p"] == "2"
242
242
 
243
243
 
244
- def test_runs_find_none(rc: RunCollection):
244
+ def test_find_none(rc: RunCollection):
245
245
  with pytest.raises(ValueError):
246
246
  rc.find({"r": 10})
247
247
 
248
248
 
249
- def test_runs_try_find_none(rc: RunCollection):
249
+ def test_try_find_none(rc: RunCollection):
250
250
  run = rc.try_find({"r": 10})
251
251
  assert run is None
252
252
 
253
253
 
254
- def test_runs_find_last(rc: RunCollection):
254
+ def test_find_last(rc: RunCollection):
255
255
  run = rc.find_last({"r": 0})
256
256
  assert isinstance(run, Run)
257
257
  assert run.data.params["p"] == "3"
@@ -260,12 +260,12 @@ def test_runs_find_last(rc: RunCollection):
260
260
  assert run.data.params["p"] == "5"
261
261
 
262
262
 
263
- def test_runs_find_last_none(rc: RunCollection):
263
+ def test_find_last_none(rc: RunCollection):
264
264
  with pytest.raises(ValueError):
265
265
  rc.find_last({"p": 10})
266
266
 
267
267
 
268
- def test_runs_try_find_last_none(rc: RunCollection):
268
+ def test_try_find_last_none(rc: RunCollection):
269
269
  run = rc.try_find_last({"p": 10})
270
270
  assert run is None
271
271
 
@@ -313,42 +313,42 @@ def test_list_runs_none(rc, runs2):
313
313
  assert not no_runs
314
314
 
315
315
 
316
- def test_run_collection_map(rc: RunCollection):
316
+ def test_map(rc: RunCollection):
317
317
  results = list(rc.map(lambda run: run.info.run_id))
318
318
  assert len(results) == len(rc._runs)
319
319
  assert all(isinstance(run_id, str) for run_id in results)
320
320
 
321
321
 
322
- def test_run_collection_map_args(rc: RunCollection):
322
+ def test_map_args(rc: RunCollection):
323
323
  results = list(rc.map(lambda run, x: run.info.run_id + x, "test"))
324
324
  assert all(x.endswith("test") for x in results)
325
325
 
326
326
 
327
- def test_run_collection_map_run_id(rc: RunCollection):
328
- results = list(rc.map_run_id(lambda run_id: run_id))
327
+ def test_map_id(rc: RunCollection):
328
+ results = list(rc.map_id(lambda run_id: run_id))
329
329
  assert len(results) == len(rc._runs)
330
330
  assert all(isinstance(run_id, str) for run_id in results)
331
331
 
332
332
 
333
- def test_run_collection_map_run_id_kwargs(rc: RunCollection):
334
- results = list(rc.map_run_id(lambda run_id, x: x + run_id, x="test"))
333
+ def test_map_id_kwargs(rc: RunCollection):
334
+ results = list(rc.map_id(lambda run_id, x: x + run_id, x="test"))
335
335
  assert all(x.startswith("test") for x in results)
336
336
 
337
337
 
338
- def test_run_collection_map_uri(rc: RunCollection):
338
+ def test_map_uri(rc: RunCollection):
339
339
  results = list(rc.map_uri(lambda uri: uri))
340
340
  assert len(results) == len(rc._runs)
341
341
  assert all(isinstance(uri, str | type(None)) for uri in results)
342
342
 
343
343
 
344
- def test_run_collection_map_dir(rc: RunCollection):
344
+ def test_map_dir(rc: RunCollection):
345
345
  results = list(rc.map_dir(lambda dir_path, x: dir_path / x, "a.csv"))
346
346
  assert len(results) == len(rc._runs)
347
347
  assert all(isinstance(dir_path, Path) for dir_path in results)
348
348
  assert all(dir_path.stem == "a" for dir_path in results)
349
349
 
350
350
 
351
- def test_run_collection_sort(rc: RunCollection):
351
+ def test_sort(rc: RunCollection):
352
352
  rc.sort(key=lambda x: x.data.params["p"])
353
353
  assert [run.data.params["p"] for run in rc] == ["0", "1", "2", "3", "4", "5"]
354
354
 
@@ -356,7 +356,7 @@ def test_run_collection_sort(rc: RunCollection):
356
356
  assert [run.data.params["p"] for run in rc] == ["5", "4", "3", "2", "1", "0"]
357
357
 
358
358
 
359
- def test_run_collection_iter(rc: RunCollection):
359
+ def test_iter(rc: RunCollection):
360
360
  assert list(rc) == rc._runs
361
361
 
362
362
 
@@ -366,39 +366,39 @@ def test_run_collection_getitem(rc: RunCollection, i: int):
366
366
 
367
367
 
368
368
  @pytest.mark.parametrize("i", range(6))
369
- def test_run_collection_getitem_slice(rc: RunCollection, i: int):
369
+ def test_getitem_slice(rc: RunCollection, i: int):
370
370
  assert rc[i : i + 2]._runs == rc._runs[i : i + 2]
371
371
 
372
372
 
373
373
  @pytest.mark.parametrize("i", range(6))
374
- def test_run_collection_getitem_slice_step(rc: RunCollection, i: int):
374
+ def test_getitem_slice_step(rc: RunCollection, i: int):
375
375
  assert rc[i::2]._runs == rc._runs[i::2]
376
376
 
377
377
 
378
378
  @pytest.mark.parametrize("i", range(6))
379
- def test_run_collection_getitem_slice_step_neg(rc: RunCollection, i: int):
379
+ def test_getitem_slice_step_neg(rc: RunCollection, i: int):
380
380
  assert rc[i::-2]._runs == rc._runs[i::-2]
381
381
 
382
382
 
383
- def test_run_collection_take(rc: RunCollection):
383
+ def test_take(rc: RunCollection):
384
384
  assert rc.take(3)._runs == rc._runs[:3]
385
385
  assert len(rc.take(4)) == 4
386
386
  assert rc.take(10)._runs == rc._runs
387
387
 
388
388
 
389
- def test_run_collection_take_neg(rc: RunCollection):
389
+ def test_take_neg(rc: RunCollection):
390
390
  assert rc.take(-3)._runs == rc._runs[-3:]
391
391
  assert len(rc.take(-4)) == 4
392
392
  assert rc.take(-10)._runs == rc._runs
393
393
 
394
394
 
395
395
  @pytest.mark.parametrize("i", range(6))
396
- def test_run_collection_contains(rc: RunCollection, i: int):
396
+ def test_contains(rc: RunCollection, i: int):
397
397
  assert rc[i] in rc
398
398
  assert rc._runs[i] in rc
399
399
 
400
400
 
401
- def test_run_collection_group_by(rc: RunCollection):
401
+ def test_group_by(rc: RunCollection):
402
402
  grouped = rc.group_by(["p"])
403
403
  assert len(grouped) == 6
404
404
  assert all(isinstance(group, RunCollection) for group in grouped.values())
@@ -1,56 +0,0 @@
1
- """Provide information about MLflow runs."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import TYPE_CHECKING
6
-
7
- from omegaconf import DictConfig, OmegaConf
8
-
9
- from hydraflow.run_info import get_artifact_dir
10
-
11
- if TYPE_CHECKING:
12
- from mlflow.entities import Run
13
-
14
- from hydraflow.run_collection import RunCollection
15
-
16
-
17
- class RunCollectionData:
18
- """Provide information about MLflow runs."""
19
-
20
- def __init__(self, runs: RunCollection) -> None:
21
- self._runs = runs
22
-
23
- @property
24
- def params(self) -> list[dict[str, str]]:
25
- """Get the parameters for each run in the collection."""
26
- return [run.data.params for run in self._runs]
27
-
28
- @property
29
- def metrics(self) -> list[dict[str, float]]:
30
- """Get the metrics for each run in the collection."""
31
- return [run.data.metrics for run in self._runs]
32
-
33
- @property
34
- def config(self) -> list[DictConfig]:
35
- """Get the configuration for each run in the collection."""
36
- return [load_config(run) for run in self._runs]
37
-
38
-
39
- def load_config(run: Run) -> DictConfig:
40
- """Load the configuration for a given run.
41
-
42
- This function loads the configuration for the provided Run instance
43
- by downloading the configuration file from the MLflow artifacts and
44
- loading it using OmegaConf. It returns an empty config if
45
- `.hydra/config.yaml` is not found in the run's artifact directory.
46
-
47
- Args:
48
- run (Run): The Run instance for which to load the configuration.
49
-
50
- Returns:
51
- The loaded configuration as a DictConfig object. Returns an empty
52
- DictConfig if the configuration file is not found.
53
-
54
- """
55
- path = get_artifact_dir(run) / ".hydra/config.yaml"
56
- return OmegaConf.load(path) # type: ignore
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes