hydraflow-0.2.18-py3-none-any.whl → hydraflow-0.3.1-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
hydraflow/__init__.py CHANGED
@@ -1,16 +1,15 @@
1
- from .context import chdir_artifact, log_run, start_run, watch
2
- from .info import get_artifact_dir, get_hydra_output_dir, load_config
3
- from .mlflow import (
4
- list_runs,
5
- search_runs,
6
- set_experiment,
7
- )
1
+ """Integrate Hydra and MLflow to manage and track machine learning experiments."""
2
+
3
+ from .context import chdir_artifact, chdir_hydra, log_run, start_run, watch
4
+ from .mlflow import list_runs, search_runs, set_experiment
8
5
  from .progress import multi_tasks_progress, parallel_progress
9
6
  from .run_collection import RunCollection
7
+ from .utils import get_artifact_dir, get_hydra_output_dir, load_config
10
8
 
11
9
  __all__ = [
12
10
  "RunCollection",
13
11
  "chdir_artifact",
12
+ "chdir_hydra",
14
13
  "get_artifact_dir",
15
14
  "get_hydra_output_dir",
16
15
  "list_runs",
hydraflow/config.py CHANGED
@@ -11,6 +11,20 @@ if TYPE_CHECKING:
11
11
  from typing import Any
12
12
 
13
13
 
14
+ def collect_params(config: object) -> dict[str, Any]:
15
+ """Iterate over parameters and collect them into a dictionary.
16
+
17
+ Args:
18
+ config (object): The configuration object to iterate over.
19
+ prefix (str): The prefix to prepend to the parameter keys.
20
+
21
+ Returns:
22
+ dict[str, Any]: A dictionary of collected parameters.
23
+
24
+ """
25
+ return dict(iter_params(config))
26
+
27
+
14
28
  def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
15
29
  """Recursively iterate over the parameters in the given configuration object.
16
30
 
@@ -40,7 +54,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
40
54
  if isinstance(config, DictConfig):
41
55
  for key, value in config.items():
42
56
  if _is_param(value):
43
- yield f"{prefix}{key}", value
57
+ yield f"{prefix}{key}", _convert(value)
44
58
 
45
59
  else:
46
60
  yield from _iter_params(value, f"{prefix}{key}.")
@@ -48,7 +62,7 @@ def _iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
48
62
  elif isinstance(config, ListConfig):
49
63
  for index, value in enumerate(config):
50
64
  if _is_param(value):
51
- yield f"{prefix}{index}", value
65
+ yield f"{prefix}{index}", _convert(value)
52
66
 
53
67
  else:
54
68
  yield from _iter_params(value, f"{prefix}{index}.")
@@ -64,3 +78,11 @@ def _is_param(value: object) -> bool:
64
78
  return False
65
79
 
66
80
  return True
81
+
82
+
83
+ def _convert(value: Any) -> Any:
84
+ """Convert the given value to a Python object."""
85
+ if isinstance(value, ListConfig):
86
+ return list(value)
87
+
88
+ return value
hydraflow/context.py CHANGED
@@ -14,8 +14,8 @@ from hydra.core.hydra_config import HydraConfig
14
14
  from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
15
15
  from watchdog.observers import Observer
16
16
 
17
- from hydraflow.info import get_artifact_dir
18
17
  from hydraflow.mlflow import log_params
18
+ from hydraflow.run_info import get_artifact_dir
19
19
 
20
20
  if TYPE_CHECKING:
21
21
  from collections.abc import Callable, Iterator
@@ -238,6 +238,25 @@ class Handler(PatternMatchingEventHandler):
238
238
  self.func(file)
239
239
 
240
240
 
241
+ @contextmanager
242
+ def chdir_hydra() -> Iterator[Path]:
243
+ """Change the current working directory to the hydra output directory.
244
+
245
+ This context manager changes the current working directory to the hydra output
246
+ directory. It ensures that the directory is changed back to the original
247
+ directory after the context is exited.
248
+ """
249
+ curdir = Path.cwd()
250
+ path = HydraConfig.get().runtime.output_dir
251
+
252
+ os.chdir(path)
253
+ try:
254
+ yield Path(path)
255
+
256
+ finally:
257
+ os.chdir(curdir)
258
+
259
+
241
260
  @contextmanager
242
261
  def chdir_artifact(
243
262
  run: Run,
hydraflow/mlflow.py CHANGED
@@ -207,8 +207,14 @@ def _list_runs(
207
207
  if experiment := mlflow.get_experiment_by_name(name):
208
208
  loc = experiment.artifact_location
209
209
 
210
- if isinstance(loc, str) and loc.startswith("file://"):
211
- path = Path(mlflow.artifacts.download_artifacts(loc))
210
+ if isinstance(loc, str):
211
+ if loc.startswith("file://"):
212
+ path = Path(mlflow.artifacts.download_artifacts(loc))
213
+ elif Path(loc).is_dir():
214
+ path = Path(loc)
215
+ else:
216
+ continue
217
+
212
218
  run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
213
219
 
214
220
  it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
hydraflow/run_collection.py CHANGED
@@ -24,10 +24,12 @@ from itertools import chain
24
24
  from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
25
25
 
26
26
  from mlflow.entities import RunStatus
27
+ from polars.dataframe import DataFrame
27
28
 
28
29
  import hydraflow.param
29
- from hydraflow.config import iter_params
30
- from hydraflow.info import RunCollectionInfo
30
+ from hydraflow.config import collect_params, iter_params
31
+ from hydraflow.run_data import RunCollectionData
32
+ from hydraflow.run_info import RunCollectionInfo
31
33
 
32
34
  if TYPE_CHECKING:
33
35
  from collections.abc import Callable, Iterator
@@ -61,8 +63,12 @@ class RunCollection:
61
63
  _info: RunCollectionInfo = field(init=False)
62
64
  """An instance of `RunCollectionInfo`."""
63
65
 
66
+ _data: RunCollectionData = field(init=False)
67
+ """An instance of `RunCollectionData`."""
68
+
64
69
  def __post_init__(self) -> None:
65
70
  self._info = RunCollectionInfo(self)
71
+ self._data = RunCollectionData(self)
66
72
 
67
73
  def __repr__(self) -> str:
68
74
  return f"{self.__class__.__name__}({len(self)})"
@@ -101,6 +107,11 @@ class RunCollection:
101
107
  """An instance of `RunCollectionInfo`."""
102
108
  return self._info
103
109
 
110
+ @property
111
+ def data(self) -> RunCollectionData:
112
+ """An instance of `RunCollectionData`."""
113
+ return self._data
114
+
104
115
  def take(self, n: int) -> RunCollection:
105
116
  """Take the first n runs from the collection.
106
117
 
@@ -371,7 +382,7 @@ class RunCollection:
371
382
  raise ValueError(msg)
372
383
 
373
384
  def try_get(self, config: object | None = None, **kwargs) -> Run | None:
374
- """Try to retrieve a specific `Run` instance based on the provided config.
385
+ """Try to get a specific `Run` instance based on the provided configuration.
375
386
 
376
387
  This method filters the runs in the collection according to the
377
388
  specified configuration object and returns the run that matches the
@@ -505,7 +516,7 @@ class RunCollection:
505
516
  in the collection.
506
517
 
507
518
  """
508
- return (func(config, *args, **kwargs) for config in self.info.config)
519
+ return (func(config, *args, **kwargs) for config in self.data.config)
509
520
 
510
521
  def map_uri(
511
522
  self,
@@ -584,6 +595,16 @@ class RunCollection:
584
595
 
585
596
  return {key: RunCollection(runs) for key, runs in grouped_runs.items()}
586
597
 
598
+ @property
599
+ def config(self) -> DataFrame:
600
+ """Get the runs' configurations as a polars DataFrame.
601
+
602
+ Returns:
603
+ A polars DataFrame containing the runs' configurations.
604
+
605
+ """
606
+ return DataFrame(self.map_config(collect_params))
607
+
587
608
 
588
609
  def _param_matches(run: Run, key: str, value: Any) -> bool:
589
610
  params = run.data.params
@@ -634,8 +655,10 @@ def filter_runs(
634
655
  """
635
656
  for key, value in chain(iter_params(config), kwargs.items()):
636
657
  runs = [run for run in runs if _param_matches(run, key, value)]
658
+ if not runs:
659
+ return []
637
660
 
638
- if len(runs) == 0 or status is None:
661
+ if status is None:
639
662
  return runs
640
663
 
641
664
  return filter_runs_by_status(runs, status)
hydraflow/run_data.py ADDED
@@ -0,0 +1,34 @@
1
+ """Provide data about `RunCollection` instances."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from hydraflow.utils import load_config
8
+
9
+ if TYPE_CHECKING:
10
+ from omegaconf import DictConfig
11
+
12
+ from hydraflow.run_collection import RunCollection
13
+
14
+
15
+ class RunCollectionData:
16
+ """Provide data about a `RunCollection` instance."""
17
+
18
+ def __init__(self, runs: RunCollection) -> None:
19
+ self._runs = runs
20
+
21
+ @property
22
+ def params(self) -> list[dict[str, str]]:
23
+ """Get the parameters for each run in the collection."""
24
+ return [run.data.params for run in self._runs]
25
+
26
+ @property
27
+ def metrics(self) -> list[dict[str, float]]:
28
+ """Get the metrics for each run in the collection."""
29
+ return [run.data.metrics for run in self._runs]
30
+
31
+ @property
32
+ def config(self) -> list[DictConfig]:
33
+ """Get the configuration for each run in the collection."""
34
+ return [load_config(run) for run in self._runs]
hydraflow/run_info.py ADDED
@@ -0,0 +1,34 @@
1
+ """Provide information about `RunCollection` instances."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from hydraflow.utils import get_artifact_dir
8
+
9
+ if TYPE_CHECKING:
10
+ from pathlib import Path
11
+
12
+ from hydraflow.run_collection import RunCollection
13
+
14
+
15
+ class RunCollectionInfo:
16
+ """Provide information about a `RunCollection` instance."""
17
+
18
+ def __init__(self, runs: RunCollection) -> None:
19
+ self._runs = runs
20
+
21
+ @property
22
+ def run_id(self) -> list[str]:
23
+ """Get the run ID for each run in the collection."""
24
+ return [run.info.run_id for run in self._runs]
25
+
26
+ @property
27
+ def artifact_uri(self) -> list[str | None]:
28
+ """Get the artifact URI for each run in the collection."""
29
+ return [run.info.artifact_uri for run in self._runs]
30
+
31
+ @property
32
+ def artifact_dir(self) -> list[Path]:
33
+ """Get the artifact directory for each run in the collection."""
34
+ return [get_artifact_dir(run) for run in self._runs]
hydraflow/info.py → hydraflow/utils.py RENAMED
@@ -1,4 +1,4 @@
1
- """Provide information about MLflow runs."""
1
+ """Provide utility functions for HydraFlow."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -7,51 +7,13 @@ from typing import TYPE_CHECKING
7
7
 
8
8
  import mlflow
9
9
  from hydra.core.hydra_config import HydraConfig
10
+ from mlflow.entities import Run
10
11
  from mlflow.tracking import artifact_utils
11
12
  from omegaconf import DictConfig, OmegaConf
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from mlflow.entities import Run
15
16
 
16
- from hydraflow.run_collection import RunCollection
17
-
18
-
19
- class RunCollectionInfo:
20
- """Provide information about MLflow runs."""
21
-
22
- def __init__(self, runs: RunCollection) -> None:
23
- self._runs = runs
24
-
25
- @property
26
- def run_id(self) -> list[str]:
27
- """Get the run ID for each run in the collection."""
28
- return [run.info.run_id for run in self._runs]
29
-
30
- @property
31
- def params(self) -> list[dict[str, str]]:
32
- """Get the parameters for each run in the collection."""
33
- return [run.data.params for run in self._runs]
34
-
35
- @property
36
- def metrics(self) -> list[dict[str, float]]:
37
- """Get the metrics for each run in the collection."""
38
- return [run.data.metrics for run in self._runs]
39
-
40
- @property
41
- def artifact_uri(self) -> list[str | None]:
42
- """Get the artifact URI for each run in the collection."""
43
- return [run.info.artifact_uri for run in self._runs]
44
-
45
- @property
46
- def artifact_dir(self) -> list[Path]:
47
- """Get the artifact directory for each run in the collection."""
48
- return [get_artifact_dir(run) for run in self._runs]
49
-
50
- @property
51
- def config(self) -> list[DictConfig]:
52
- """Get the configuration for each run in the collection."""
53
- return [load_config(run) for run in self._runs]
54
-
55
17
 
56
18
  def get_artifact_dir(run: Run | None = None) -> Path:
57
19
  """Retrieve the artifact directory for the given run.
hydraflow-0.2.18.dist-info/METADATA → hydraflow-0.3.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.18
3
+ Version: 0.3.1
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -17,6 +17,7 @@ Requires-Python: >=3.10
17
17
  Requires-Dist: hydra-core>=1.3
18
18
  Requires-Dist: joblib
19
19
  Requires-Dist: mlflow>=2.15
20
+ Requires-Dist: polars
20
21
  Requires-Dist: rich
21
22
  Requires-Dist: watchdog
22
23
  Requires-Dist: watchfiles
hydraflow-0.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
1
+ hydraflow/__init__.py,sha256=6sfM1ashUkfrNf7lOR7raFYhG8YdOAJR-JgRNL_IVo8,698
2
+ hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
3
+ hydraflow/config.py,sha256=6V5omJ3-h9-ZwVpM5rTA4FqE_mu8urTy9OqV4zG79gw,2671
4
+ hydraflow/context.py,sha256=412884e84qIEYtbxJT4roYsKfldGaTKzgo6Q1FAsT5U,8733
5
+ hydraflow/mlflow.py,sha256=JELqXFCJ9MsEJaQWT5dyleTFk8BHL7cQwW_gzhkPoIg,8729
6
+ hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
7
+ hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
8
+ hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ hydraflow/run_collection.py,sha256=Xv6-KD5ac-vv-4Q3PZrzJy1x84H_g7UoP7ZqZ8_DQeQ,24973
10
+ hydraflow/run_data.py,sha256=ZXVr0PHyufH9wwyQYWtpE4_MheAC2ArTW_J1TTMQ4iI,983
11
+ hydraflow/run_info.py,sha256=sMXOo20ClaRIommMEzuAbO_OrcXx7M1Yt4FMV7spxz0,998
12
+ hydraflow/utils.py,sha256=aRdBdToKfvHhN2qFiRzPHIdQxS7cTpZREQeP8HreAfI,2676
13
+ hydraflow-0.3.1.dist-info/METADATA,sha256=W38pNcCNy7Kmx1t9dwFoANsRjCk40-KBJUWux_BvHqA,3840
14
+ hydraflow-0.3.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
15
+ hydraflow-0.3.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
16
+ hydraflow-0.3.1.dist-info/RECORD,,
hydraflow-0.2.18.dist-info/RECORD REMOVED
@@ -1,14 +0,0 @@
1
- hydraflow/__init__.py,sha256=B7rWSiGP5WwWjijcb41Bv9uuo5MQ6gbBbVWGAWYtK-k,598
2
- hydraflow/asyncio.py,sha256=-i1C8KAmNDImrjHnk92Csaa1mpjdK8Vp4ZVaQV-l94s,6634
3
- hydraflow/config.py,sha256=sBaEYPMAGSIOc_wdDsWm0k4y3AZyWIET8gqa_o95SDA,2089
4
- hydraflow/context.py,sha256=ih_jnexaHoToNq1dZ6sBzhJWFluPiQluOlYTYOzNEgk,8222
5
- hydraflow/info.py,sha256=Vzyz9dEWcU9ovRG3JWshxIazzod1cZoHF74bHhHL3AI,3946
6
- hydraflow/mlflow.py,sha256=GkOr_pXfpfY5USYBLrCigHcP13VgrAK_e9kheR1Wke4,8579
7
- hydraflow/param.py,sha256=dvIXcKgc_MPiju3WEk9qz5FOUeK5qSj-YWN2ophCpUM,1938
8
- hydraflow/progress.py,sha256=zvKX1HCN8_xDOsgYOEcLLhkhdPdep-U8vHrc0XZ-6SQ,6163
9
- hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- hydraflow/run_collection.py,sha256=gsseBQ6a2YolNanISgEgkjei7o9U6ZGV-Tk50UYH850,24295
11
- hydraflow-0.2.18.dist-info/METADATA,sha256=roL3lGtlIibF6rHbCp4aXrCphhq-OkNe0JwLxM1xtBY,3819
12
- hydraflow-0.2.18.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
13
- hydraflow-0.2.18.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
14
- hydraflow-0.2.18.dist-info/RECORD,,