hydraflow 0.1.4__tar.gz → 0.2.0__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. hydraflow-0.2.0/PKG-INFO +111 -0
  2. hydraflow-0.2.0/README.md +82 -0
  3. {hydraflow-0.1.4 → hydraflow-0.2.0}/pyproject.toml +2 -2
  4. {hydraflow-0.1.4 → hydraflow-0.2.0}/src/hydraflow/__init__.py +0 -10
  5. hydraflow-0.2.0/src/hydraflow/config.py +55 -0
  6. hydraflow-0.2.0/src/hydraflow/context.py +188 -0
  7. hydraflow-0.2.0/src/hydraflow/mlflow.py +72 -0
  8. hydraflow-0.2.0/src/hydraflow/runs.py +422 -0
  9. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/log_run.py +2 -2
  10. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_config.py +1 -2
  11. hydraflow-0.2.0/tests/test_context.py +36 -0
  12. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_log_run.py +26 -8
  13. hydraflow-0.2.0/tests/test_mlflow.py +35 -0
  14. hydraflow-0.2.0/tests/test_runs.py +277 -0
  15. hydraflow-0.1.4/PKG-INFO +0 -45
  16. hydraflow-0.1.4/README.md +0 -16
  17. hydraflow-0.1.4/src/hydraflow/config.py +0 -30
  18. hydraflow-0.1.4/src/hydraflow/context.py +0 -111
  19. hydraflow-0.1.4/src/hydraflow/mlflow.py +0 -20
  20. hydraflow-0.1.4/src/hydraflow/runs.py +0 -217
  21. hydraflow-0.1.4/src/hydraflow/util.py +0 -11
  22. hydraflow-0.1.4/tests/test_runs.py +0 -260
  23. {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/devcontainer.json +0 -0
  24. {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/postCreate.sh +0 -0
  25. {hydraflow-0.1.4 → hydraflow-0.2.0}/.devcontainer/starship.toml +0 -0
  26. {hydraflow-0.1.4 → hydraflow-0.2.0}/.gitattributes +0 -0
  27. {hydraflow-0.1.4 → hydraflow-0.2.0}/.gitignore +0 -0
  28. {hydraflow-0.1.4 → hydraflow-0.2.0}/LICENSE +0 -0
  29. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/__init__.py +0 -0
  30. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/scripts/watch.py +0 -0
  31. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_version.py +0 -0
  32. {hydraflow-0.1.4 → hydraflow-0.2.0}/tests/test_watch.py +0 -0
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.3
2
+ Name: hydraflow
3
+ Version: 0.2.0
4
+ Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
+ Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
+ Project-URL: Source, https://github.com/daizutabi/hydraflow
7
+ Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
8
+ Author-email: daizutabi <daizutabi@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Documentation
17
+ Classifier: Topic :: Software Development :: Documentation
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: hydra-core>1.3
20
+ Requires-Dist: mlflow>2.15
21
+ Requires-Dist: setuptools
22
+ Requires-Dist: watchdog
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest-clarity; extra == 'dev'
25
+ Requires-Dist: pytest-cov; extra == 'dev'
26
+ Requires-Dist: pytest-randomly; extra == 'dev'
27
+ Requires-Dist: pytest-xdist; extra == 'dev'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # Hydraflow
31
+
32
+ [![PyPI Version][pypi-v-image]][pypi-v-link]
33
+ [![Python Version][python-v-image]][python-v-link]
34
+ [![Build Status][GHAction-image]][GHAction-link]
35
+ [![Coverage Status][codecov-image]][codecov-link]
36
+
37
+ <!-- Badges -->
38
+ [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
39
+ [pypi-v-link]: https://pypi.org/project/hydraflow/
40
+ [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
41
+ [python-v-link]: https://pypi.org/project/hydraflow
42
+ [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
43
+ [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
44
+ [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
45
+ [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
46
+
47
+ ## Overview
48
+
49
+ Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
50
+
51
+ ## Key Features
52
+
53
+ - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
54
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
55
+ - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
56
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
57
+
58
+ ## Installation
59
+
60
+ You can install Hydraflow via pip:
61
+
62
+ ```bash
63
+ pip install hydraflow
64
+ ```
65
+
66
+ ## Getting Started
67
+
68
+ Here is a simple example to get you started with Hydraflow:
69
+
70
+ ```python
71
+ import hydra
72
+ import hydraflow
73
+ import mlflow
74
+ from dataclasses import dataclass
75
+ from hydra.core.config_store import ConfigStore
76
+ from pathlib import Path
77
+
78
+ @dataclass
79
+ class MySQLConfig:
80
+ host: str = "localhost"
81
+ port: int = 3306
82
+
83
+ cs = ConfigStore.instance()
84
+ cs.store(name="config", node=MySQLConfig)
85
+
86
+ @hydra.main(version_base=None, config_name="config")
87
+ def my_app(cfg: MySQLConfig) -> None:
88
+ # Set experiment by Hydra job name.
89
+ hydraflow.set_experiment()
90
+
91
+ # Automatically log params using Hydra config.
92
+ with mlflow.start_run(), hydraflow.log_run(cfg) as info:
93
+ # Your app code below.
94
+
95
+ # `info.output_dir` is the Hydra output directory.
96
+ # `info.artifact_dir` is the MLflow artifact directory.
97
+
98
+ with hydraflow.watch(callback):
99
+ # Watch files in the MLflow artifact directory.
100
+ # You can update metrics or log other artifacts
101
+ # according to the watched files in your callback
102
+ # function.
103
+ pass
104
+
105
+ # Your callback function here.
106
+ def callback(file: Path) -> None:
107
+ pass
108
+
109
+ if __name__ == "__main__":
110
+ my_app()
111
+ ```
@@ -0,0 +1,82 @@
1
+ # Hydraflow
2
+
3
+ [![PyPI Version][pypi-v-image]][pypi-v-link]
4
+ [![Python Version][python-v-image]][python-v-link]
5
+ [![Build Status][GHAction-image]][GHAction-link]
6
+ [![Coverage Status][codecov-image]][codecov-link]
7
+
8
+ <!-- Badges -->
9
+ [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
10
+ [pypi-v-link]: https://pypi.org/project/hydraflow/
11
+ [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
12
+ [python-v-link]: https://pypi.org/project/hydraflow
13
+ [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
14
+ [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
15
+ [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
16
+ [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
17
+
18
+ ## Overview
19
+
20
+ Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
21
+
22
+ ## Key Features
23
+
24
+ - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
25
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
26
+ - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
27
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
28
+
29
+ ## Installation
30
+
31
+ You can install Hydraflow via pip:
32
+
33
+ ```bash
34
+ pip install hydraflow
35
+ ```
36
+
37
+ ## Getting Started
38
+
39
+ Here is a simple example to get you started with Hydraflow:
40
+
41
+ ```python
42
+ import hydra
43
+ import hydraflow
44
+ import mlflow
45
+ from dataclasses import dataclass
46
+ from hydra.core.config_store import ConfigStore
47
+ from pathlib import Path
48
+
49
+ @dataclass
50
+ class MySQLConfig:
51
+ host: str = "localhost"
52
+ port: int = 3306
53
+
54
+ cs = ConfigStore.instance()
55
+ cs.store(name="config", node=MySQLConfig)
56
+
57
+ @hydra.main(version_base=None, config_name="config")
58
+ def my_app(cfg: MySQLConfig) -> None:
59
+ # Set experiment by Hydra job name.
60
+ hydraflow.set_experiment()
61
+
62
+ # Automatically log params using Hydra config.
63
+ with mlflow.start_run(), hydraflow.log_run(cfg) as info:
64
+ # Your app code below.
65
+
66
+ # `info.output_dir` is the Hydra output directory.
67
+ # `info.artifact_dir` is the MLflow artifact directory.
68
+
69
+ with hydraflow.watch(callback):
70
+ # Watch files in the MLflow artifact directory.
71
+ # You can update metrics or log other artifacts
72
+ # according to the watched files in your callback
73
+ # function.
74
+ pass
75
+
76
+ # Your callback function here.
77
+ def callback(file: Path) -> None:
78
+ pass
79
+
80
+ if __name__ == "__main__":
81
+ my_app()
82
+ ```
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.1.4"
8
- description = "Hydra with MLflow"
7
+ version = "0.2.0"
8
+ description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
11
11
  authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
@@ -3,15 +3,10 @@ from .mlflow import set_experiment
3
3
  from .runs import (
4
4
  Run,
5
5
  Runs,
6
- drop_unique_params,
7
6
  filter_runs,
8
- get_artifact_dir,
9
- get_artifact_path,
10
- get_artifact_uri,
11
7
  get_param_dict,
12
8
  get_param_names,
13
9
  get_run,
14
- get_run_id,
15
10
  load_config,
16
11
  )
17
12
 
@@ -20,15 +15,10 @@ __all__ = [
20
15
  "Run",
21
16
  "Runs",
22
17
  "chdir_artifact",
23
- "drop_unique_params",
24
18
  "filter_runs",
25
- "get_artifact_dir",
26
- "get_artifact_path",
27
- "get_artifact_uri",
28
19
  "get_param_dict",
29
20
  "get_param_names",
30
21
  "get_run",
31
- "get_run_id",
32
22
  "load_config",
33
23
  "log_run",
34
24
  "set_experiment",
@@ -0,0 +1,55 @@
1
+ """
2
+ This module provides functionality for working with configuration
3
+ objects using the OmegaConf library.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ from omegaconf import DictConfig, ListConfig, OmegaConf
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterator
14
+ from typing import Any
15
+
16
+
17
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
    """
    Yield flattened ``(name, value)`` pairs from a configuration object.

    Nested containers are walked recursively; each level contributes one
    dot-terminated component to the name (a mapping key or a sequence index).
    A list stored under a mapping key is yielded whole, as a single value,
    when none of its items is itself a ``DictConfig`` or ``ListConfig``.

    Args:
        config: The configuration to traverse. Anything that is not already
            a ``DictConfig`` or ``ListConfig`` is first passed to
            ``OmegaConf.create``.
        prefix: Text prepended to every yielded name.
            Defaults to an empty string.

    Yields:
        Tuples of the prefixed parameter name and the corresponding value.
    """
    containers = (DictConfig, ListConfig)

    if not isinstance(config, containers):
        config = OmegaConf.create(config)  # type: ignore

    if isinstance(config, DictConfig):
        for key, value in config.items():
            name = f"{prefix}{key}"

            if not isinstance(value, containers):
                # Plain leaf value.
                yield name, value
                continue

            is_flat_list = isinstance(value, ListConfig) and not any(
                isinstance(item, containers) for item in value
            )
            if is_flat_list:
                # A list of leaves is kept together as one parameter.
                yield name, value
            else:
                yield from iter_params(value, f"{name}.")

    elif isinstance(config, ListConfig):
        for index, value in enumerate(config):
            name = f"{prefix}{index}"

            if isinstance(value, containers):
                yield from iter_params(value, f"{name}.")
            else:
                yield name, value
@@ -0,0 +1,188 @@
1
+ """
2
+ This module provides context managers to log parameters and manage the MLflow
3
+ run context.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import time
11
+ from contextlib import contextmanager
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+
16
+ import mlflow
17
+ from hydra.core.hydra_config import HydraConfig
18
+ from watchdog.events import FileModifiedEvent, FileSystemEventHandler
19
+ from watchdog.observers import Observer
20
+
21
+ from hydraflow.mlflow import get_artifact_dir, log_params
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Callable, Iterator
25
+
26
+ from mlflow.entities.run import Run
27
+
28
+ log = logging.getLogger(__name__)
29
+
30
+
31
@dataclass
class Info:
    """Pair of directories handed to the body of a `log_run` context."""

    # Directory taken from Hydra's `runtime.output_dir`.
    output_dir: Path
    # Directory returned by `get_artifact_dir()` for the active MLflow run.
    artifact_dir: Path
35
+
36
+
37
@contextmanager
def log_run(
    config: object,
    *,
    synchronous: bool | None = None,
) -> Iterator[Info]:
    """
    Log the configuration parameters and mirror Hydra outputs into MLflow.

    On entry the parameters of `config` are logged and the Hydra output
    sub-directory (if any) is uploaded as artifacts. While the body runs, the
    Hydra output directory is watched and every modified file is uploaded
    as an artifact. On exit, whether or not the body raised, the whole output
    directory is uploaded once more.

    Args:
        config: The configuration object to log the parameters from.
        synchronous: Forwarded to `log_params`; whether to log the
            parameters synchronously. Defaults to None.

    Yields:
        Info: An `Info` object holding the Hydra output directory and the
        MLflow artifact directory.

    Example:
        with log_run(config) as info:
            # Perform operations within the MLflow run context
            pass
    """
    log_params(config, synchronous=synchronous)

    hydra_config = HydraConfig.get()
    output_dir = Path(hydra_config.runtime.output_dir)
    info = Info(output_dir=output_dir, artifact_dir=get_artifact_dir())

    # Upload the Hydra config sub-directory ('.hydra' by default) up front.
    subdir_name = hydra_config.output_subdir
    hydra_subdir = output_dir / (subdir_name or "")
    mlflow.log_artifacts(hydra_subdir.as_posix(), subdir_name)

    def upload(path: Path) -> None:
        # Called by the watcher for each modified file.
        mlflow.log_artifact((output_dir / path).as_posix())

    try:
        with watch(upload, output_dir):
            yield info

    except Exception as e:
        log.error(f"Error during log_run: {e}")
        raise

    finally:
        # Upload everything under output_dir, the config sub-directory included.
        mlflow.log_artifacts(output_dir.as_posix())
90
+
91
+
92
@contextmanager
def watch(
    func: Callable[[Path], None],
    dir: Path | str = "",
    timeout: int = 60,
) -> Iterator[None]:
    """
    Watch a directory and call `func` for each file modified in it.

    An observer is scheduled recursively on the directory and started before
    the body runs. When the body finishes (normally or with an exception),
    the observer is given up to `timeout` seconds for its event queue to
    empty and is then stopped and joined.

    Args:
        func: The callback invoked with the `Path` of each modified file.
        dir: The directory to watch. When falsy, the directory returned by
            `get_artifact_dir()` is used. Defaults to "".
        timeout: Maximum number of seconds to wait, after the body exits,
            for pending events to be consumed. Defaults to 60.

    Yields:
        None

    Example:
        with watch(log_artifact, "/path/to/dir"):
            # Perform operations while watching the directory for changes
            pass
    """
    if not dir:
        dir = get_artifact_dir()

    observer = Observer()
    observer.schedule(Handler(func), dir, recursive=True)
    observer.start()

    try:
        yield

    except Exception as e:
        log.error(f"Error during watch: {e}")
        raise

    finally:
        # Poll until the queue is drained or the time budget is spent.
        poll_interval = 0.2
        waited = 0
        while not observer.event_queue.empty():
            time.sleep(poll_interval)
            waited += poll_interval
            if waited > timeout:
                break

        observer.stop()
        observer.join()
148
+
149
+
150
class Handler(FileSystemEventHandler):
    """Event handler that forwards modified files to a callback."""

    def __init__(self, func: Callable[[Path], None]) -> None:
        # Callback invoked with the path of each modified file.
        self.func = func

    def on_modified(self, event: FileModifiedEvent) -> None:
        """Call the callback when the event's source path is an existing file."""
        modified = Path(event.src_path)
        if not modified.is_file():
            # Anything that is not a regular file is ignored.
            return

        self.func(modified)
158
+
159
+
160
@contextmanager
def chdir_artifact(
    run: Run,
    artifact_path: str | None = None,
) -> Iterator[Path]:
    """
    Temporarily switch the working directory to a run's artifacts.

    The artifacts of `run` are fetched with
    `mlflow.artifacts.download_artifacts`, the process changes into the
    returned location for the duration of the context, and the previous
    working directory is restored on exit, even if the body raises.

    Args:
        run: The run whose artifacts are used.
        artifact_path: The artifact path passed to
            `mlflow.artifacts.download_artifacts`. Defaults to None.

    Yields:
        The path the working directory was changed to.
    """
    previous_dir = Path.cwd()
    artifact_dir = mlflow.artifacts.download_artifacts(
        run_id=run.info.run_id,
        artifact_path=artifact_path,
    )

    os.chdir(artifact_dir)
    try:
        yield Path(artifact_dir)

    finally:
        os.chdir(previous_dir)
@@ -0,0 +1,72 @@
1
+ """
2
+ This module provides functionality to log parameters from Hydra
3
+ configuration objects and set up experiments using MLflow.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from pathlib import Path
9
+
10
+ import mlflow
11
+ from hydra.core.hydra_config import HydraConfig
12
+
13
+ from hydraflow.config import iter_params
14
+
15
+
16
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
    """
    Select the MLflow experiment named after the current Hydra job.

    The experiment name is the Hydra job name wrapped in `prefix` and
    `suffix`. When `uri` is given (and non-empty), it is set as the MLflow
    tracking URI before the experiment is selected.

    Args:
        prefix: Text placed before the job name.
        suffix: Text placed after the job name.
        uri: The tracking URI to use, if any.
    """
    if uri:
        mlflow.set_tracking_uri(uri)

    job_name = HydraConfig.get().job.name
    mlflow.set_experiment(f"{prefix}{job_name}{suffix}")
35
+
36
+
37
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """
    Log every parameter of a configuration object to MLflow.

    The configuration is flattened with `iter_params`, and each resulting
    name/value pair is sent to `mlflow.log_param`.

    Args:
        config: The configuration object to log the parameters from.
        synchronous: Passed through to `mlflow.log_param`; whether to log
            the parameters synchronously. Defaults to None.
    """
    for name, value in iter_params(config):
        mlflow.log_param(name, value, synchronous=synchronous)
52
+
53
+
54
def get_artifact_dir(artifact_path: str | None = None) -> Path:
    """
    Return the local directory for an artifact path.

    The artifact URI is obtained from `mlflow.get_artifact_uri` and handed
    to `mlflow.artifacts.download_artifacts`; the location that call returns
    is wrapped in a `Path`.

    Args:
        artifact_path: The artifact path for which to get the directory.
            Defaults to None.

    Returns:
        The local path returned by the download call.
    """
    artifact_uri = mlflow.get_artifact_uri(artifact_path)
    local_dir = mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri)
    return Path(local_dir)