hydraflow 0.1.2__tar.gz → 0.1.5__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (29) hide show
  1. hydraflow-0.1.5/PKG-INFO +111 -0
  2. hydraflow-0.1.5/README.md +82 -0
  3. {hydraflow-0.1.2 → hydraflow-0.1.5}/pyproject.toml +2 -2
  4. {hydraflow-0.1.2 → hydraflow-0.1.5}/src/hydraflow/__init__.py +9 -1
  5. hydraflow-0.1.5/src/hydraflow/config.py +54 -0
  6. {hydraflow-0.1.2 → hydraflow-0.1.5}/src/hydraflow/context.py +65 -0
  7. hydraflow-0.1.5/src/hydraflow/mlflow.py +49 -0
  8. hydraflow-0.1.5/src/hydraflow/runs.py +512 -0
  9. hydraflow-0.1.5/src/hydraflow/util.py +24 -0
  10. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_config.py +1 -2
  11. hydraflow-0.1.2/PKG-INFO +0 -45
  12. hydraflow-0.1.2/README.md +0 -16
  13. hydraflow-0.1.2/src/hydraflow/config.py +0 -30
  14. hydraflow-0.1.2/src/hydraflow/mlflow.py +0 -20
  15. hydraflow-0.1.2/src/hydraflow/runs.py +0 -217
  16. hydraflow-0.1.2/src/hydraflow/util.py +0 -11
  17. {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/devcontainer.json +0 -0
  18. {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/postCreate.sh +0 -0
  19. {hydraflow-0.1.2 → hydraflow-0.1.5}/.devcontainer/starship.toml +0 -0
  20. {hydraflow-0.1.2 → hydraflow-0.1.5}/.gitattributes +0 -0
  21. {hydraflow-0.1.2 → hydraflow-0.1.5}/.gitignore +0 -0
  22. {hydraflow-0.1.2 → hydraflow-0.1.5}/LICENSE +0 -0
  23. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/__init__.py +0 -0
  24. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/log_run.py +0 -0
  25. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/scripts/watch.py +0 -0
  26. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_log_run.py +0 -0
  27. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_runs.py +0 -0
  28. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_version.py +0 -0
  29. {hydraflow-0.1.2 → hydraflow-0.1.5}/tests/test_watch.py +0 -0
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.3
2
+ Name: hydraflow
3
+ Version: 0.1.5
4
+ Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
+ Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
+ Project-URL: Source, https://github.com/daizutabi/hydraflow
7
+ Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
8
+ Author-email: daizutabi <daizutabi@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Documentation
17
+ Classifier: Topic :: Software Development :: Documentation
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: hydra-core>1.3
20
+ Requires-Dist: mlflow>2.15
21
+ Requires-Dist: setuptools
22
+ Requires-Dist: watchdog
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest-clarity; extra == 'dev'
25
+ Requires-Dist: pytest-cov; extra == 'dev'
26
+ Requires-Dist: pytest-randomly; extra == 'dev'
27
+ Requires-Dist: pytest-xdist; extra == 'dev'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # Hydraflow
31
+
32
+ [![PyPI Version][pypi-v-image]][pypi-v-link]
33
+ [![Python Version][python-v-image]][python-v-link]
34
+ [![Build Status][GHAction-image]][GHAction-link]
35
+ [![Coverage Status][codecov-image]][codecov-link]
36
+
37
+ <!-- Badges -->
38
+ [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
39
+ [pypi-v-link]: https://pypi.org/project/hydraflow/
40
+ [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
41
+ [python-v-link]: https://pypi.org/project/hydraflow
42
+ [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
43
+ [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
44
+ [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
45
+ [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
46
+
47
+ ## Overview
48
+
49
+ Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
50
+
51
+ ## Key Features
52
+
53
+ - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
54
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
55
+ - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
56
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
57
+
58
+ ## Installation
59
+
60
+ You can install Hydraflow via pip:
61
+
62
+ ```bash
63
+ pip install hydraflow
64
+ ```
65
+
66
+ ## Getting Started
67
+
68
+ Here is a simple example to get you started with Hydraflow:
69
+
70
+ ```python
71
+ import hydra
72
+ import hydraflow
73
+ import mlflow
74
+ from dataclasses import dataclass
75
+ from hydra.core.config_store import ConfigStore
76
+ from pathlib import Path
77
+
78
+ @dataclass
79
+ class MySQLConfig:
80
+ host: str = "localhost"
81
+ port: int = 3306
82
+
83
+ cs = ConfigStore.instance()
84
+ cs.store(name="config", node=MySQLConfig)
85
+
86
+ @hydra.main(version_base=None, config_name="config")
87
+ def my_app(cfg: MySQLConfig) -> None:
88
+ # Set experiment by Hydra job name.
89
+ hydraflow.set_experiment()
90
+
91
+ # Automatically log params using Hydra config.
92
+ with mlflow.start_run(), hydraflow.log_run(cfg) as info:
93
+ # Your app code below.
94
+
95
+ # `info.output_dir` is the Hydra output directory.
96
+ # `info.artifact_dir` is the MLflow artifact directory.
97
+
98
+ with hydraflow.watch(callback):
99
+ # Watch files in the MLflow artifact directory.
100
+ # You can update metrics or log other artifacts
101
+ # according to the watched files in your callback
102
+ # function.
103
+ pass
104
+
105
+ # Your callback function here.
106
+ def callback(file: Path) -> None:
107
+ pass
108
+
109
+ if __name__ == "__main__":
110
+ my_app()
111
+ ```
@@ -0,0 +1,82 @@
1
+ # Hydraflow
2
+
3
+ [![PyPI Version][pypi-v-image]][pypi-v-link]
4
+ [![Python Version][python-v-image]][python-v-link]
5
+ [![Build Status][GHAction-image]][GHAction-link]
6
+ [![Coverage Status][codecov-image]][codecov-link]
7
+
8
+ <!-- Badges -->
9
+ [pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
10
+ [pypi-v-link]: https://pypi.org/project/hydraflow/
11
+ [python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
12
+ [python-v-link]: https://pypi.org/project/hydraflow
13
+ [GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
14
+ [GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
15
+ [codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
16
+ [codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
17
+
18
+ ## Overview
19
+
20
+ Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
21
+
22
+ ## Key Features
23
+
24
+ - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
25
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
26
+ - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
27
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
28
+
29
+ ## Installation
30
+
31
+ You can install Hydraflow via pip:
32
+
33
+ ```bash
34
+ pip install hydraflow
35
+ ```
36
+
37
+ ## Getting Started
38
+
39
+ Here is a simple example to get you started with Hydraflow:
40
+
41
+ ```python
42
+ import hydra
43
+ import hydraflow
44
+ import mlflow
45
+ from dataclasses import dataclass
46
+ from hydra.core.config_store import ConfigStore
47
+ from pathlib import Path
48
+
49
+ @dataclass
50
+ class MySQLConfig:
51
+ host: str = "localhost"
52
+ port: int = 3306
53
+
54
+ cs = ConfigStore.instance()
55
+ cs.store(name="config", node=MySQLConfig)
56
+
57
+ @hydra.main(version_base=None, config_name="config")
58
+ def my_app(cfg: MySQLConfig) -> None:
59
+ # Set experiment by Hydra job name.
60
+ hydraflow.set_experiment()
61
+
62
+ # Automatically log params using Hydra config.
63
+ with mlflow.start_run(), hydraflow.log_run(cfg) as info:
64
+ # Your app code below.
65
+
66
+ # `info.output_dir` is the Hydra output directory.
67
+ # `info.artifact_dir` is the MLflow artifact directory.
68
+
69
+ with hydraflow.watch(callback):
70
+ # Watch files in the MLflow artifact directory.
71
+ # You can update metrics or log other artifacts
72
+ # according to the watched files in your callback
73
+ # function.
74
+ pass
75
+
76
+ # Your callback function here.
77
+ def callback(file: Path) -> None:
78
+ pass
79
+
80
+ if __name__ == "__main__":
81
+ my_app()
82
+ ```
@@ -4,8 +4,8 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.1.2"
8
- description = "Hydra with MLflow"
7
+ version = "0.1.5"
8
+ description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
11
11
  authors = [{ name = "daizutabi", email = "daizutabi@gmail.com" }]
@@ -1,6 +1,9 @@
1
1
  from .context import Info, chdir_artifact, log_run, watch
2
2
  from .mlflow import set_experiment
3
3
  from .runs import (
4
+ Run,
5
+ Runs,
6
+ drop_unique_params,
4
7
  filter_runs,
5
8
  get_artifact_dir,
6
9
  get_artifact_path,
@@ -9,19 +12,24 @@ from .runs import (
9
12
  get_param_names,
10
13
  get_run,
11
14
  get_run_id,
15
+ load_config,
12
16
  )
13
17
 
14
18
  __all__ = [
15
19
  "Info",
20
+ "Run",
21
+ "Runs",
16
22
  "chdir_artifact",
23
+ "drop_unique_params",
17
24
  "filter_runs",
18
25
  "get_artifact_dir",
19
26
  "get_artifact_path",
20
27
  "get_artifact_uri",
21
- "get_run",
22
28
  "get_param_dict",
23
29
  "get_param_names",
30
+ "get_run",
24
31
  "get_run_id",
32
+ "load_config",
25
33
  "log_run",
26
34
  "set_experiment",
27
35
  "watch",
@@ -0,0 +1,54 @@
1
+ """
2
+ This module provides functionality for working with configuration
3
+ objects using the OmegaConf library.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ from omegaconf import DictConfig, ListConfig, OmegaConf
11
+
12
+ if TYPE_CHECKING:
13
+ from collections.abc import Iterator
14
+ from typing import Any
15
+
16
+
17
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
    """
    Walk a configuration object and yield its parameters as flat pairs.

    Anything that is not already a `DictConfig` or `ListConfig` is first
    converted with `OmegaConf.create`. Nested containers are traversed
    recursively, and the key of each yielded pair is the dot-joined path
    to the value (list positions contribute their index).

    A list stored under a mapping key that holds no nested containers is
    not expanded: it is yielded whole under that key.

    Args:
        config (object): The configuration object to iterate over.
        prefix (str, optional): Text prepended to every yielded key.
            Defaults to "".

    Yields:
        `(key, value)` pairs, one per parameter.
    """
    containers = (DictConfig, ListConfig)

    if isinstance(config, containers):
        node = config
    else:
        node = OmegaConf.create(config)  # type: ignore

    # Mappings are walked by key, lists by position; both feed the same loop.
    if isinstance(node, DictConfig):
        entries = node.items()
    elif isinstance(node, ListConfig):
        entries = enumerate(node)
    else:
        return

    parent_is_list = isinstance(node, ListConfig)

    for key, value in entries:
        path = f"{prefix}{key}"

        if isinstance(value, DictConfig):
            descend = True
        elif isinstance(value, ListConfig):
            # A list inside a list is always expanded. A list under a mapping
            # key is expanded only when it contains further containers;
            # otherwise it is kept intact as a single parameter value.
            descend = parent_is_list or any(
                isinstance(item, containers) for item in value
            )
        else:
            descend = False

        if descend:
            yield from iter_params(value, f"{path}.")
        else:
            yield path, value
@@ -1,3 +1,8 @@
1
+ """
2
+ This module provides context managers to log parameters and manage the MLflow
3
+ run context.
4
+ """
5
+
1
6
  from __future__ import annotations
2
7
 
3
8
  import os
@@ -35,6 +40,28 @@ def log_run(
35
40
  *,
36
41
  synchronous: bool | None = None,
37
42
  ) -> Iterator[Info]:
43
+ """
44
+ Log the parameters from the given configuration object and manage the MLflow
45
+ run context.
46
+
47
+ This context manager logs the parameters from the provided configuration object
48
+ using MLflow. It also manages the MLflow run context, ensuring that artifacts
49
+ are logged and the run is properly closed.
50
+
51
+ Args:
52
+ config: The configuration object to log the parameters from.
53
+ synchronous: Whether to log the parameters synchronously.
54
+ Defaults to None.
55
+
56
+ Yields:
57
+ Info: An `Info` object containing the output directory and artifact directory
58
+ paths.
59
+
60
+ Example:
61
+ with log_run(config) as info:
62
+ # Perform operations within the MLflow run context
63
+ pass
64
+ """
38
65
  log_params(config, synchronous=synchronous)
39
66
 
40
67
  hc = HydraConfig.get()
@@ -61,6 +88,32 @@ def log_run(
61
88
 
62
89
  @contextmanager
63
90
  def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
91
+ """
92
+ Watch the given directory for changes and call the provided function
93
+ when a change is detected.
94
+
95
+ This context manager sets up a file system watcher on the specified directory.
96
+ When a file modification is detected, the provided function is called with
97
+ the path of the modified file. The watcher runs while the context is active
98
+ and for up to the timeout period after the context is exited.
99
+
100
+ Args:
101
+ func (Callable[[Path], None]): The function to call when a change is
102
+ detected. It should accept a single argument of type `Path`,
103
+ which is the path of the modified file.
104
+ dir (Path | str, optional): The directory to watch. If not specified,
105
+ the current MLflow artifact URI is used. Defaults to "".
106
+ timeout (int, optional): The timeout period in seconds for the watcher
107
+ to run after the context is exited. Defaults to 60.
108
+
109
+ Yields:
110
+ None: This context manager does not yield any value.
111
+
112
+ Example:
113
+ with watch(log_artifact, "/path/to/dir"):
114
+ # Perform operations while watching the directory for changes
115
+ pass
116
+ """
64
117
  if not dir:
65
118
  uri = mlflow.get_artifact_uri()
66
119
  dir = uri_to_path(uri)
@@ -100,6 +153,18 @@ def chdir_artifact(
100
153
  run: Run | Series | str,
101
154
  artifact_path: str | None = None,
102
155
  ) -> Iterator[Path]:
156
+ """
157
+ Change the current working directory to the artifact directory of the
158
+ given run.
159
+
160
+ This context manager changes the current working directory to the artifact
161
+ directory of the given run. It ensures that the directory is changed back
162
+ to the original directory after the context is exited.
163
+
164
+ Args:
165
+ run: The run to get the artifact directory from.
166
+ artifact_path: The artifact path.
167
+ """
103
168
  curdir = Path.cwd()
104
169
 
105
170
  artifact_dir = get_artifact_path(run, artifact_path)
@@ -0,0 +1,49 @@
1
+ """
2
+ This module provides functionality to log parameters from Hydra
3
+ configuration objects and set up experiments using MLflow.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import mlflow
9
+ from hydra.core.hydra_config import HydraConfig
10
+
11
+ from hydraflow.config import iter_params
12
+
13
+
14
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
    """
    Set the MLflow experiment name and, optionally, the tracking URI.

    The experiment name is built by concatenating `prefix`, the job name
    taken from `HydraConfig`, and `suffix`. When `uri` is truthy it is
    passed to `mlflow.set_tracking_uri` before the experiment is set.

    Args:
        prefix: Text placed before the Hydra job name.
        suffix: Text placed after the Hydra job name.
        uri: The tracking URI to use. A `None` or empty value leaves the
            tracking URI untouched.
    """
    if uri:
        mlflow.set_tracking_uri(uri)

    job_name = HydraConfig.get().job.name
    mlflow.set_experiment(f"{prefix}{job_name}{suffix}")
33
+
34
+
35
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """
    Log every parameter of a configuration object to MLflow.

    The configuration is flattened with `iter_params`, and each resulting
    key-value pair is sent to `mlflow.log_param` one at a time, in
    iteration order.

    Args:
        config: The configuration object whose parameters are logged.
        synchronous: Forwarded unchanged to `mlflow.log_param` as its
            `synchronous` argument. Defaults to None.
    """
    flattened = iter_params(config)

    for param_name, param_value in flattened:
        mlflow.log_param(param_name, param_value, synchronous=synchronous)