hydraflow 0.2.7__tar.gz → 0.2.8__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. {hydraflow-0.2.7 → hydraflow-0.2.8}/PKG-INFO +1 -1
  2. {hydraflow-0.2.7 → hydraflow-0.2.8}/pyproject.toml +1 -1
  3. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/__init__.py +4 -4
  4. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/context.py +24 -8
  5. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/info.py +57 -4
  6. hydraflow-0.2.8/src/hydraflow/mlflow.py +175 -0
  7. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/run_collection.py +25 -101
  8. hydraflow-0.2.7/tests/scripts/log_run.py → hydraflow-0.2.8/tests/scripts/app.py +21 -3
  9. hydraflow-0.2.8/tests/test_app.py +109 -0
  10. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_context.py +1 -1
  11. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_info.py +14 -1
  12. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_log_run.py +5 -3
  13. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_run_collection.py +5 -5
  14. hydraflow-0.2.7/mlruns/0/meta.yaml +0 -6
  15. hydraflow-0.2.7/src/hydraflow/mlflow.py +0 -119
  16. {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/devcontainer.json +0 -0
  17. {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/postCreate.sh +0 -0
  18. {hydraflow-0.2.7 → hydraflow-0.2.8}/.devcontainer/starship.toml +0 -0
  19. {hydraflow-0.2.7 → hydraflow-0.2.8}/.gitattributes +0 -0
  20. {hydraflow-0.2.7 → hydraflow-0.2.8}/.gitignore +0 -0
  21. {hydraflow-0.2.7 → hydraflow-0.2.8}/LICENSE +0 -0
  22. {hydraflow-0.2.7 → hydraflow-0.2.8}/README.md +0 -0
  23. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/asyncio.py +0 -0
  24. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/config.py +0 -0
  25. {hydraflow-0.2.7 → hydraflow-0.2.8}/src/hydraflow/progress.py +0 -0
  26. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/scripts/__init__.py +0 -0
  27. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/scripts/watch.py +0 -0
  28. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_asyncio.py +0 -0
  29. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_config.py +0 -0
  30. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_mlflow.py +0 -0
  31. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_progress.py +0 -0
  32. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_version.py +0 -0
  33. {hydraflow-0.2.7 → hydraflow-0.2.8}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.2.7"
7
+ version = "0.2.8"
8
8
  description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,11 +1,11 @@
1
1
  from .context import chdir_artifact, log_run, start_run, watch
2
- from .info import load_config
3
- from .mlflow import get_artifact_dir, get_hydra_output_dir, set_experiment
4
- from .run_collection import (
5
- RunCollection,
2
+ from .info import get_artifact_dir, get_hydra_output_dir, load_config
3
+ from .mlflow import (
6
4
  list_runs,
7
5
  search_runs,
6
+ set_experiment,
8
7
  )
8
+ from .run_collection import RunCollection
9
9
 
10
10
  __all__ = [
11
11
  "RunCollection",
@@ -14,10 +14,11 @@ from typing import TYPE_CHECKING
14
14
 
15
15
  import mlflow
16
16
  from hydra.core.hydra_config import HydraConfig
17
- from watchdog.events import FileModifiedEvent, FileSystemEventHandler
17
+ from watchdog.events import FileModifiedEvent, PatternMatchingEventHandler
18
18
  from watchdog.observers import Observer
19
19
 
20
- from hydraflow.mlflow import get_artifact_dir, log_params
20
+ from hydraflow.info import get_artifact_dir
21
+ from hydraflow.mlflow import log_params
21
22
 
22
23
  if TYPE_CHECKING:
23
24
  from collections.abc import Callable, Iterator
@@ -68,7 +69,7 @@ def log_run(
68
69
  mlflow.log_artifact(local_path)
69
70
 
70
71
  try:
71
- with watch(log_artifact, output_dir):
72
+ with watch(log_artifact, output_dir, ignore_log=False):
72
73
  yield
73
74
 
74
75
  except Exception as e:
@@ -140,9 +141,11 @@ def start_run(
140
141
 
141
142
  @contextmanager
142
143
  def watch(
143
- func: Callable[[Path], None],
144
+ callback: Callable[[Path], None],
144
145
  dir: Path | str = "",
145
146
  timeout: int = 60,
147
+ ignore_patterns: list[str] | None = None,
148
+ ignore_log: bool = True,
146
149
  ) -> Iterator[None]:
147
150
  """
148
151
  Watch the given directory for changes and call the provided function
@@ -154,7 +157,7 @@ def watch(
154
157
  period or until the context is exited.
155
158
 
156
159
  Args:
157
- func (Callable[[Path], None]): The function to call when a change is
160
+ callback (Callable[[Path], None]): The function to call when a change is
158
161
  detected. It should accept a single argument of type `Path`,
159
162
  which is the path of the modified file.
160
163
  dir (Path | str): The directory to watch. If not specified,
@@ -174,7 +177,7 @@ def watch(
174
177
  if isinstance(dir, Path):
175
178
  dir = dir.as_posix()
176
179
 
177
- handler = Handler(func)
180
+ handler = Handler(callback, ignore_patterns=ignore_patterns, ignore_log=ignore_log)
178
181
  observer = Observer()
179
182
  observer.schedule(handler, dir, recursive=True)
180
183
  observer.start()
@@ -198,10 +201,23 @@ def watch(
198
201
  observer.join()
199
202
 
200
203
 
201
- class Handler(FileSystemEventHandler):
202
- def __init__(self, func: Callable[[Path], None]) -> None:
204
+ class Handler(PatternMatchingEventHandler):
205
+ def __init__(
206
+ self,
207
+ func: Callable[[Path], None],
208
+ ignore_patterns: list[str] | None = None,
209
+ ignore_log: bool = True,
210
+ ) -> None:
203
211
  self.func = func
204
212
 
213
+ if ignore_log:
214
+ if ignore_patterns:
215
+ ignore_patterns.append("*.log")
216
+ else:
217
+ ignore_patterns = ["*.log"]
218
+
219
+ super().__init__(ignore_patterns=ignore_patterns)
220
+
205
221
  def on_modified(self, event: FileModifiedEvent) -> None:
206
222
  file = Path(str(event.src_path))
207
223
  if file.is_file():
@@ -1,14 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from pathlib import Path
3
4
  from typing import TYPE_CHECKING
4
5
 
6
+ import mlflow
7
+ from hydra.core.hydra_config import HydraConfig
8
+ from mlflow.tracking import artifact_utils
5
9
  from omegaconf import DictConfig, OmegaConf
6
10
 
7
- from hydraflow.mlflow import get_artifact_dir
8
-
9
11
  if TYPE_CHECKING:
10
- from pathlib import Path
11
-
12
12
  from mlflow.entities import Run
13
13
 
14
14
  from hydraflow.run_collection import RunCollection
@@ -43,6 +43,59 @@ class RunCollectionInfo:
43
43
  return [load_config(run) for run in self._runs]
44
44
 
45
45
 
46
+ def get_artifact_dir(run: Run | None = None) -> Path:
47
+ """
48
+ Retrieve the artifact directory for the given run.
49
+
50
+ This function uses MLflow to get the artifact directory for the given run.
51
+
52
+ Args:
53
+ run (Run | None): The run object. Defaults to None.
54
+
55
+ Returns:
56
+ The local path to the directory where the artifacts are downloaded.
57
+ """
58
+ if run is None:
59
+ uri = mlflow.get_artifact_uri()
60
+ else:
61
+ uri = artifact_utils.get_artifact_uri(run.info.run_id)
62
+
63
+ return Path(mlflow.artifacts.download_artifacts(uri))
64
+
65
+
66
+ def get_hydra_output_dir(run: Run | None = None) -> Path:
67
+ """
68
+ Retrieve the Hydra output directory for the given run.
69
+
70
+ This function returns the Hydra output directory. If no run is provided,
71
+ it retrieves the output directory from the current Hydra configuration.
72
+ If a run is provided, it retrieves the artifact path for the run, loads
73
+ the Hydra configuration from the downloaded artifacts, and returns the
74
+ output directory specified in that configuration.
75
+
76
+ Args:
77
+ run (Run | None): The run object. Defaults to None.
78
+
79
+ Returns:
80
+ Path: The path to the Hydra output directory.
81
+
82
+ Raises:
83
+ FileNotFoundError: If the Hydra configuration file is not found
84
+ in the artifacts.
85
+ """
86
+ if run is None:
87
+ hc = HydraConfig.get()
88
+ return Path(hc.runtime.output_dir)
89
+
90
+ path = get_artifact_dir(run) / ".hydra/hydra.yaml"
91
+
92
+ if path.exists():
93
+ hc = OmegaConf.load(path)
94
+ return Path(hc.hydra.runtime.output_dir)
95
+
96
+ raise FileNotFoundError
97
+
98
+
46
99
  def load_config(run: Run) -> DictConfig:
47
100
  """
48
101
  Load the configuration for a given run.
@@ -0,0 +1,175 @@
1
+ """
2
+ This module provides functionality to log parameters from Hydra configuration objects
3
+ and set up experiments using MLflow. It includes methods for managing experiments,
4
+ searching for runs, and logging parameters and artifacts.
5
+
6
+ Key Features:
7
+ - **Experiment Management**: Set and manage MLflow experiments with customizable names
8
+ based on Hydra configuration.
9
+ - **Run Logging**: Log parameters from Hydra configuration objects to
10
+ MLflow, ensuring that all relevant information is captured during experiments.
11
+ - **Run Search**: Search for runs based on various criteria, allowing for flexible
12
+ retrieval of experiment results.
13
+ - **Artifact Management**: Retrieve and log artifacts associated with runs, facilitating
14
+ easy access to outputs generated during experiments.
15
+
16
+ This module is designed to integrate seamlessly with Hydra, providing a robust
17
+ solution for tracking machine learning experiments and their associated metadata.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING
24
+
25
+ import mlflow
26
+ from hydra.core.hydra_config import HydraConfig
27
+ from mlflow.entities import ViewType
28
+ from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
29
+
30
+ from hydraflow.config import iter_params
31
+ from hydraflow.run_collection import RunCollection
32
+
33
+ if TYPE_CHECKING:
34
+ from mlflow.entities.experiment import Experiment
35
+
36
+
37
+ def set_experiment(
38
+ prefix: str = "",
39
+ suffix: str = "",
40
+ uri: str | Path | None = None,
41
+ ) -> Experiment:
42
+ """
43
+ Set the experiment name and, optionally, the tracking URI.
44
+
45
+ This function sets the experiment name by combining the given prefix,
46
+ the job name from HydraConfig, and the given suffix. Optionally, it can
47
+ also set the tracking URI.
48
+
49
+ Args:
50
+ prefix (str): The prefix to prepend to the experiment name.
51
+ suffix (str): The suffix to append to the experiment name.
52
+ uri (str | Path | None): The tracking URI to use. Defaults to None.
53
+
54
+ Returns:
55
+ Experiment: An instance of `mlflow.entities.Experiment` representing
56
+ the new active experiment.
57
+ """
58
+ if uri is not None:
59
+ mlflow.set_tracking_uri(uri)
60
+
61
+ hc = HydraConfig.get()
62
+ name = f"{prefix}{hc.job.name}{suffix}"
63
+ return mlflow.set_experiment(name)
64
+
65
+
66
+ def log_params(config: object, *, synchronous: bool | None = None) -> None:
67
+ """
68
+ Log the parameters from the given configuration object.
69
+
70
+ This method logs the parameters from the provided configuration object
71
+ using MLflow. It iterates over the parameters and logs them using the
72
+ `mlflow.log_param` method.
73
+
74
+ Args:
75
+ config (object): The configuration object to log the parameters from.
76
+ synchronous (bool | None): Whether to log the parameters synchronously.
77
+ Defaults to None.
78
+ """
79
+ for key, value in iter_params(config):
80
+ mlflow.log_param(key, value, synchronous=synchronous)
81
+
82
+
83
+ def search_runs(
84
+ experiment_ids: list[str] | None = None,
85
+ filter_string: str = "",
86
+ run_view_type: int = ViewType.ACTIVE_ONLY,
87
+ max_results: int = SEARCH_MAX_RESULTS_PANDAS,
88
+ order_by: list[str] | None = None,
89
+ search_all_experiments: bool = False,
90
+ experiment_names: list[str] | None = None,
91
+ ) -> RunCollection:
92
+ """
93
+ Search for Runs that fit the specified criteria.
94
+
95
+ This function wraps the `mlflow.search_runs` function and returns the
96
+ results as a `RunCollection` object. It allows for flexible searching of
97
+ MLflow runs based on various criteria.
98
+
99
+ Note:
100
+ The returned runs are sorted by their start time in ascending order.
101
+
102
+ Args:
103
+ experiment_ids (list[str] | None): List of experiment IDs. Search can
104
+ work with experiment IDs or experiment names, but not both in the
105
+ same call. Values other than ``None`` or ``[]`` will result in
106
+ error if ``experiment_names`` is also not ``None`` or ``[]``.
107
+ ``None`` will default to the active experiment if ``experiment_names``
108
+ is ``None`` or ``[]``.
109
+ filter_string (str): Filter query string, defaults to searching all
110
+ runs.
111
+ run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
112
+ or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
113
+ max_results (int): The maximum number of runs to return in the collection.
114
+ Default is 100,000 to avoid causing out-of-memory issues on the user's
115
+ machine.
116
+ order_by (list[str] | None): List of columns to order by (e.g.,
117
+ "metrics.rmse"). The ``order_by`` column can contain an optional
118
+ ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
119
+ ordering is to sort by
120
+ ``start_time DESC``, then ``run_id``.
121
+ search_all_experiments (bool): Boolean specifying whether all
122
+ experiments should be searched. Only honored if ``experiment_ids``
123
+ is ``[]`` or ``None``.
124
+ experiment_names (list[str] | None): List of experiment names. Search
125
+ can work with experiment IDs or experiment names, but not both in
126
+ the same call. Values other than ``None`` or ``[]`` will result in
127
+ error if
128
+ ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
129
+ default to the active experiment if ``experiment_ids`` is ``None``
130
+ or ``[]``.
131
+
132
+ Returns:
133
+ A `RunCollection` object containing the search results.
134
+ """
135
+ runs = mlflow.search_runs(
136
+ experiment_ids=experiment_ids,
137
+ filter_string=filter_string,
138
+ run_view_type=run_view_type,
139
+ max_results=max_results,
140
+ order_by=order_by,
141
+ output_format="list",
142
+ search_all_experiments=search_all_experiments,
143
+ experiment_names=experiment_names,
144
+ )
145
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
146
+ return RunCollection(runs) # type: ignore
147
+
148
+
149
+ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
150
+ """
151
+ List all runs for the specified experiments.
152
+
153
+ This function retrieves all runs for the given list of experiment names.
154
+ If no experiment names are provided (None), it defaults to searching all runs
155
+ for the currently active experiment. If an empty list is provided, the function
156
+ will search all runs for all experiments except the "Default" experiment.
157
+ The function returns the results as a `RunCollection` object.
158
+
159
+ Note:
160
+ The returned runs are sorted by their start time in ascending order.
161
+
162
+ Args:
163
+ experiment_names (list[str] | None): List of experiment names to search
164
+ for runs. If None or an empty list is provided, the function will
165
+ search the currently active experiment or all experiments except
166
+ the "Default" experiment, respectively.
167
+
168
+ Returns:
169
+ A `RunCollection` object containing the runs for the specified experiments.
170
+ """
171
+ if experiment_names == []:
172
+ experiments = mlflow.search_experiments()
173
+ experiment_names = [e.name for e in experiments if e.name != "Default"]
174
+
175
+ return search_runs(experiment_names=experiment_names)
@@ -1,7 +1,24 @@
1
1
  """
2
- This module provides functionality for managing and interacting with MLflow
3
- runs. It includes the `RunCollection` class and various methods to filter
4
- runs, retrieve run information, log artifacts, and load configurations.
2
+ This module provides functionality for managing and interacting with MLflow runs.
3
+ It includes the `RunCollection` class, which serves as a container for multiple MLflow
4
+ run objects, and various methods to filter, retrieve, and manipulate these runs.
5
+
6
+ Key Features:
7
+ - **Run Management**: The `RunCollection` class allows for easy management of multiple
8
+ MLflow runs, providing methods to access, filter, and sort runs based on various
9
+ criteria.
10
+ - **Filtering**: The module supports filtering runs based on specific configurations
11
+ and parameters, enabling users to easily find runs that match certain conditions.
12
+ - **Retrieval**: Users can retrieve specific runs, including the first, last, or any
13
+ run that matches a given configuration.
14
+ - **Artifact Handling**: The module provides methods to access and manipulate the
15
+ artifacts associated with each run, including retrieving artifact URIs and directories.
16
+
17
+ The `RunCollection` class is designed to work seamlessly with the MLflow tracking
18
+ API, providing a robust solution for managing machine learning experiment runs and
19
+ their associated metadata. This module is particularly useful for data scientists and
20
+ machine learning engineers who need to track and analyze the results of their experiments
21
+ efficiently.
5
22
  """
6
23
 
7
24
  from __future__ import annotations
@@ -10,10 +27,7 @@ from dataclasses import dataclass, field
10
27
  from itertools import chain
11
28
  from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
12
29
 
13
- import mlflow
14
- from mlflow.entities import ViewType
15
30
  from mlflow.entities.run import Run
16
- from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
17
31
 
18
32
  from hydraflow.config import iter_params
19
33
  from hydraflow.info import RunCollectionInfo
@@ -26,101 +40,6 @@ if TYPE_CHECKING:
26
40
  from omegaconf import DictConfig
27
41
 
28
42
 
29
- def search_runs(
30
- experiment_ids: list[str] | None = None,
31
- filter_string: str = "",
32
- run_view_type: int = ViewType.ACTIVE_ONLY,
33
- max_results: int = SEARCH_MAX_RESULTS_PANDAS,
34
- order_by: list[str] | None = None,
35
- search_all_experiments: bool = False,
36
- experiment_names: list[str] | None = None,
37
- ) -> RunCollection:
38
- """
39
- Search for Runs that fit the specified criteria.
40
-
41
- This function wraps the `mlflow.search_runs` function and returns the
42
- results as a `RunCollection` object. It allows for flexible searching of
43
- MLflow runs based on various criteria.
44
-
45
- Note:
46
- The returned runs are sorted by their start time in ascending order.
47
-
48
- Args:
49
- experiment_ids (list[str] | None): List of experiment IDs. Search can
50
- work with experiment IDs or experiment names, but not both in the
51
- same call. Values other than ``None`` or ``[]`` will result in
52
- error if ``experiment_names`` is also not ``None`` or ``[]``.
53
- ``None`` will default to the active experiment if ``experiment_names``
54
- is ``None`` or ``[]``.
55
- filter_string (str): Filter query string, defaults to searching all
56
- runs.
57
- run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
58
- or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
59
- max_results (int): The maximum number of runs to put in the dataframe.
60
- Default is 100,000 to avoid causing out-of-memory issues on the user's
61
- machine.
62
- order_by (list[str] | None): List of columns to order by (e.g.,
63
- "metrics.rmse"). The ``order_by`` column can contain an optional
64
- ``DESC`` or ``ASC`` value. The default is ``ASC``. The default
65
- ordering is to sort by ``start_time DESC``, then ``run_id``.
66
- ``start_time DESC``, then ``run_id``.
67
- search_all_experiments (bool): Boolean specifying whether all
68
- experiments should be searched. Only honored if ``experiment_ids``
69
- is ``[]`` or ``None``.
70
- experiment_names (list[str] | None): List of experiment names. Search
71
- can work with experiment IDs or experiment names, but not both in
72
- the same call. Values other than ``None`` or ``[]`` will result in
73
- error if ``experiment_ids`` is also not ``None`` or ``[]``.
74
- ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
75
- default to the active experiment if ``experiment_ids`` is ``None``
76
- or ``[]``.
77
-
78
- Returns:
79
- A `RunCollection` object containing the search results.
80
- """
81
- runs = mlflow.search_runs(
82
- experiment_ids=experiment_ids,
83
- filter_string=filter_string,
84
- run_view_type=run_view_type,
85
- max_results=max_results,
86
- order_by=order_by,
87
- output_format="list",
88
- search_all_experiments=search_all_experiments,
89
- experiment_names=experiment_names,
90
- )
91
- runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
92
- return RunCollection(runs) # type: ignore
93
-
94
-
95
- def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
96
- """
97
- List all runs for the specified experiments.
98
-
99
- This function retrieves all runs for the given list of experiment names.
100
- If no experiment names are provided (None), it defaults to searching all runs
101
- for the currently active experiment. If an empty list is provided, the function
102
- will search all runs for all experiments except the "Default" experiment.
103
- The function returns the results as a `RunCollection` object.
104
-
105
- Note:
106
- The returned runs are sorted by their start time in ascending order.
107
-
108
- Args:
109
- experiment_names (list[str] | None): List of experiment names to search
110
- for runs. If None or an empty list is provided, the function will
111
- search the currently active experiment or all experiments except
112
- the "Default" experiment.
113
-
114
- Returns:
115
- A `RunCollection` object containing the runs for the specified experiments.
116
- """
117
- if experiment_names == []:
118
- experiments = mlflow.search_experiments()
119
- experiment_names = [e.name for e in experiments if e.name != "Default"]
120
-
121
- return search_runs(experiment_names=experiment_names)
122
-
123
-
124
43
  T = TypeVar("T")
125
44
  P = ParamSpec("P")
126
45
 
@@ -132,6 +51,11 @@ class RunCollection:
132
51
 
133
52
  This class provides methods to interact with the runs, such as filtering,
134
53
  retrieving specific runs, and accessing run information.
54
+
55
+ Key Features:
56
+ - Filtering: Easily filter runs based on various criteria.
57
+ - Retrieval: Access specific runs by index or through methods.
58
+ - Metadata: Access run metadata and associated information.
135
59
  """
136
60
 
137
61
  _runs: list[Run]
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
+ import time
4
5
  from dataclasses import dataclass
6
+ from pathlib import Path
5
7
 
6
8
  import hydra
7
9
  import mlflow
@@ -24,16 +26,32 @@ cs.store(name="config", node=MySQLConfig)
24
26
 
25
27
  @hydra.main(version_base=None, config_name="config")
26
28
  def app(cfg: MySQLConfig):
27
- mlflow.set_experiment("log_run")
29
+ hydraflow.set_experiment(prefix="_", suffix="_")
28
30
  with hydraflow.start_run(cfg):
31
+ log.info(f"START, {cfg.host}, {cfg.port} ")
32
+
29
33
  artifact_dir = hydraflow.get_artifact_dir()
30
34
  output_dir = hydraflow.get_hydra_output_dir()
31
- log.info(f"START, {cfg.host}, {cfg.port} ")
35
+
32
36
  mlflow.log_text("A " + artifact_dir.as_posix(), "artifact_dir.txt")
33
37
  mlflow.log_text("B " + output_dir.as_posix(), "output_dir.txt")
34
- (artifact_dir / "a.txt").write_text("abc")
38
+
39
+ with hydraflow.watch(callback, ignore_patterns=["b.txt"]):
40
+ (artifact_dir / "a.txt").write_text("abc")
41
+ time.sleep(0.1)
42
+
43
+ mlflow.log_metric("m", cfg.port + 1, 1)
44
+ if cfg.host == "x":
45
+ mlflow.log_metric("m", cfg.port + 10, 2)
46
+
35
47
  log.info("END")
36
48
 
37
49
 
50
+ def callback(path: Path):
51
+ log.info(f"WATCH, {path.as_posix()}")
52
+ m = len(path.read_text()) # len("abc") == 3
53
+ mlflow.log_metric("watch", m, 1, synchronous=True)
54
+
55
+
38
56
  if __name__ == "__main__":
39
57
  app()
@@ -0,0 +1,109 @@
1
+ from __future__ import annotations
2
+
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import mlflow
8
+ import pytest
9
+ from omegaconf import DictConfig
10
+
11
+ from hydraflow.run_collection import RunCollection
12
+
13
+
14
+ @pytest.fixture
15
+ def rc(monkeypatch, tmp_path):
16
+ import hydraflow
17
+
18
+ file = Path("tests/scripts/app.py").absolute()
19
+ monkeypatch.chdir(tmp_path)
20
+
21
+ args = [sys.executable, file.as_posix(), "-m"]
22
+ args += ["host=x,y", "port=1,2", "hydra.job.name=info"]
23
+ subprocess.check_call(args)
24
+
25
+ mlflow.set_experiment("_info_")
26
+ yield hydraflow.list_runs()
27
+
28
+
29
+ def test_app_info_run_id(rc: RunCollection):
30
+ assert len(rc.info.run_id) == 4
31
+
32
+
33
+ def test_app_info_params(rc: RunCollection):
34
+ params = rc.info.params
35
+ assert params[0] == {"port": "1", "host": "x"}
36
+ assert params[1] == {"port": "2", "host": "x"}
37
+ assert params[2] == {"port": "1", "host": "y"}
38
+ assert params[3] == {"port": "2", "host": "y"}
39
+
40
+
41
+ def test_app_info_metrics(rc: RunCollection):
42
+ metrics = rc.info.metrics
43
+ assert metrics[0] == {"m": 11, "watch": 3}
44
+ assert metrics[1] == {"m": 12, "watch": 3}
45
+ assert metrics[2] == {"m": 2, "watch": 3}
46
+ assert metrics[3] == {"m": 3, "watch": 3}
47
+
48
+
49
+ def test_app_info_config(rc: RunCollection):
50
+ config = rc.info.config
51
+ assert config[0].port == 1
52
+ assert config[1].port == 2
53
+ assert config[2].host == "y"
54
+ assert config[3].host == "y"
55
+
56
+
57
+ def test_app_info_artifact_uri(rc: RunCollection):
58
+ uris = rc.info.artifact_uri
59
+ print(uris)
60
+ assert all(uri.startswith("file://") for uri in uris) # type: ignore
61
+ assert all(uri.endswith("/artifacts") for uri in uris) # type: ignore
62
+ assert all("mlruns" in uri for uri in uris) # type: ignore
63
+
64
+
65
+ def test_app_info_artifact_dir(rc: RunCollection):
66
+ from hydraflow.info import get_artifact_dir
67
+
68
+ dirs = list(rc.map(get_artifact_dir))
69
+ assert rc.info.artifact_dir == dirs
70
+
71
+
72
+ def test_app_hydra_output_dir(rc: RunCollection):
73
+ from hydraflow.info import get_hydra_output_dir
74
+
75
+ dirs = list(rc.map(get_hydra_output_dir))
76
+ assert dirs[0].stem == "0"
77
+ assert dirs[1].stem == "1"
78
+ assert dirs[2].stem == "2"
79
+ assert dirs[3].stem == "3"
80
+
81
+
82
+ def test_app_map_config(rc: RunCollection):
83
+ ports = []
84
+
85
+ def func(c: DictConfig, *, a: int):
86
+ ports.append(c.port + 1)
87
+ return c.host
88
+
89
+ hosts = list(rc.map_config(func, a=1))
90
+ assert hosts == ["x", "x", "y", "y"]
91
+ assert ports == [2, 3, 2, 3]
92
+
93
+
94
+ def test_app_group_by(rc: RunCollection):
95
+ grouped = rc.group_by("host")
96
+ assert len(grouped) == 2
97
+ assert grouped[("x",)].info.params[0] == {"port": "1", "host": "x"}
98
+ assert grouped[("x",)].info.params[1] == {"port": "2", "host": "x"}
99
+ assert grouped[("y",)].info.params[0] == {"port": "1", "host": "y"}
100
+ assert grouped[("y",)].info.params[1] == {"port": "2", "host": "y"}
101
+
102
+
103
+ def test_app_group_by_values(rc: RunCollection):
104
+ grouped = rc.group_by_values("port")
105
+ assert len(grouped) == 2
106
+ assert grouped[0].info.params[0] == {"port": "1", "host": "x"}
107
+ assert grouped[0].info.params[1] == {"port": "1", "host": "y"}
108
+ assert grouped[1].info.params[0] == {"port": "2", "host": "x"}
109
+ assert grouped[1].info.params[1] == {"port": "2", "host": "y"}
@@ -11,7 +11,7 @@ from hydraflow.run_collection import RunCollection
11
11
 
12
12
  @pytest.fixture
13
13
  def runs(monkeypatch, tmp_path):
14
- from hydraflow.run_collection import list_runs
14
+ from hydraflow.mlflow import list_runs
15
15
 
16
16
  monkeypatch.chdir(tmp_path)
17
17
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
 
3
5
  import mlflow
@@ -8,11 +10,12 @@ from hydraflow.run_collection import RunCollection
8
10
 
9
11
  @pytest.fixture
10
12
  def runs(monkeypatch, tmp_path):
11
- from hydraflow.run_collection import search_runs
13
+ from hydraflow.mlflow import search_runs
12
14
 
13
15
  monkeypatch.chdir(tmp_path)
14
16
 
15
17
  mlflow.set_experiment("test_info")
18
+
16
19
  for x in range(3):
17
20
  with mlflow.start_run(run_name=f"{x}"):
18
21
  mlflow.log_param("p", x)
@@ -49,3 +52,13 @@ def test_info_artifact_dir(runs: RunCollection):
49
52
  dir = runs.info.artifact_dir
50
53
  assert all(isinstance(d, Path) for d in dir)
51
54
  assert all(d.stem == "artifacts" for d in dir) # type: ignore
55
+
56
+
57
+ def test_info_empty_run_collection():
58
+ rc = RunCollection([])
59
+ assert rc.info.run_id == []
60
+ assert rc.info.params == []
61
+ assert rc.info.metrics == []
62
+ assert rc.info.artifact_uri == []
63
+ assert rc.info.artifact_dir == []
64
+ assert rc.info.config == []
@@ -12,12 +12,14 @@ from mlflow.entities.run import Run
12
12
 
13
13
  @pytest.fixture
14
14
  def runs(monkeypatch, tmp_path):
15
- file = Path("tests/scripts/log_run.py").absolute()
15
+ file = Path("tests/scripts/app.py").absolute()
16
16
  monkeypatch.chdir(tmp_path)
17
17
 
18
- subprocess.check_call([sys.executable, file.as_posix(), "-m", "host=x,y", "port=1,2"])
18
+ args = [sys.executable, file.as_posix(), "-m"]
19
+ args += ["host=x,y", "port=1,2", "hydra.job.name=log_run"]
20
+ subprocess.check_call(args)
19
21
 
20
- mlflow.set_experiment("log_run")
22
+ mlflow.set_experiment("_log_run_")
21
23
  runs = mlflow.search_runs(output_format="list")
22
24
  assert len(runs) == 4
23
25
  assert isinstance(runs, list)
@@ -11,7 +11,7 @@ from hydraflow.run_collection import RunCollection
11
11
 
12
12
  @pytest.fixture
13
13
  def runs(monkeypatch, tmp_path):
14
- from hydraflow.run_collection import search_runs
14
+ from hydraflow.mlflow import search_runs
15
15
 
16
16
  monkeypatch.chdir(tmp_path)
17
17
 
@@ -342,7 +342,7 @@ def runs2(monkeypatch, tmp_path):
342
342
 
343
343
 
344
344
  def test_list_runs(runs, runs2):
345
- from hydraflow.run_collection import list_runs
345
+ from hydraflow.mlflow import list_runs
346
346
 
347
347
  mlflow.set_experiment("test_run")
348
348
  all_runs = list_runs()
@@ -354,7 +354,7 @@ def test_list_runs(runs, runs2):
354
354
 
355
355
 
356
356
  def test_list_runs_empty_list(runs, runs2):
357
- from hydraflow.run_collection import list_runs
357
+ from hydraflow.mlflow import list_runs
358
358
 
359
359
  all_runs = list_runs([])
360
360
  assert len(all_runs) == 9
@@ -362,14 +362,14 @@ def test_list_runs_empty_list(runs, runs2):
362
362
 
363
363
  @pytest.mark.parametrize(["name", "n"], [("test_run", 6), ("test_run2", 3)])
364
364
  def test_list_runs_list(runs, runs2, name, n):
365
- from hydraflow.run_collection import list_runs
365
+ from hydraflow.mlflow import list_runs
366
366
 
367
367
  filtered_runs = list_runs(experiment_names=[name])
368
368
  assert len(filtered_runs) == n
369
369
 
370
370
 
371
371
  def test_list_runs_none(runs, runs2):
372
- from hydraflow.run_collection import list_runs
372
+ from hydraflow.mlflow import list_runs
373
373
 
374
374
  no_runs = list_runs(experiment_names=["non_existent_experiment"])
375
375
  assert len(no_runs) == 0
@@ -1,6 +0,0 @@
1
- artifact_location: file:///workspaces/hydraflow/mlruns/0
2
- creation_time: 1725536713011
3
- experiment_id: '0'
4
- last_update_time: 1725536713011
5
- lifecycle_stage: active
6
- name: Default
@@ -1,119 +0,0 @@
1
- """
2
- This module provides functionality to log parameters from Hydra
3
- configuration objects and set up experiments using MLflow.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- from pathlib import Path
9
- from typing import TYPE_CHECKING
10
-
11
- import mlflow
12
- from hydra.core.hydra_config import HydraConfig
13
- from mlflow.tracking import artifact_utils
14
- from omegaconf import OmegaConf
15
-
16
- from hydraflow.config import iter_params
17
-
18
- if TYPE_CHECKING:
19
- from mlflow.entities.experiment import Experiment
20
- from mlflow.entities.run import Run
21
-
22
-
23
- def set_experiment(
24
- prefix: str = "",
25
- suffix: str = "",
26
- uri: str | Path | None = None,
27
- ) -> Experiment:
28
- """
29
- Set the experiment name and tracking URI optionally.
30
-
31
- This function sets the experiment name by combining the given prefix,
32
- the job name from HydraConfig, and the given suffix. Optionally, it can
33
- also set the tracking URI.
34
-
35
- Args:
36
- prefix (str): The prefix to prepend to the experiment name.
37
- suffix (str): The suffix to append to the experiment name.
38
- uri (str | Path | None): The tracking URI to use. Defaults to None.
39
-
40
- Returns:
41
- Experiment: An instance of `mlflow.entities.Experiment` representing
42
- the new active experiment.
43
- """
44
- if uri is not None:
45
- mlflow.set_tracking_uri(uri)
46
-
47
- hc = HydraConfig.get()
48
- name = f"{prefix}{hc.job.name}{suffix}"
49
- return mlflow.set_experiment(name)
50
-
51
-
52
- def log_params(config: object, *, synchronous: bool | None = None) -> None:
53
- """
54
- Log the parameters from the given configuration object.
55
-
56
- This method logs the parameters from the provided configuration object
57
- using MLflow. It iterates over the parameters and logs them using the
58
- `mlflow.log_param` method.
59
-
60
- Args:
61
- config (object): The configuration object to log the parameters from.
62
- synchronous (bool | None): Whether to log the parameters synchronously.
63
- Defaults to None.
64
- """
65
- for key, value in iter_params(config):
66
- mlflow.log_param(key, value, synchronous=synchronous)
67
-
68
-
69
- def get_artifact_dir(run: Run | None = None) -> Path:
70
- """
71
- Retrieve the artifact directory for the given run.
72
-
73
- This function uses MLflow to get the artifact directory for the given run.
74
-
75
- Args:
76
- run (Run | None): The run object. Defaults to None.
77
-
78
- Returns:
79
- The local path to the directory where the artifacts are downloaded.
80
- """
81
- if run is None:
82
- uri = mlflow.get_artifact_uri()
83
- else:
84
- uri = artifact_utils.get_artifact_uri(run.info.run_id)
85
-
86
- return Path(mlflow.artifacts.download_artifacts(uri))
87
-
88
-
89
- def get_hydra_output_dir(*, run: Run | None = None) -> Path:
90
- """
91
- Retrieve the Hydra output directory for the given run.
92
-
93
- This function returns the Hydra output directory. If no run is provided,
94
- it retrieves the output directory from the current Hydra configuration.
95
- If a run is provided, it retrieves the artifact path for the run, loads
96
- the Hydra configuration from the downloaded artifacts, and returns the
97
- output directory specified in that configuration.
98
-
99
- Args:
100
- run (Run | None): The run object. Defaults to None.
101
-
102
- Returns:
103
- Path: The path to the Hydra output directory.
104
-
105
- Raises:
106
- FileNotFoundError: If the Hydra configuration file is not found
107
- in the artifacts.
108
- """
109
- if run is None:
110
- hc = HydraConfig.get()
111
- return Path(hc.runtime.output_dir)
112
-
113
- path = get_artifact_dir(run) / ".hydra/hydra.yaml"
114
-
115
- if path.exists():
116
- hc = OmegaConf.load(path)
117
- return Path(hc.hydra.runtime.output_dir)
118
-
119
- raise FileNotFoundError
File without changes
File without changes
File without changes
File without changes
File without changes