hydraflow 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/config.py +26 -2
- hydraflow/context.py +65 -0
- hydraflow/mlflow.py +29 -0
- hydraflow/runs.py +299 -4
- hydraflow/util.py +13 -0
- hydraflow-0.1.5.dist-info/METADATA +111 -0
- hydraflow-0.1.5.dist-info/RECORD +10 -0
- hydraflow-0.1.4.dist-info/METADATA +0 -45
- hydraflow-0.1.4.dist-info/RECORD +0 -10
- {hydraflow-0.1.4.dist-info → hydraflow-0.1.5.dist-info}/WHEEL +0 -0
- {hydraflow-0.1.4.dist-info → hydraflow-0.1.5.dist-info}/licenses/LICENSE +0 -0
hydraflow/config.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for working with configuration
|
3
|
+
objects using the OmegaConf library.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
from typing import TYPE_CHECKING
|
@@ -10,12 +15,31 @@ if TYPE_CHECKING:
|
|
10
15
|
|
11
16
|
|
12
17
|
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
13
|
-
|
18
|
+
"""
|
19
|
+
Iterate over the parameters in the given configuration object.
|
20
|
+
|
21
|
+
This function recursively traverses the configuration object and yields
|
22
|
+
key-value pairs representing the parameters.
|
23
|
+
|
24
|
+
Args:
|
25
|
+
config (object): The configuration object to iterate over.
|
26
|
+
prefix (str, optional): The prefix to prepend to the parameter keys.
|
27
|
+
Defaults to "".
|
28
|
+
|
29
|
+
Yields:
|
30
|
+
Key-value pairs representing the parameters.
|
31
|
+
"""
|
32
|
+
if not isinstance(config, (DictConfig, ListConfig)):
|
14
33
|
config = OmegaConf.create(config) # type: ignore
|
15
34
|
|
16
35
|
if isinstance(config, DictConfig):
|
17
36
|
for key, value in config.items():
|
18
|
-
if isinstance(value,
|
37
|
+
if isinstance(value, ListConfig) and not any(
|
38
|
+
isinstance(v, (DictConfig, ListConfig)) for v in value
|
39
|
+
):
|
40
|
+
yield f"{prefix}{key}", value
|
41
|
+
|
42
|
+
elif isinstance(value, (DictConfig, ListConfig)):
|
19
43
|
yield from iter_params(value, f"{prefix}{key}.")
|
20
44
|
|
21
45
|
else:
|
hydraflow/context.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides context managers to log parameters and manage the MLflow
|
3
|
+
run context.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import os
|
@@ -35,6 +40,28 @@ def log_run(
|
|
35
40
|
*,
|
36
41
|
synchronous: bool | None = None,
|
37
42
|
) -> Iterator[Info]:
|
43
|
+
"""
|
44
|
+
Log the parameters from the given configuration object and manage the MLflow
|
45
|
+
run context.
|
46
|
+
|
47
|
+
This context manager logs the parameters from the provided configuration object
|
48
|
+
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
49
|
+
are logged and the run is properly closed.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
config: The configuration object to log the parameters from.
|
53
|
+
synchronous: Whether to log the parameters synchronously.
|
54
|
+
Defaults to None.
|
55
|
+
|
56
|
+
Yields:
|
57
|
+
Info: An `Info` object containing the output directory and artifact directory
|
58
|
+
paths.
|
59
|
+
|
60
|
+
Example:
|
61
|
+
with log_run(config) as info:
|
62
|
+
# Perform operations within the MLflow run context
|
63
|
+
pass
|
64
|
+
"""
|
38
65
|
log_params(config, synchronous=synchronous)
|
39
66
|
|
40
67
|
hc = HydraConfig.get()
|
@@ -61,6 +88,32 @@ def log_run(
|
|
61
88
|
|
62
89
|
@contextmanager
|
63
90
|
def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
|
91
|
+
"""
|
92
|
+
Watch the given directory for changes and call the provided function
|
93
|
+
when a change is detected.
|
94
|
+
|
95
|
+
This context manager sets up a file system watcher on the specified directory.
|
96
|
+
When a file modification is detected, the provided function is called with
|
97
|
+
the path of the modified file. The watcher runs for the specified timeout
|
98
|
+
period or until the context is exited.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
func (Callable[[Path], None]): The function to call when a change is
|
102
|
+
detected. It should accept a single argument of type `Path`,
|
103
|
+
which is the path of the modified file.
|
104
|
+
dir (Path | str, optional): The directory to watch. If not specified,
|
105
|
+
the current MLflow artifact URI is used. Defaults to "".
|
106
|
+
timeout (int, optional): The timeout period in seconds for the watcher
|
107
|
+
to run after the context is exited. Defaults to 60.
|
108
|
+
|
109
|
+
Yields:
|
110
|
+
None: This context manager does not yield any value.
|
111
|
+
|
112
|
+
Example:
|
113
|
+
with watch(log_artifact, "/path/to/dir"):
|
114
|
+
# Perform operations while watching the directory for changes
|
115
|
+
pass
|
116
|
+
"""
|
64
117
|
if not dir:
|
65
118
|
uri = mlflow.get_artifact_uri()
|
66
119
|
dir = uri_to_path(uri)
|
@@ -100,6 +153,18 @@ def chdir_artifact(
|
|
100
153
|
run: Run | Series | str,
|
101
154
|
artifact_path: str | None = None,
|
102
155
|
) -> Iterator[Path]:
|
156
|
+
"""
|
157
|
+
Change the current working directory to the artifact directory of the
|
158
|
+
given run.
|
159
|
+
|
160
|
+
This context manager changes the current working directory to the artifact
|
161
|
+
directory of the given run. It ensures that the directory is changed back
|
162
|
+
to the original directory after the context is exited.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
run: The run to get the artifact directory from.
|
166
|
+
artifact_path: The artifact path.
|
167
|
+
"""
|
103
168
|
curdir = Path.cwd()
|
104
169
|
|
105
170
|
artifact_dir = get_artifact_path(run, artifact_path)
|
hydraflow/mlflow.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra
|
3
|
+
configuration objects and set up experiments using MLflow.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import mlflow
|
@@ -7,6 +12,18 @@ from hydraflow.config import iter_params
|
|
7
12
|
|
8
13
|
|
9
14
|
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
|
15
|
+
"""
|
16
|
+
Set the experiment name and, optionally, the tracking URI.
|
17
|
+
|
18
|
+
This function sets the experiment name by combining the given prefix,
|
19
|
+
the job name from HydraConfig, and the given suffix. Optionally, it can
|
20
|
+
also set the tracking URI.
|
21
|
+
|
22
|
+
Args:
|
23
|
+
prefix: The prefix to prepend to the experiment name.
|
24
|
+
suffix: The suffix to append to the experiment name.
|
25
|
+
uri: The tracking URI to use.
|
26
|
+
"""
|
10
27
|
if uri:
|
11
28
|
mlflow.set_tracking_uri(uri)
|
12
29
|
|
@@ -16,5 +33,17 @@ def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -
|
|
16
33
|
|
17
34
|
|
18
35
|
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
36
|
+
"""
|
37
|
+
Log the parameters from the given configuration object.
|
38
|
+
|
39
|
+
This method logs the parameters from the provided configuration object
|
40
|
+
using MLflow. It iterates over the parameters and logs them using the
|
41
|
+
`mlflow.log_param` method.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
config: The configuration object to log the parameters from.
|
45
|
+
synchronous: Whether to log the parameters synchronously.
|
46
|
+
Defaults to None.
|
47
|
+
"""
|
19
48
|
for key, value in iter_params(config):
|
20
49
|
mlflow.log_param(key, value, synchronous=synchronous)
|
hydraflow/runs.py
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
+
It includes classes and functions to filter runs, retrieve run information, and
|
4
|
+
log artifacts and configurations.
|
5
|
+
"""
|
6
|
+
|
1
7
|
from __future__ import annotations
|
2
8
|
|
3
9
|
from dataclasses import dataclass
|
@@ -21,6 +27,13 @@ if TYPE_CHECKING:
|
|
21
27
|
|
22
28
|
@dataclass
|
23
29
|
class Runs:
|
30
|
+
"""
|
31
|
+
A class to represent a collection of MLflow runs.
|
32
|
+
|
33
|
+
This class provides methods to interact with the runs, such as filtering,
|
34
|
+
retrieving specific runs, and accessing run information.
|
35
|
+
"""
|
36
|
+
|
24
37
|
runs: list[Run_] | DataFrame
|
25
38
|
|
26
39
|
def __repr__(self) -> str:
|
@@ -30,35 +43,145 @@ class Runs:
|
|
30
43
|
return len(self.runs)
|
31
44
|
|
32
45
|
def filter(self, config: object) -> Runs:
|
46
|
+
"""
|
47
|
+
Filter the runs based on the provided configuration.
|
48
|
+
|
49
|
+
This method filters the runs in the collection according to the
|
50
|
+
specified configuration object. The configuration object should
|
51
|
+
contain key-value pairs that correspond to the parameters of the
|
52
|
+
runs. Only the runs that match all the specified parameters will
|
53
|
+
be included in the returned `Runs` object.
|
54
|
+
|
55
|
+
Args:
|
56
|
+
config (object): The configuration object to filter the runs.
|
57
|
+
This object should contain key-value pairs representing
|
58
|
+
the parameters to filter by.
|
59
|
+
|
60
|
+
Returns:
|
61
|
+
Runs: A new `Runs` object containing the filtered runs.
|
62
|
+
"""
|
33
63
|
return Runs(filter_runs(self.runs, config))
|
34
64
|
|
35
65
|
def get(self, config: object) -> Run:
|
66
|
+
"""
|
67
|
+
Retrieve a specific run based on the provided configuration.
|
68
|
+
|
69
|
+
This method filters the runs in the collection according to the
|
70
|
+
specified configuration object and returns the run that matches
|
71
|
+
the provided parameters. If the number of matching runs is not exactly
|
72
|
+
one, an error is raised.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
config (object): The configuration object to identify the run.
|
76
|
+
|
77
|
+
Returns:
|
78
|
+
Run: The run object that matches the provided configuration.
|
79
|
+
|
80
|
+
Raises:
|
81
|
+
ValueError: If the number of filtered runs is not exactly one.
|
82
|
+
"""
|
36
83
|
return Run(get_run(self.runs, config))
|
37
84
|
|
38
85
|
def drop_unique_params(self) -> Runs:
|
86
|
+
"""
|
87
|
+
Drop unique parameters from the runs and return a new Runs object.
|
88
|
+
|
89
|
+
This method removes parameters that take only a single value across all runs
|
90
|
+
in the collection. This is useful for identifying the parameters
|
91
|
+
that actually vary between runs.
|
92
|
+
|
93
|
+
Returns:
|
94
|
+
Runs: A new `Runs` object with unique parameters dropped.
|
95
|
+
|
96
|
+
Raises:
|
97
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
98
|
+
"""
|
39
99
|
if isinstance(self.runs, DataFrame):
|
40
100
|
return Runs(drop_unique_params(self.runs))
|
41
101
|
|
42
102
|
raise NotImplementedError
|
43
103
|
|
44
104
|
def get_param_names(self) -> list[str]:
|
105
|
+
"""
|
106
|
+
Get the parameter names from the runs.
|
107
|
+
|
108
|
+
This method extracts the parameter names from the runs in the collection.
|
109
|
+
If the runs are stored in a DataFrame, it retrieves the column names
|
110
|
+
that correspond to the parameters.
|
111
|
+
|
112
|
+
Returns:
|
113
|
+
list[str]: A list of parameter names.
|
114
|
+
|
115
|
+
Raises:
|
116
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
117
|
+
"""
|
45
118
|
if isinstance(self.runs, DataFrame):
|
46
119
|
return get_param_names(self.runs)
|
47
120
|
|
48
121
|
raise NotImplementedError
|
49
122
|
|
50
123
|
def get_param_dict(self) -> dict[str, list[str]]:
|
124
|
+
"""
|
125
|
+
Get the parameter dictionary from the runs.
|
126
|
+
|
127
|
+
This method extracts the parameter names and their corresponding values
|
128
|
+
from the runs in the collection. If the runs are stored in a DataFrame,
|
129
|
+
it retrieves the unique values for each parameter.
|
130
|
+
|
131
|
+
|
132
|
+
Returns:
|
133
|
+
dict[str, list[str]]: A dictionary of parameter names and their
|
134
|
+
corresponding values.
|
135
|
+
|
136
|
+
Raises:
|
137
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
138
|
+
"""
|
51
139
|
if isinstance(self.runs, DataFrame):
|
52
140
|
return get_param_dict(self.runs)
|
53
141
|
|
54
142
|
raise NotImplementedError
|
55
143
|
|
56
144
|
|
145
|
+
def search_runs(*args, **kwargs) -> Runs:
|
146
|
+
"""
|
147
|
+
Search for runs that match the specified criteria.
|
148
|
+
|
149
|
+
This function wraps the `mlflow.search_runs` function and returns the results
|
150
|
+
as a `Runs` object. It allows for flexible searching of MLflow runs based on
|
151
|
+
various criteria.
|
152
|
+
|
153
|
+
Args:
|
154
|
+
*args: Positional arguments to pass to `mlflow.search_runs`.
|
155
|
+
**kwargs: Keyword arguments to pass to `mlflow.search_runs`.
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
Runs: A `Runs` object containing the search results.
|
159
|
+
"""
|
160
|
+
runs = mlflow.search_runs(*args, **kwargs)
|
161
|
+
return Runs(runs)
|
162
|
+
|
163
|
+
|
57
164
|
def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
|
165
|
+
"""
|
166
|
+
Filter the runs based on the provided configuration.
|
167
|
+
|
168
|
+
This method filters the runs in the collection according to the
|
169
|
+
specified configuration object. The configuration object should
|
170
|
+
contain key-value pairs that correspond to the parameters of the
|
171
|
+
runs. Only the runs that match all the specified parameters will
|
172
|
+
be included in the returned list or DataFrame.
|
173
|
+
|
174
|
+
Args:
|
175
|
+
runs: The runs to filter.
|
176
|
+
config: The configuration object to filter the runs.
|
177
|
+
|
178
|
+
Returns:
|
179
|
+
list[Run_] | DataFrame: A filtered list of runs or a DataFrame.
|
180
|
+
"""
|
58
181
|
if isinstance(runs, list):
|
59
|
-
return
|
182
|
+
return _filter_runs_list(runs, config)
|
60
183
|
|
61
|
-
return
|
184
|
+
return _filter_runs_dataframe(runs, config)
|
62
185
|
|
63
186
|
|
64
187
|
def _is_equal(run: Run_, key: str, value: Any) -> bool:
|
@@ -70,14 +193,14 @@ def _is_equal(run: Run_, key: str, value: Any) -> bool:
|
|
70
193
|
return type(value)(param) == value
|
71
194
|
|
72
195
|
|
73
|
-
def
|
196
|
+
def _filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
|
74
197
|
for key, value in iter_params(config):
|
75
198
|
runs = [run for run in runs if _is_equal(run, key, value)]
|
76
199
|
|
77
200
|
return runs
|
78
201
|
|
79
202
|
|
80
|
-
def
|
203
|
+
def _filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
|
81
204
|
index = np.ones(len(runs), dtype=bool)
|
82
205
|
|
83
206
|
for key, value in iter_params(config):
|
@@ -93,6 +216,21 @@ def filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
|
|
93
216
|
|
94
217
|
|
95
218
|
def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
|
219
|
+
"""
|
220
|
+
Retrieve a specific run based on the provided configuration.
|
221
|
+
|
222
|
+
This method filters the runs in the collection according to the
|
223
|
+
specified configuration object and returns the run that matches
|
224
|
+
the provided parameters. If the number of matching runs is not exactly
|
224
|
+
one, an error is raised.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
runs: The runs to filter.
|
229
|
+
config: The configuration object to identify the run.
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
Run_ | Series: The run that matches the provided configuration.
|
233
|
+
"""
|
96
234
|
runs = filter_runs(runs, config)
|
97
235
|
|
98
236
|
if len(runs) == 1:
|
@@ -103,6 +241,20 @@ def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
|
|
103
241
|
|
104
242
|
|
105
243
|
def drop_unique_params(runs: DataFrame) -> DataFrame:
|
244
|
+
"""
|
245
|
+
Drop unique parameters from the runs and return a new DataFrame.
|
246
|
+
|
247
|
+
This function removes parameters that take only a single value across all runs
|
248
|
+
in the collection. This is useful for identifying the parameters
|
249
|
+
that actually vary between runs.
|
250
|
+
|
251
|
+
Args:
|
252
|
+
runs: The DataFrame containing the runs.
|
253
|
+
|
254
|
+
Returns:
|
255
|
+
DataFrame: A new DataFrame with unique parameters dropped.
|
256
|
+
"""
|
257
|
+
|
106
258
|
def select(column: str) -> bool:
|
107
259
|
return not column.startswith("params.") or len(runs[column].unique()) > 1
|
108
260
|
|
@@ -111,6 +263,20 @@ def drop_unique_params(runs: DataFrame) -> DataFrame:
|
|
111
263
|
|
112
264
|
|
113
265
|
def get_param_names(runs: DataFrame) -> list[str]:
|
266
|
+
"""
|
267
|
+
Get the parameter names from the runs.
|
268
|
+
|
269
|
+
This method extracts the parameter names from the runs in the collection.
|
270
|
+
If the runs are stored in a DataFrame, it retrieves the column names
|
271
|
+
that correspond to the parameters.
|
272
|
+
|
273
|
+
Args:
|
274
|
+
runs: The DataFrame containing the runs.
|
275
|
+
|
276
|
+
Returns:
|
277
|
+
list[str]: A list of parameter names.
|
278
|
+
"""
|
279
|
+
|
114
280
|
def get_name(column: str) -> str:
|
115
281
|
if column.startswith("params."):
|
116
282
|
return column.split(".", maxsplit=1)[-1]
|
@@ -122,6 +288,20 @@ def get_param_names(runs: DataFrame) -> list[str]:
|
|
122
288
|
|
123
289
|
|
124
290
|
def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
|
291
|
+
"""
|
292
|
+
Get the parameter dictionary from the runs.
|
293
|
+
|
294
|
+
This method extracts the parameter names and their corresponding values
|
295
|
+
from the runs in the collection. If the runs are stored in a DataFrame,
|
296
|
+
it retrieves the unique values for each parameter.
|
297
|
+
|
298
|
+
Args:
|
299
|
+
runs: The DataFrame containing the runs.
|
300
|
+
|
301
|
+
Returns:
|
302
|
+
dict[str, list[str]]: A dictionary of parameter names and
|
303
|
+
their corresponding values.
|
304
|
+
"""
|
125
305
|
params = {}
|
126
306
|
for name in get_param_names(runs):
|
127
307
|
params[name] = list(runs[f"params.{name}"].unique())
|
@@ -131,6 +311,14 @@ def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
|
|
131
311
|
|
132
312
|
@dataclass
|
133
313
|
class Run:
|
314
|
+
"""
|
315
|
+
A class to represent a specific MLflow run.
|
316
|
+
|
317
|
+
This class provides methods to interact with the run, such as retrieving
|
318
|
+
the run ID, artifact URI, and configuration. It also includes properties
|
319
|
+
to access the artifact directory, artifact path, and Hydra output directory.
|
320
|
+
"""
|
321
|
+
|
134
322
|
run: Run_ | Series | str
|
135
323
|
|
136
324
|
def __repr__(self) -> str:
|
@@ -138,27 +326,78 @@ class Run:
|
|
138
326
|
|
139
327
|
@property
|
140
328
|
def run_id(self) -> str:
|
329
|
+
"""
|
330
|
+
Get the run ID.
|
331
|
+
|
332
|
+
Returns:
|
333
|
+
str: The run ID.
|
334
|
+
"""
|
141
335
|
return get_run_id(self.run)
|
142
336
|
|
143
337
|
def artifact_uri(self, artifact_path: str | None = None) -> str:
|
338
|
+
"""
|
339
|
+
Get the artifact URI.
|
340
|
+
|
341
|
+
Args:
|
342
|
+
artifact_path (str | None): The artifact path.
|
343
|
+
|
344
|
+
Returns:
|
345
|
+
str: The artifact URI.
|
346
|
+
"""
|
144
347
|
return get_artifact_uri(self.run, artifact_path)
|
145
348
|
|
146
349
|
@property
|
147
350
|
def artifact_dir(self) -> Path:
|
351
|
+
"""
|
352
|
+
Get the artifact directory.
|
353
|
+
|
354
|
+
Returns:
|
355
|
+
Path: The artifact directory.
|
356
|
+
"""
|
148
357
|
return get_artifact_dir(self.run)
|
149
358
|
|
150
359
|
def artifact_path(self, artifact_path: str | None = None) -> Path:
|
360
|
+
"""
|
361
|
+
Get the artifact path.
|
362
|
+
|
363
|
+
Args:
|
364
|
+
artifact_path: The artifact path.
|
365
|
+
|
366
|
+
Returns:
|
367
|
+
Path: The artifact path.
|
368
|
+
"""
|
151
369
|
return get_artifact_path(self.run, artifact_path)
|
152
370
|
|
153
371
|
@property
|
154
372
|
def config(self) -> DictConfig:
|
373
|
+
"""
|
374
|
+
Get the configuration.
|
375
|
+
|
376
|
+
Returns:
|
377
|
+
DictConfig: The configuration.
|
378
|
+
"""
|
155
379
|
return load_config(self.run)
|
156
380
|
|
157
381
|
def log_hydra_output_dir(self) -> None:
|
382
|
+
"""
|
383
|
+
Log the Hydra output directory.
|
384
|
+
|
385
|
+
Returns:
|
386
|
+
None
|
387
|
+
"""
|
158
388
|
log_hydra_output_dir(self.run)
|
159
389
|
|
160
390
|
|
161
391
|
def get_run_id(run: Run_ | Series | str) -> str:
|
392
|
+
"""
|
393
|
+
Get the run ID.
|
394
|
+
|
395
|
+
Args:
|
396
|
+
run: The run object.
|
397
|
+
|
398
|
+
Returns:
|
399
|
+
str: The run ID.
|
400
|
+
"""
|
162
401
|
if isinstance(run, str):
|
163
402
|
return run
|
164
403
|
|
@@ -169,21 +408,59 @@ def get_run_id(run: Run_ | Series | str) -> str:
|
|
169
408
|
|
170
409
|
|
171
410
|
def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
|
411
|
+
"""
|
412
|
+
Get the artifact URI.
|
413
|
+
|
414
|
+
Args:
|
415
|
+
run: The run object.
|
416
|
+
artifact_path: The artifact path.
|
417
|
+
|
418
|
+
Returns:
|
419
|
+
str: The artifact URI.
|
420
|
+
"""
|
172
421
|
run_id = get_run_id(run)
|
173
422
|
return artifact_utils.get_artifact_uri(run_id, artifact_path)
|
174
423
|
|
175
424
|
|
176
425
|
def get_artifact_dir(run: Run_ | Series | str) -> Path:
|
426
|
+
"""
|
427
|
+
Get the artifact directory.
|
428
|
+
|
429
|
+
Args:
|
430
|
+
run: The run object.
|
431
|
+
|
432
|
+
Returns:
|
433
|
+
Path: The artifact directory.
|
434
|
+
"""
|
177
435
|
uri = get_artifact_uri(run)
|
178
436
|
return uri_to_path(uri)
|
179
437
|
|
180
438
|
|
181
439
|
def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
|
440
|
+
"""
|
441
|
+
Get the artifact path.
|
442
|
+
|
443
|
+
Args:
|
444
|
+
run: The run object.
|
445
|
+
artifact_path: The artifact path.
|
446
|
+
|
447
|
+
Returns:
|
448
|
+
Path: The artifact path.
|
449
|
+
"""
|
182
450
|
artifact_dir = get_artifact_dir(run)
|
183
451
|
return artifact_dir / artifact_path if artifact_path else artifact_dir
|
184
452
|
|
185
453
|
|
186
454
|
def load_config(run: Run_ | Series | str) -> DictConfig:
|
455
|
+
"""
|
456
|
+
Load the configuration.
|
457
|
+
|
458
|
+
Args:
|
459
|
+
run: The run object.
|
460
|
+
|
461
|
+
Returns:
|
462
|
+
DictConfig: The configuration.
|
463
|
+
"""
|
187
464
|
run_id = get_run_id(run)
|
188
465
|
return _load_config(run_id)
|
189
466
|
|
@@ -202,6 +479,15 @@ def _load_config(run_id: str) -> DictConfig:
|
|
202
479
|
|
203
480
|
|
204
481
|
def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
|
482
|
+
"""
|
483
|
+
Get the Hydra output directory.
|
484
|
+
|
485
|
+
Args:
|
486
|
+
run: The run object.
|
487
|
+
|
488
|
+
Returns:
|
489
|
+
Path: The Hydra output directory.
|
490
|
+
"""
|
205
491
|
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
206
492
|
|
207
493
|
if path.exists():
|
@@ -212,6 +498,15 @@ def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
|
|
212
498
|
|
213
499
|
|
214
500
|
def log_hydra_output_dir(run: Run_ | Series | str) -> None:
|
501
|
+
"""
|
502
|
+
Log the Hydra output directory.
|
503
|
+
|
504
|
+
Args:
|
505
|
+
run: The run object.
|
506
|
+
|
507
|
+
Returns:
|
508
|
+
None
|
509
|
+
"""
|
215
510
|
output_dir = get_hydra_output_dir(run)
|
216
511
|
run_id = run if isinstance(run, str) else run.info.run_id
|
217
512
|
mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
|
hydraflow/util.py
CHANGED
@@ -4,6 +4,19 @@ from urllib.parse import urlparse
|
|
4
4
|
|
5
5
|
|
6
6
|
def uri_to_path(uri: str) -> Path:
|
7
|
+
"""
|
8
|
+
Convert a URI to a path.
|
9
|
+
|
10
|
+
This function parses the given URI and converts it to a local file system
|
11
|
+
path. On Windows, if the path starts with a forward slash, it is removed
|
12
|
+
to ensure the path is correctly formatted.
|
13
|
+
|
14
|
+
Args:
|
15
|
+
uri (str): The URI to convert.
|
16
|
+
|
17
|
+
Returns:
|
18
|
+
Path: The path corresponding to the URI.
|
19
|
+
"""
|
7
20
|
path = urlparse(uri).path
|
8
21
|
if platform.system() == "Windows" and path.startswith("/"):
|
9
22
|
path = path[1:]
|
@@ -0,0 +1,111 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.1.5
|
4
|
+
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
|
+
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
+
License-Expression: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Documentation
|
17
|
+
Classifier: Topic :: Software Development :: Documentation
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: hydra-core>1.3
|
20
|
+
Requires-Dist: mlflow>2.15
|
21
|
+
Requires-Dist: setuptools
|
22
|
+
Requires-Dist: watchdog
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
# Hydraflow
|
31
|
+
|
32
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
+
[![Python Version][python-v-image]][python-v-link]
|
34
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
+
|
37
|
+
<!-- Badges -->
|
38
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
46
|
+
|
47
|
+
## Overview
|
48
|
+
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
50
|
+
|
51
|
+
## Key Features
|
52
|
+
|
53
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
54
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
55
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
56
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
You can install Hydraflow via pip:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
pip install hydraflow
|
64
|
+
```
|
65
|
+
|
66
|
+
## Getting Started
|
67
|
+
|
68
|
+
Here is a simple example to get you started with Hydraflow:
|
69
|
+
|
70
|
+
```python
|
71
|
+
import hydra
|
72
|
+
import hydraflow
|
73
|
+
import mlflow
|
74
|
+
from dataclasses import dataclass
|
75
|
+
from hydra.core.config_store import ConfigStore
|
76
|
+
from pathlib import Path
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class MySQLConfig:
|
80
|
+
host: str = "localhost"
|
81
|
+
port: int = 3306
|
82
|
+
|
83
|
+
cs = ConfigStore.instance()
|
84
|
+
cs.store(name="config", node=MySQLConfig)
|
85
|
+
|
86
|
+
@hydra.main(version_base=None, config_name="config")
|
87
|
+
def my_app(cfg: MySQLConfig) -> None:
|
88
|
+
# Set experiment by Hydra job name.
|
89
|
+
hydraflow.set_experiment()
|
90
|
+
|
91
|
+
# Automatically log params using Hydra config.
|
92
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
93
|
+
# Your app code below.
|
94
|
+
|
95
|
+
# `info.output_dir` is the Hydra output directory.
|
96
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
97
|
+
|
98
|
+
with hydraflow.watch(callback):
|
99
|
+
# Watch files in the MLflow artifact directory.
|
100
|
+
# You can update metrics or log other artifacts
|
101
|
+
# according to the watched files in your callback
|
102
|
+
# function.
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Your callback function here.
|
106
|
+
def callback(file: Path) -> None:
|
107
|
+
pass
|
108
|
+
|
109
|
+
if __name__ == "__main__":
|
110
|
+
my_app()
|
111
|
+
```
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
|
2
|
+
hydraflow/config.py,sha256=WARa5u1F0n3wCOi65v8v8rUO78ME-mtzMeeeE2Yc1I8,1728
|
3
|
+
hydraflow/context.py,sha256=NYjIMepLtaKyvw1obpE8gR1qu1OBpSB_uc6-5So2tg8,5139
|
4
|
+
hydraflow/mlflow.py,sha256=2YWOYpv8eRB_ROD2yFh6ksKDXHvAPDYb86hrUi9zv6E,1558
|
5
|
+
hydraflow/runs.py,sha256=vH-hrlcoTo8HRmgUWam9gtLXAl_wDzX26HEZGWckdMs,14038
|
6
|
+
hydraflow/util.py,sha256=qdUGtBgY7qOF4Yr4PibJHImbLPf-6WYFVuIKu6zbNbY,614
|
7
|
+
hydraflow-0.1.5.dist-info/METADATA,sha256=8mCKAA9KjcJAUiqP-DPdMl4Gcp3MSXxOF34VYKA2P8I,4224
|
8
|
+
hydraflow-0.1.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
9
|
+
hydraflow-0.1.5.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
10
|
+
hydraflow-0.1.5.dist-info/RECORD,,
|
@@ -1,45 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.3
|
2
|
-
Name: hydraflow
|
3
|
-
Version: 0.1.4
|
4
|
-
Summary: Hydra with MLflow
|
5
|
-
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
-
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
-
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
-
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
-
License-Expression: MIT
|
10
|
-
License-File: LICENSE
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
12
|
-
Classifier: Programming Language :: Python
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
16
|
-
Classifier: Topic :: Documentation
|
17
|
-
Classifier: Topic :: Software Development :: Documentation
|
18
|
-
Requires-Python: >=3.10
|
19
|
-
Requires-Dist: hydra-core>1.3
|
20
|
-
Requires-Dist: mlflow>2.15
|
21
|
-
Requires-Dist: setuptools
|
22
|
-
Requires-Dist: watchdog
|
23
|
-
Provides-Extra: dev
|
24
|
-
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
-
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
-
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
-
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
-
Description-Content-Type: text/markdown
|
29
|
-
|
30
|
-
# hydraflow
|
31
|
-
|
32
|
-
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
-
[![Python Version][python-v-image]][python-v-link]
|
34
|
-
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
-
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
-
|
37
|
-
<!-- Badges -->
|
38
|
-
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
-
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
-
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
-
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
-
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
hydraflow-0.1.4.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
|
2
|
-
hydraflow/config.py,sha256=b3Plh_lmq94loZNw9QP2asd6thCLyTzzYSutH0cONXA,964
|
3
|
-
hydraflow/context.py,sha256=3vejDbRYQBuBwlhpBpOv5aoyZ-yS8UUzpbCFK1V1uvw,2720
|
4
|
-
hydraflow/mlflow.py,sha256=unBP3Y7ujTM3E_Hq_eYvRVFZoGfTA7B0h4FkOZtPPqc,566
|
5
|
-
hydraflow/runs.py,sha256=127YykWzmiNUUuJSGPOCZasXmd6tcE15HU32j8x71ck,5864
|
6
|
-
hydraflow/util.py,sha256=_BdOMq5tKPm8HOehb2s2ZIBpJYyVpvO_yaAIxbSj51I,253
|
7
|
-
hydraflow-0.1.4.dist-info/METADATA,sha256=Xw-xcDKdzkHa7bKDZUI6MXpOKekcyFbMyBy1yANjNQs,1903
|
8
|
-
hydraflow-0.1.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
9
|
-
hydraflow-0.1.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
10
|
-
hydraflow-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|