hydraflow 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- hydraflow/config.py +26 -2
- hydraflow/context.py +65 -0
- hydraflow/mlflow.py +29 -0
- hydraflow/runs.py +299 -4
- hydraflow/util.py +13 -0
- hydraflow-0.1.5.dist-info/METADATA +111 -0
- hydraflow-0.1.5.dist-info/RECORD +10 -0
- hydraflow-0.1.4.dist-info/METADATA +0 -45
- hydraflow-0.1.4.dist-info/RECORD +0 -10
- {hydraflow-0.1.4.dist-info → hydraflow-0.1.5.dist-info}/WHEEL +0 -0
- {hydraflow-0.1.4.dist-info → hydraflow-0.1.5.dist-info}/licenses/LICENSE +0 -0
hydraflow/config.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for working with configuration
|
3
|
+
objects using the OmegaConf library.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
from typing import TYPE_CHECKING
|
@@ -10,12 +15,31 @@ if TYPE_CHECKING:
|
|
10
15
|
|
11
16
|
|
12
17
|
def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
|
13
|
-
|
18
|
+
"""
|
19
|
+
Iterate over the parameters in the given configuration object.
|
20
|
+
|
21
|
+
This function recursively traverses the configuration object and yields
|
22
|
+
key-value pairs representing the parameters.
|
23
|
+
|
24
|
+
Args:
|
25
|
+
config (object): The configuration object to iterate over.
|
26
|
+
prefix (str, optional): The prefix to prepend to the parameter keys.
|
27
|
+
Defaults to "".
|
28
|
+
|
29
|
+
Yields:
|
30
|
+
Key-value pairs representing the parameters.
|
31
|
+
"""
|
32
|
+
if not isinstance(config, (DictConfig, ListConfig)):
|
14
33
|
config = OmegaConf.create(config) # type: ignore
|
15
34
|
|
16
35
|
if isinstance(config, DictConfig):
|
17
36
|
for key, value in config.items():
|
18
|
-
if isinstance(value,
|
37
|
+
if isinstance(value, ListConfig) and not any(
|
38
|
+
isinstance(v, (DictConfig, ListConfig)) for v in value
|
39
|
+
):
|
40
|
+
yield f"{prefix}{key}", value
|
41
|
+
|
42
|
+
elif isinstance(value, (DictConfig, ListConfig)):
|
19
43
|
yield from iter_params(value, f"{prefix}{key}.")
|
20
44
|
|
21
45
|
else:
|
hydraflow/context.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides context managers to log parameters and manage the MLflow
|
3
|
+
run context.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import os
|
@@ -35,6 +40,28 @@ def log_run(
|
|
35
40
|
*,
|
36
41
|
synchronous: bool | None = None,
|
37
42
|
) -> Iterator[Info]:
|
43
|
+
"""
|
44
|
+
Log the parameters from the given configuration object and manage the MLflow
|
45
|
+
run context.
|
46
|
+
|
47
|
+
This context manager logs the parameters from the provided configuration object
|
48
|
+
using MLflow. It also manages the MLflow run context, ensuring that artifacts
|
49
|
+
are logged and the run is properly closed.
|
50
|
+
|
51
|
+
Args:
|
52
|
+
config: The configuration object to log the parameters from.
|
53
|
+
synchronous: Whether to log the parameters synchronously.
|
54
|
+
Defaults to None.
|
55
|
+
|
56
|
+
Yields:
|
57
|
+
Info: An `Info` object containing the output directory and artifact directory
|
58
|
+
paths.
|
59
|
+
|
60
|
+
Example:
|
61
|
+
with log_run(config) as info:
|
62
|
+
# Perform operations within the MLflow run context
|
63
|
+
pass
|
64
|
+
"""
|
38
65
|
log_params(config, synchronous=synchronous)
|
39
66
|
|
40
67
|
hc = HydraConfig.get()
|
@@ -61,6 +88,32 @@ def log_run(
|
|
61
88
|
|
62
89
|
@contextmanager
|
63
90
|
def watch(func: Callable[[Path], None], dir: Path | str = "", timeout: int = 60) -> Iterator[None]:
|
91
|
+
"""
|
92
|
+
Watch the given directory for changes and call the provided function
|
93
|
+
when a change is detected.
|
94
|
+
|
95
|
+
This context manager sets up a file system watcher on the specified directory.
|
96
|
+
When a file modification is detected, the provided function is called with
|
97
|
+
the path of the modified file. The watcher runs for the specified timeout
|
98
|
+
period or until the context is exited.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
func (Callable[[Path], None]): The function to call when a change is
|
102
|
+
detected. It should accept a single argument of type `Path`,
|
103
|
+
which is the path of the modified file.
|
104
|
+
dir (Path | str, optional): The directory to watch. If not specified,
|
105
|
+
the current MLflow artifact URI is used. Defaults to "".
|
106
|
+
timeout (int, optional): The timeout period in seconds for the watcher
|
107
|
+
to run after the context is exited. Defaults to 60.
|
108
|
+
|
109
|
+
Yields:
|
110
|
+
None: This context manager does not return any value.
|
111
|
+
|
112
|
+
Example:
|
113
|
+
with watch(log_artifact, "/path/to/dir"):
|
114
|
+
# Perform operations while watching the directory for changes
|
115
|
+
pass
|
116
|
+
"""
|
64
117
|
if not dir:
|
65
118
|
uri = mlflow.get_artifact_uri()
|
66
119
|
dir = uri_to_path(uri)
|
@@ -100,6 +153,18 @@ def chdir_artifact(
|
|
100
153
|
run: Run | Series | str,
|
101
154
|
artifact_path: str | None = None,
|
102
155
|
) -> Iterator[Path]:
|
156
|
+
"""
|
157
|
+
Change the current working directory to the artifact directory of the
|
158
|
+
given run.
|
159
|
+
|
160
|
+
This context manager changes the current working directory to the artifact
|
161
|
+
directory of the given run. It ensures that the directory is changed back
|
162
|
+
to the original directory after the context is exited.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
run: The run to get the artifact directory from.
|
166
|
+
artifact_path: The artifact path.
|
167
|
+
"""
|
103
168
|
curdir = Path.cwd()
|
104
169
|
|
105
170
|
artifact_dir = get_artifact_path(run, artifact_path)
|
hydraflow/mlflow.py
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality to log parameters from Hydra
|
3
|
+
configuration objects and set up experiments using MLflow.
|
4
|
+
"""
|
5
|
+
|
1
6
|
from __future__ import annotations
|
2
7
|
|
3
8
|
import mlflow
|
@@ -7,6 +12,18 @@ from hydraflow.config import iter_params
|
|
7
12
|
|
8
13
|
|
9
14
|
def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -> None:
|
15
|
+
"""
|
16
|
+
Set the experiment name and, optionally, the tracking URI.
|
17
|
+
|
18
|
+
This function sets the experiment name by combining the given prefix,
|
19
|
+
the job name from HydraConfig, and the given suffix. Optionally, it can
|
20
|
+
also set the tracking URI.
|
21
|
+
|
22
|
+
Args:
|
23
|
+
prefix: The prefix to prepend to the experiment name.
|
24
|
+
suffix: The suffix to append to the experiment name.
|
25
|
+
uri: The tracking URI to use.
|
26
|
+
"""
|
10
27
|
if uri:
|
11
28
|
mlflow.set_tracking_uri(uri)
|
12
29
|
|
@@ -16,5 +33,17 @@ def set_experiment(prefix: str = "", suffix: str = "", uri: str | None = None) -
|
|
16
33
|
|
17
34
|
|
18
35
|
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
36
|
+
"""
|
37
|
+
Log the parameters from the given configuration object.
|
38
|
+
|
39
|
+
This method logs the parameters from the provided configuration object
|
40
|
+
using MLflow. It iterates over the parameters and logs them using the
|
41
|
+
`mlflow.log_param` method.
|
42
|
+
|
43
|
+
Args:
|
44
|
+
config: The configuration object to log the parameters from.
|
45
|
+
synchronous: Whether to log the parameters synchronously.
|
46
|
+
Defaults to None.
|
47
|
+
"""
|
19
48
|
for key, value in iter_params(config):
|
20
49
|
mlflow.log_param(key, value, synchronous=synchronous)
|
hydraflow/runs.py
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
"""
|
2
|
+
This module provides functionality for managing and interacting with MLflow runs.
|
3
|
+
It includes classes and functions to filter runs, retrieve run information, and
|
4
|
+
log artifacts and configurations.
|
5
|
+
"""
|
6
|
+
|
1
7
|
from __future__ import annotations
|
2
8
|
|
3
9
|
from dataclasses import dataclass
|
@@ -21,6 +27,13 @@ if TYPE_CHECKING:
|
|
21
27
|
|
22
28
|
@dataclass
|
23
29
|
class Runs:
|
30
|
+
"""
|
31
|
+
A class to represent a collection of MLflow runs.
|
32
|
+
|
33
|
+
This class provides methods to interact with the runs, such as filtering,
|
34
|
+
retrieving specific runs, and accessing run information.
|
35
|
+
"""
|
36
|
+
|
24
37
|
runs: list[Run_] | DataFrame
|
25
38
|
|
26
39
|
def __repr__(self) -> str:
|
@@ -30,35 +43,145 @@ class Runs:
|
|
30
43
|
return len(self.runs)
|
31
44
|
|
32
45
|
def filter(self, config: object) -> Runs:
|
46
|
+
"""
|
47
|
+
Filter the runs based on the provided configuration.
|
48
|
+
|
49
|
+
This method filters the runs in the collection according to the
|
50
|
+
specified configuration object. The configuration object should
|
51
|
+
contain key-value pairs that correspond to the parameters of the
|
52
|
+
runs. Only the runs that match all the specified parameters will
|
53
|
+
be included in the returned `Runs` object.
|
54
|
+
|
55
|
+
Args:
|
56
|
+
config (object): The configuration object to filter the runs.
|
57
|
+
This object should contain key-value pairs representing
|
58
|
+
the parameters to filter by.
|
59
|
+
|
60
|
+
Returns:
|
61
|
+
Runs: A new `Runs` object containing the filtered runs.
|
62
|
+
"""
|
33
63
|
return Runs(filter_runs(self.runs, config))
|
34
64
|
|
35
65
|
def get(self, config: object) -> Run:
|
66
|
+
"""
|
67
|
+
Retrieve a specific run based on the provided configuration.
|
68
|
+
|
69
|
+
This method filters the runs in the collection according to the
|
70
|
+
specified configuration object and returns the run that matches
|
71
|
+
the provided parameters. If the number of matching runs is not exactly one,
|
72
|
+
an error is raised.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
config (object): The configuration object to identify the run.
|
76
|
+
|
77
|
+
Returns:
|
78
|
+
Run: The run object that matches the provided configuration.
|
79
|
+
|
80
|
+
Raises:
|
81
|
+
ValueError: If the number of filtered runs is not exactly one.
|
82
|
+
"""
|
36
83
|
return Run(get_run(self.runs, config))
|
37
84
|
|
38
85
|
def drop_unique_params(self) -> Runs:
|
86
|
+
"""
|
87
|
+
Drop unique parameters from the runs and return a new Runs object.
|
88
|
+
|
89
|
+
This method removes parameters that have only one distinct value across all
|
90
|
+
runs in the collection. This is useful for focusing on the parameters
|
91
|
+
whose values differ between runs.
|
92
|
+
|
93
|
+
Returns:
|
94
|
+
Runs: A new `Runs` object with unique parameters dropped.
|
95
|
+
|
96
|
+
Raises:
|
97
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
98
|
+
"""
|
39
99
|
if isinstance(self.runs, DataFrame):
|
40
100
|
return Runs(drop_unique_params(self.runs))
|
41
101
|
|
42
102
|
raise NotImplementedError
|
43
103
|
|
44
104
|
def get_param_names(self) -> list[str]:
|
105
|
+
"""
|
106
|
+
Get the parameter names from the runs.
|
107
|
+
|
108
|
+
This method extracts the parameter names from the runs in the collection.
|
109
|
+
If the runs are stored in a DataFrame, it retrieves the column names
|
110
|
+
that correspond to the parameters.
|
111
|
+
|
112
|
+
Returns:
|
113
|
+
list[str]: A list of parameter names.
|
114
|
+
|
115
|
+
Raises:
|
116
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
117
|
+
"""
|
45
118
|
if isinstance(self.runs, DataFrame):
|
46
119
|
return get_param_names(self.runs)
|
47
120
|
|
48
121
|
raise NotImplementedError
|
49
122
|
|
50
123
|
def get_param_dict(self) -> dict[str, list[str]]:
|
124
|
+
"""
|
125
|
+
Get the parameter dictionary from the runs.
|
126
|
+
|
127
|
+
This method extracts the parameter names and their corresponding values
|
128
|
+
from the runs in the collection. If the runs are stored in a DataFrame,
|
129
|
+
it retrieves the unique values for each parameter.
|
130
|
+
|
131
|
+
|
132
|
+
Returns:
|
133
|
+
dict[str, list[str]]: A dictionary of parameter names and their
|
134
|
+
corresponding values.
|
135
|
+
|
136
|
+
Raises:
|
137
|
+
NotImplementedError: If the runs are not in a DataFrame format.
|
138
|
+
"""
|
51
139
|
if isinstance(self.runs, DataFrame):
|
52
140
|
return get_param_dict(self.runs)
|
53
141
|
|
54
142
|
raise NotImplementedError
|
55
143
|
|
56
144
|
|
145
|
+
def search_runs(*args, **kwargs) -> Runs:
|
146
|
+
"""
|
147
|
+
Search for runs that match the specified criteria.
|
148
|
+
|
149
|
+
This function wraps the `mlflow.search_runs` function and returns the results
|
150
|
+
as a `Runs` object. It allows for flexible searching of MLflow runs based on
|
151
|
+
various criteria.
|
152
|
+
|
153
|
+
Args:
|
154
|
+
*args: Positional arguments to pass to `mlflow.search_runs`.
|
155
|
+
**kwargs: Keyword arguments to pass to `mlflow.search_runs`.
|
156
|
+
|
157
|
+
Returns:
|
158
|
+
Runs: A `Runs` object containing the search results.
|
159
|
+
"""
|
160
|
+
runs = mlflow.search_runs(*args, **kwargs)
|
161
|
+
return Runs(runs)
|
162
|
+
|
163
|
+
|
57
164
|
def filter_runs(runs: list[Run_] | DataFrame, config: object) -> list[Run_] | DataFrame:
|
165
|
+
"""
|
166
|
+
Filter the runs based on the provided configuration.
|
167
|
+
|
168
|
+
This method filters the runs in the collection according to the
|
169
|
+
specified configuration object. The configuration object should
|
170
|
+
contain key-value pairs that correspond to the parameters of the
|
171
|
+
runs. Only the runs that match all the specified parameters will
|
172
|
+
be included in the returned list or DataFrame.
|
173
|
+
|
174
|
+
Args:
|
175
|
+
runs: The runs to filter.
|
176
|
+
config: The configuration object to filter the runs.
|
177
|
+
|
178
|
+
Returns:
|
179
|
+
list[Run_] | DataFrame: A filtered list of runs or a DataFrame.
|
180
|
+
"""
|
58
181
|
if isinstance(runs, list):
|
59
|
-
return
|
182
|
+
return _filter_runs_list(runs, config)
|
60
183
|
|
61
|
-
return
|
184
|
+
return _filter_runs_dataframe(runs, config)
|
62
185
|
|
63
186
|
|
64
187
|
def _is_equal(run: Run_, key: str, value: Any) -> bool:
|
@@ -70,14 +193,14 @@ def _is_equal(run: Run_, key: str, value: Any) -> bool:
|
|
70
193
|
return type(value)(param) == value
|
71
194
|
|
72
195
|
|
73
|
-
def
|
196
|
+
def _filter_runs_list(runs: list[Run_], config: object) -> list[Run_]:
|
74
197
|
for key, value in iter_params(config):
|
75
198
|
runs = [run for run in runs if _is_equal(run, key, value)]
|
76
199
|
|
77
200
|
return runs
|
78
201
|
|
79
202
|
|
80
|
-
def
|
203
|
+
def _filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
|
81
204
|
index = np.ones(len(runs), dtype=bool)
|
82
205
|
|
83
206
|
for key, value in iter_params(config):
|
@@ -93,6 +216,21 @@ def filter_runs_dataframe(runs: DataFrame, config: object) -> DataFrame:
|
|
93
216
|
|
94
217
|
|
95
218
|
def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
|
219
|
+
"""
|
220
|
+
Retrieve a specific run based on the provided configuration.
|
221
|
+
|
222
|
+
This method filters the runs in the collection according to the
|
223
|
+
specified configuration object and returns the run that matches
|
224
|
+
the provided parameters. If the number of matching runs is not exactly one,
|
225
|
+
an error is raised.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
runs: The runs to filter.
|
229
|
+
config: The configuration object to identify the run.
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
Run_ | Series: The run that matches the provided configuration.
|
233
|
+
"""
|
96
234
|
runs = filter_runs(runs, config)
|
97
235
|
|
98
236
|
if len(runs) == 1:
|
@@ -103,6 +241,20 @@ def get_run(runs: list[Run_] | DataFrame, config: object) -> Run_ | Series:
|
|
103
241
|
|
104
242
|
|
105
243
|
def drop_unique_params(runs: DataFrame) -> DataFrame:
|
244
|
+
"""
|
245
|
+
Drop unique parameters from the runs and return a new DataFrame.
|
246
|
+
|
247
|
+
This method removes parameters that have only one distinct value across all
|
248
|
+
runs in the collection. This is useful for focusing on the parameters
|
249
|
+
whose values differ between runs.
|
250
|
+
|
251
|
+
Args:
|
252
|
+
runs: The DataFrame containing the runs.
|
253
|
+
|
254
|
+
Returns:
|
255
|
+
DataFrame: A new DataFrame with unique parameters dropped.
|
256
|
+
"""
|
257
|
+
|
106
258
|
def select(column: str) -> bool:
|
107
259
|
return not column.startswith("params.") or len(runs[column].unique()) > 1
|
108
260
|
|
@@ -111,6 +263,20 @@ def drop_unique_params(runs: DataFrame) -> DataFrame:
|
|
111
263
|
|
112
264
|
|
113
265
|
def get_param_names(runs: DataFrame) -> list[str]:
|
266
|
+
"""
|
267
|
+
Get the parameter names from the runs.
|
268
|
+
|
269
|
+
This method extracts the parameter names from the runs in the collection.
|
270
|
+
If the runs are stored in a DataFrame, it retrieves the column names
|
271
|
+
that correspond to the parameters.
|
272
|
+
|
273
|
+
Args:
|
274
|
+
runs: The DataFrame containing the runs.
|
275
|
+
|
276
|
+
Returns:
|
277
|
+
list[str]: A list of parameter names.
|
278
|
+
"""
|
279
|
+
|
114
280
|
def get_name(column: str) -> str:
|
115
281
|
if column.startswith("params."):
|
116
282
|
return column.split(".", maxsplit=1)[-1]
|
@@ -122,6 +288,20 @@ def get_param_names(runs: DataFrame) -> list[str]:
|
|
122
288
|
|
123
289
|
|
124
290
|
def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
|
291
|
+
"""
|
292
|
+
Get the parameter dictionary from the runs.
|
293
|
+
|
294
|
+
This method extracts the parameter names and their corresponding values
|
295
|
+
from the runs in the collection. If the runs are stored in a DataFrame,
|
296
|
+
it retrieves the unique values for each parameter.
|
297
|
+
|
298
|
+
Args:
|
299
|
+
runs: The DataFrame containing the runs.
|
300
|
+
|
301
|
+
Returns:
|
302
|
+
dict[str, list[str]]: A dictionary of parameter names and
|
303
|
+
their corresponding values.
|
304
|
+
"""
|
125
305
|
params = {}
|
126
306
|
for name in get_param_names(runs):
|
127
307
|
params[name] = list(runs[f"params.{name}"].unique())
|
@@ -131,6 +311,14 @@ def get_param_dict(runs: DataFrame) -> dict[str, list[str]]:
|
|
131
311
|
|
132
312
|
@dataclass
|
133
313
|
class Run:
|
314
|
+
"""
|
315
|
+
A class to represent a specific MLflow run.
|
316
|
+
|
317
|
+
This class provides methods to interact with the run, such as retrieving
|
318
|
+
the run ID, artifact URI, and configuration. It also includes properties
|
319
|
+
to access the artifact directory, artifact path, and Hydra output directory.
|
320
|
+
"""
|
321
|
+
|
134
322
|
run: Run_ | Series | str
|
135
323
|
|
136
324
|
def __repr__(self) -> str:
|
@@ -138,27 +326,78 @@ class Run:
|
|
138
326
|
|
139
327
|
@property
|
140
328
|
def run_id(self) -> str:
|
329
|
+
"""
|
330
|
+
Get the run ID.
|
331
|
+
|
332
|
+
Returns:
|
333
|
+
str: The run ID.
|
334
|
+
"""
|
141
335
|
return get_run_id(self.run)
|
142
336
|
|
143
337
|
def artifact_uri(self, artifact_path: str | None = None) -> str:
|
338
|
+
"""
|
339
|
+
Get the artifact URI.
|
340
|
+
|
341
|
+
Args:
|
342
|
+
artifact_path (str | None): The artifact path.
|
343
|
+
|
344
|
+
Returns:
|
345
|
+
str: The artifact URI.
|
346
|
+
"""
|
144
347
|
return get_artifact_uri(self.run, artifact_path)
|
145
348
|
|
146
349
|
@property
|
147
350
|
def artifact_dir(self) -> Path:
|
351
|
+
"""
|
352
|
+
Get the artifact directory.
|
353
|
+
|
354
|
+
Returns:
|
355
|
+
Path: The artifact directory.
|
356
|
+
"""
|
148
357
|
return get_artifact_dir(self.run)
|
149
358
|
|
150
359
|
def artifact_path(self, artifact_path: str | None = None) -> Path:
|
360
|
+
"""
|
361
|
+
Get the artifact path.
|
362
|
+
|
363
|
+
Args:
|
364
|
+
artifact_path: The artifact path.
|
365
|
+
|
366
|
+
Returns:
|
367
|
+
Path: The artifact path.
|
368
|
+
"""
|
151
369
|
return get_artifact_path(self.run, artifact_path)
|
152
370
|
|
153
371
|
@property
|
154
372
|
def config(self) -> DictConfig:
|
373
|
+
"""
|
374
|
+
Get the configuration.
|
375
|
+
|
376
|
+
Returns:
|
377
|
+
DictConfig: The configuration.
|
378
|
+
"""
|
155
379
|
return load_config(self.run)
|
156
380
|
|
157
381
|
def log_hydra_output_dir(self) -> None:
|
382
|
+
"""
|
383
|
+
Log the Hydra output directory.
|
384
|
+
|
385
|
+
Returns:
|
386
|
+
None
|
387
|
+
"""
|
158
388
|
log_hydra_output_dir(self.run)
|
159
389
|
|
160
390
|
|
161
391
|
def get_run_id(run: Run_ | Series | str) -> str:
|
392
|
+
"""
|
393
|
+
Get the run ID.
|
394
|
+
|
395
|
+
Args:
|
396
|
+
run: The run object.
|
397
|
+
|
398
|
+
Returns:
|
399
|
+
str: The run ID.
|
400
|
+
"""
|
162
401
|
if isinstance(run, str):
|
163
402
|
return run
|
164
403
|
|
@@ -169,21 +408,59 @@ def get_run_id(run: Run_ | Series | str) -> str:
|
|
169
408
|
|
170
409
|
|
171
410
|
def get_artifact_uri(run: Run_ | Series | str, artifact_path: str | None = None) -> str:
|
411
|
+
"""
|
412
|
+
Get the artifact URI.
|
413
|
+
|
414
|
+
Args:
|
415
|
+
run: The run object.
|
416
|
+
artifact_path: The artifact path.
|
417
|
+
|
418
|
+
Returns:
|
419
|
+
str: The artifact URI.
|
420
|
+
"""
|
172
421
|
run_id = get_run_id(run)
|
173
422
|
return artifact_utils.get_artifact_uri(run_id, artifact_path)
|
174
423
|
|
175
424
|
|
176
425
|
def get_artifact_dir(run: Run_ | Series | str) -> Path:
|
426
|
+
"""
|
427
|
+
Get the artifact directory.
|
428
|
+
|
429
|
+
Args:
|
430
|
+
run: The run object.
|
431
|
+
|
432
|
+
Returns:
|
433
|
+
Path: The artifact directory.
|
434
|
+
"""
|
177
435
|
uri = get_artifact_uri(run)
|
178
436
|
return uri_to_path(uri)
|
179
437
|
|
180
438
|
|
181
439
|
def get_artifact_path(run: Run_ | Series | str, artifact_path: str | None = None) -> Path:
|
440
|
+
"""
|
441
|
+
Get the artifact path.
|
442
|
+
|
443
|
+
Args:
|
444
|
+
run: The run object.
|
445
|
+
artifact_path: The artifact path.
|
446
|
+
|
447
|
+
Returns:
|
448
|
+
Path: The artifact path.
|
449
|
+
"""
|
182
450
|
artifact_dir = get_artifact_dir(run)
|
183
451
|
return artifact_dir / artifact_path if artifact_path else artifact_dir
|
184
452
|
|
185
453
|
|
186
454
|
def load_config(run: Run_ | Series | str) -> DictConfig:
|
455
|
+
"""
|
456
|
+
Load the configuration.
|
457
|
+
|
458
|
+
Args:
|
459
|
+
run: The run object.
|
460
|
+
|
461
|
+
Returns:
|
462
|
+
DictConfig: The configuration.
|
463
|
+
"""
|
187
464
|
run_id = get_run_id(run)
|
188
465
|
return _load_config(run_id)
|
189
466
|
|
@@ -202,6 +479,15 @@ def _load_config(run_id: str) -> DictConfig:
|
|
202
479
|
|
203
480
|
|
204
481
|
def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
|
482
|
+
"""
|
483
|
+
Get the Hydra output directory.
|
484
|
+
|
485
|
+
Args:
|
486
|
+
run: The run object.
|
487
|
+
|
488
|
+
Returns:
|
489
|
+
Path: The Hydra output directory.
|
490
|
+
"""
|
205
491
|
path = get_artifact_dir(run) / ".hydra/hydra.yaml"
|
206
492
|
|
207
493
|
if path.exists():
|
@@ -212,6 +498,15 @@ def get_hydra_output_dir(run: Run_ | Series | str) -> Path:
|
|
212
498
|
|
213
499
|
|
214
500
|
def log_hydra_output_dir(run: Run_ | Series | str) -> None:
|
501
|
+
"""
|
502
|
+
Log the Hydra output directory.
|
503
|
+
|
504
|
+
Args:
|
505
|
+
run: The run object.
|
506
|
+
|
507
|
+
Returns:
|
508
|
+
None
|
509
|
+
"""
|
215
510
|
output_dir = get_hydra_output_dir(run)
|
216
511
|
run_id = run if isinstance(run, str) else run.info.run_id
|
217
512
|
mlflow.log_artifacts(output_dir.as_posix(), run_id=run_id)
|
hydraflow/util.py
CHANGED
@@ -4,6 +4,19 @@ from urllib.parse import urlparse
|
|
4
4
|
|
5
5
|
|
6
6
|
def uri_to_path(uri: str) -> Path:
|
7
|
+
"""
|
8
|
+
Convert a URI to a path.
|
9
|
+
|
10
|
+
This function parses the given URI and converts it to a local file system
|
11
|
+
path. On Windows, if the path starts with a forward slash, it is removed
|
12
|
+
to ensure the path is correctly formatted.
|
13
|
+
|
14
|
+
Args:
|
15
|
+
uri (str): The URI to convert.
|
16
|
+
|
17
|
+
Returns:
|
18
|
+
Path: The path corresponding to the URI.
|
19
|
+
"""
|
7
20
|
path = urlparse(uri).path
|
8
21
|
if platform.system() == "Windows" and path.startswith("/"):
|
9
22
|
path = path[1:]
|
@@ -0,0 +1,111 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: hydraflow
|
3
|
+
Version: 0.1.5
|
4
|
+
Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
|
5
|
+
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
+
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
+
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
+
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
+
License-Expression: MIT
|
10
|
+
License-File: LICENSE
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Programming Language :: Python
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Topic :: Documentation
|
17
|
+
Classifier: Topic :: Software Development :: Documentation
|
18
|
+
Requires-Python: >=3.10
|
19
|
+
Requires-Dist: hydra-core>1.3
|
20
|
+
Requires-Dist: mlflow>2.15
|
21
|
+
Requires-Dist: setuptools
|
22
|
+
Requires-Dist: watchdog
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
+
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
+
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
+
Description-Content-Type: text/markdown
|
29
|
+
|
30
|
+
# Hydraflow
|
31
|
+
|
32
|
+
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
+
[![Python Version][python-v-image]][python-v-link]
|
34
|
+
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
+
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
+
|
37
|
+
<!-- Badges -->
|
38
|
+
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
+
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
+
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
+
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
+
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
+
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
+
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
+
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
46
|
+
|
47
|
+
## Overview
|
48
|
+
|
49
|
+
Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
|
50
|
+
|
51
|
+
## Key Features
|
52
|
+
|
53
|
+
- **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
|
54
|
+
- **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
|
55
|
+
- **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
|
56
|
+
- **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
|
57
|
+
|
58
|
+
## Installation
|
59
|
+
|
60
|
+
You can install Hydraflow via pip:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
pip install hydraflow
|
64
|
+
```
|
65
|
+
|
66
|
+
## Getting Started
|
67
|
+
|
68
|
+
Here is a simple example to get you started with Hydraflow:
|
69
|
+
|
70
|
+
```python
|
71
|
+
import hydra
|
72
|
+
import hydraflow
|
73
|
+
import mlflow
|
74
|
+
from dataclasses import dataclass
|
75
|
+
from hydra.core.config_store import ConfigStore
|
76
|
+
from pathlib import Path
|
77
|
+
|
78
|
+
@dataclass
|
79
|
+
class MySQLConfig:
|
80
|
+
host: str = "localhost"
|
81
|
+
port: int = 3306
|
82
|
+
|
83
|
+
cs = ConfigStore.instance()
|
84
|
+
cs.store(name="config", node=MySQLConfig)
|
85
|
+
|
86
|
+
@hydra.main(version_base=None, config_name="config")
|
87
|
+
def my_app(cfg: MySQLConfig) -> None:
|
88
|
+
# Set experiment by Hydra job name.
|
89
|
+
hydraflow.set_experiment()
|
90
|
+
|
91
|
+
# Automatically log params using Hydra config.
|
92
|
+
with mlflow.start_run(), hydraflow.log_run(cfg) as info:
|
93
|
+
# Your app code below.
|
94
|
+
|
95
|
+
# `info.output_dir` is the Hydra output directory.
|
96
|
+
# `info.artifact_dir` is the MLflow artifact directory.
|
97
|
+
|
98
|
+
with hydraflow.watch(callback):
|
99
|
+
# Watch files in the MLflow artifact directory.
|
100
|
+
# You can update metrics or log other artifacts
|
101
|
+
# according to the watched files in your callback
|
102
|
+
# function.
|
103
|
+
pass
|
104
|
+
|
105
|
+
# Your callback function here.
|
106
|
+
def callback(file: Path) -> None:
|
107
|
+
pass
|
108
|
+
|
109
|
+
if __name__ == "__main__":
|
110
|
+
my_app()
|
111
|
+
```
|
@@ -0,0 +1,10 @@
|
|
1
|
+
hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
|
2
|
+
hydraflow/config.py,sha256=WARa5u1F0n3wCOi65v8v8rUO78ME-mtzMeeeE2Yc1I8,1728
|
3
|
+
hydraflow/context.py,sha256=NYjIMepLtaKyvw1obpE8gR1qu1OBpSB_uc6-5So2tg8,5139
|
4
|
+
hydraflow/mlflow.py,sha256=2YWOYpv8eRB_ROD2yFh6ksKDXHvAPDYb86hrUi9zv6E,1558
|
5
|
+
hydraflow/runs.py,sha256=vH-hrlcoTo8HRmgUWam9gtLXAl_wDzX26HEZGWckdMs,14038
|
6
|
+
hydraflow/util.py,sha256=qdUGtBgY7qOF4Yr4PibJHImbLPf-6WYFVuIKu6zbNbY,614
|
7
|
+
hydraflow-0.1.5.dist-info/METADATA,sha256=8mCKAA9KjcJAUiqP-DPdMl4Gcp3MSXxOF34VYKA2P8I,4224
|
8
|
+
hydraflow-0.1.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
9
|
+
hydraflow-0.1.5.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
10
|
+
hydraflow-0.1.5.dist-info/RECORD,,
|
@@ -1,45 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.3
|
2
|
-
Name: hydraflow
|
3
|
-
Version: 0.1.4
|
4
|
-
Summary: Hydra with MLflow
|
5
|
-
Project-URL: Documentation, https://github.com/daizutabi/hydraflow
|
6
|
-
Project-URL: Source, https://github.com/daizutabi/hydraflow
|
7
|
-
Project-URL: Issues, https://github.com/daizutabi/hydraflow/issues
|
8
|
-
Author-email: daizutabi <daizutabi@gmail.com>
|
9
|
-
License-Expression: MIT
|
10
|
-
License-File: LICENSE
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
12
|
-
Classifier: Programming Language :: Python
|
13
|
-
Classifier: Programming Language :: Python :: 3.10
|
14
|
-
Classifier: Programming Language :: Python :: 3.11
|
15
|
-
Classifier: Programming Language :: Python :: 3.12
|
16
|
-
Classifier: Topic :: Documentation
|
17
|
-
Classifier: Topic :: Software Development :: Documentation
|
18
|
-
Requires-Python: >=3.10
|
19
|
-
Requires-Dist: hydra-core>1.3
|
20
|
-
Requires-Dist: mlflow>2.15
|
21
|
-
Requires-Dist: setuptools
|
22
|
-
Requires-Dist: watchdog
|
23
|
-
Provides-Extra: dev
|
24
|
-
Requires-Dist: pytest-clarity; extra == 'dev'
|
25
|
-
Requires-Dist: pytest-cov; extra == 'dev'
|
26
|
-
Requires-Dist: pytest-randomly; extra == 'dev'
|
27
|
-
Requires-Dist: pytest-xdist; extra == 'dev'
|
28
|
-
Description-Content-Type: text/markdown
|
29
|
-
|
30
|
-
# hydraflow
|
31
|
-
|
32
|
-
[![PyPI Version][pypi-v-image]][pypi-v-link]
|
33
|
-
[![Python Version][python-v-image]][python-v-link]
|
34
|
-
[![Build Status][GHAction-image]][GHAction-link]
|
35
|
-
[![Coverage Status][codecov-image]][codecov-link]
|
36
|
-
|
37
|
-
<!-- Badges -->
|
38
|
-
[pypi-v-image]: https://img.shields.io/pypi/v/hydraflow.svg
|
39
|
-
[pypi-v-link]: https://pypi.org/project/hydraflow/
|
40
|
-
[python-v-image]: https://img.shields.io/pypi/pyversions/hydraflow.svg
|
41
|
-
[python-v-link]: https://pypi.org/project/hydraflow
|
42
|
-
[GHAction-image]: https://github.com/daizutabi/hydraflow/actions/workflows/ci.yml/badge.svg?branch=main&event=push
|
43
|
-
[GHAction-link]: https://github.com/daizutabi/hydraflow/actions?query=event%3Apush+branch%3Amain
|
44
|
-
[codecov-image]: https://codecov.io/github/daizutabi/hydraflow/coverage.svg?branch=main
|
45
|
-
[codecov-link]: https://codecov.io/github/daizutabi/hydraflow?branch=main
|
hydraflow-0.1.4.dist-info/RECORD
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=e1Q0Sskx39jaU2zkGNXjFWNC5xugEz_hDERTN_6Mzy8,666
|
2
|
-
hydraflow/config.py,sha256=b3Plh_lmq94loZNw9QP2asd6thCLyTzzYSutH0cONXA,964
|
3
|
-
hydraflow/context.py,sha256=3vejDbRYQBuBwlhpBpOv5aoyZ-yS8UUzpbCFK1V1uvw,2720
|
4
|
-
hydraflow/mlflow.py,sha256=unBP3Y7ujTM3E_Hq_eYvRVFZoGfTA7B0h4FkOZtPPqc,566
|
5
|
-
hydraflow/runs.py,sha256=127YykWzmiNUUuJSGPOCZasXmd6tcE15HU32j8x71ck,5864
|
6
|
-
hydraflow/util.py,sha256=_BdOMq5tKPm8HOehb2s2ZIBpJYyVpvO_yaAIxbSj51I,253
|
7
|
-
hydraflow-0.1.4.dist-info/METADATA,sha256=Xw-xcDKdzkHa7bKDZUI6MXpOKekcyFbMyBy1yANjNQs,1903
|
8
|
-
hydraflow-0.1.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
9
|
-
hydraflow-0.1.4.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
10
|
-
hydraflow-0.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|