hydraflow 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hydraflow/__init__.py +5 -20
- hydraflow/cli.py +31 -39
- hydraflow/core/__init__.py +0 -0
- hydraflow/{config.py → core/config.py} +10 -27
- hydraflow/{context.py → core/context.py} +8 -50
- hydraflow/{utils.py → core/io.py} +19 -28
- hydraflow/core/main.py +164 -0
- hydraflow/core/mlflow.py +168 -0
- hydraflow/{param.py → core/param.py} +2 -2
- hydraflow/entities/__init__.py +0 -0
- hydraflow/{run_collection.py → entities/run_collection.py} +18 -163
- hydraflow/{run_data.py → entities/run_data.py} +5 -3
- hydraflow/{run_info.py → entities/run_info.py} +2 -2
- hydraflow/executor/__init__.py +0 -0
- hydraflow/executor/conf.py +23 -0
- hydraflow/executor/io.py +34 -0
- hydraflow/executor/job.py +152 -0
- hydraflow/executor/parser.py +397 -0
- {hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/METADATA +18 -19
- hydraflow-0.9.0.dist-info/RECORD +24 -0
- hydraflow/main.py +0 -54
- hydraflow/mlflow.py +0 -280
- hydraflow-0.7.5.dist-info/RECORD +0 -17
- {hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/WHEEL +0 -0
- {hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/entry_points.txt +0 -0
- {hydraflow-0.7.5.dist-info → hydraflow-0.9.0.dist-info}/licenses/LICENSE +0 -0
hydraflow/mlflow.py
DELETED
@@ -1,280 +0,0 @@
|
|
1
|
-
"""Provide functionality to log parameters from Hydra configuration objects.
|
2
|
-
|
3
|
-
This module provides functions to log parameters from Hydra configuration objects
|
4
|
-
to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
|
5
|
-
configuration management with MLflow's experiment tracking capabilities.
|
6
|
-
|
7
|
-
Key Features:
|
8
|
-
- **Experiment Management**: Set experiment names and tracking URIs using Hydra
|
9
|
-
configuration details.
|
10
|
-
- **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
|
11
|
-
supporting both synchronous and asynchronous logging.
|
12
|
-
- **Run Collection**: Utilize the `RunCollection` class to manage and interact with
|
13
|
-
multiple MLflow runs, providing methods to filter and retrieve runs based on
|
14
|
-
various criteria.
|
15
|
-
"""
|
16
|
-
|
17
|
-
from __future__ import annotations
|
18
|
-
|
19
|
-
from typing import TYPE_CHECKING
|
20
|
-
|
21
|
-
import joblib
|
22
|
-
import mlflow
|
23
|
-
import mlflow.artifacts
|
24
|
-
from hydra.core.hydra_config import HydraConfig
|
25
|
-
from mlflow.entities import ViewType
|
26
|
-
from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
|
27
|
-
|
28
|
-
from hydraflow.config import iter_params
|
29
|
-
from hydraflow.run_collection import RunCollection
|
30
|
-
from hydraflow.utils import get_artifact_dir
|
31
|
-
|
32
|
-
if TYPE_CHECKING:
|
33
|
-
from pathlib import Path
|
34
|
-
|
35
|
-
from mlflow.entities.experiment import Experiment
|
36
|
-
|
37
|
-
|
38
|
-
def set_experiment(
|
39
|
-
prefix: str = "",
|
40
|
-
suffix: str = "",
|
41
|
-
uri: str | Path | None = None,
|
42
|
-
name: str | None = None,
|
43
|
-
) -> Experiment:
|
44
|
-
"""Set the experiment name and tracking URI optionally.
|
45
|
-
|
46
|
-
This function sets the experiment name by combining the given prefix,
|
47
|
-
the job name from HydraConfig, and the given suffix. Optionally, it can
|
48
|
-
also set the tracking URI.
|
49
|
-
|
50
|
-
Args:
|
51
|
-
prefix (str): The prefix to prepend to the experiment name.
|
52
|
-
suffix (str): The suffix to append to the experiment name.
|
53
|
-
uri (str | Path | None): The tracking URI to use. Defaults to None.
|
54
|
-
name (str | None): The name of the experiment. Defaults to None.
|
55
|
-
|
56
|
-
Returns:
|
57
|
-
Experiment: An instance of `mlflow.entities.Experiment` representing
|
58
|
-
the new active experiment.
|
59
|
-
|
60
|
-
"""
|
61
|
-
if uri is not None:
|
62
|
-
mlflow.set_tracking_uri(uri)
|
63
|
-
|
64
|
-
if name is not None:
|
65
|
-
return mlflow.set_experiment(name)
|
66
|
-
|
67
|
-
hc = HydraConfig.get()
|
68
|
-
name = f"{prefix}{hc.job.name}{suffix}"
|
69
|
-
return mlflow.set_experiment(name)
|
70
|
-
|
71
|
-
|
72
|
-
def log_params(config: object, *, synchronous: bool | None = None) -> None:
|
73
|
-
"""Log the parameters from the given configuration object.
|
74
|
-
|
75
|
-
This method logs the parameters from the provided configuration object
|
76
|
-
using MLflow. It iterates over the parameters and logs them using the
|
77
|
-
`mlflow.log_param` method.
|
78
|
-
|
79
|
-
Args:
|
80
|
-
config (object): The configuration object to log the parameters from.
|
81
|
-
synchronous (bool | None): Whether to log the parameters synchronously.
|
82
|
-
Defaults to None.
|
83
|
-
|
84
|
-
"""
|
85
|
-
for key, value in iter_params(config):
|
86
|
-
mlflow.log_param(key, value, synchronous=synchronous)
|
87
|
-
|
88
|
-
|
89
|
-
def search_runs( # noqa: PLR0913
|
90
|
-
*,
|
91
|
-
experiment_ids: list[str] | None = None,
|
92
|
-
filter_string: str = "",
|
93
|
-
run_view_type: int = ViewType.ACTIVE_ONLY,
|
94
|
-
max_results: int = SEARCH_MAX_RESULTS_PANDAS,
|
95
|
-
order_by: list[str] | None = None,
|
96
|
-
search_all_experiments: bool = False,
|
97
|
-
experiment_names: list[str] | None = None,
|
98
|
-
) -> RunCollection:
|
99
|
-
"""Search for Runs that fit the specified criteria.
|
100
|
-
|
101
|
-
This function wraps the `mlflow.search_runs` function and returns the
|
102
|
-
results as a `RunCollection` object. It allows for flexible searching of
|
103
|
-
MLflow runs based on various criteria.
|
104
|
-
|
105
|
-
Note:
|
106
|
-
The returned runs are sorted by their start time in ascending order.
|
107
|
-
|
108
|
-
Args:
|
109
|
-
experiment_ids (list[str] | None): List of experiment IDs. Search can
|
110
|
-
work with experiment IDs or experiment names, but not both in the
|
111
|
-
same call. Values other than ``None`` or ``[]`` will result in
|
112
|
-
error if ``experiment_names`` is also not ``None`` or ``[]``.
|
113
|
-
``None`` will default to the active experiment if ``experiment_names``
|
114
|
-
is ``None`` or ``[]``.
|
115
|
-
filter_string (str): Filter query string, defaults to searching all
|
116
|
-
runs.
|
117
|
-
run_view_type (int): one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``,
|
118
|
-
or ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
|
119
|
-
max_results (int): The maximum number of runs to put in the dataframe.
|
120
|
-
Default is 100,000 to avoid causing out-of-memory issues on the user's
|
121
|
-
machine.
|
122
|
-
order_by (list[str] | None): List of columns to order by (e.g.,
|
123
|
-
"metrics.rmse"). The ``order_by`` column can contain an optional
|
124
|
-
``DESC`` or ``ASC`` value. The default is ``ASC``. The default
|
125
|
-
ordering is to sort by ``start_time DESC``, then ``run_id``.
|
126
|
-
Note that this wrapper re-sorts the returned runs by start time, ascending.
|
127
|
-
search_all_experiments (bool): Boolean specifying whether all
|
128
|
-
experiments should be searched. Only honored if ``experiment_ids``
|
129
|
-
is ``[]`` or ``None``.
|
130
|
-
experiment_names (list[str] | None): List of experiment names. Search
|
131
|
-
can work with experiment IDs or experiment names, but not both in
|
132
|
-
the same call. Values other than ``None`` or ``[]`` will result in
|
133
|
-
error if ``experiment_ids`` is also not ``None`` or ``[]``.
|
134
|
-
``None`` will
|
135
|
-
default to the active experiment if ``experiment_ids`` is ``None``
|
136
|
-
or ``[]``.
|
137
|
-
|
138
|
-
Returns:
|
139
|
-
A `RunCollection` object containing the search results.
|
140
|
-
|
141
|
-
"""
|
142
|
-
runs = mlflow.search_runs(
|
143
|
-
experiment_ids=experiment_ids,
|
144
|
-
filter_string=filter_string,
|
145
|
-
run_view_type=run_view_type,
|
146
|
-
max_results=max_results,
|
147
|
-
order_by=order_by,
|
148
|
-
output_format="list",
|
149
|
-
search_all_experiments=search_all_experiments,
|
150
|
-
experiment_names=experiment_names,
|
151
|
-
)
|
152
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
153
|
-
return RunCollection(runs) # type: ignore
|
154
|
-
|
155
|
-
|
156
|
-
def list_run_paths(
|
157
|
-
experiment_names: str | list[str] | None = None,
|
158
|
-
*other: str,
|
159
|
-
) -> list[Path]:
|
160
|
-
"""List all run paths for the specified experiments.
|
161
|
-
|
162
|
-
This function retrieves all run paths for the given list of experiment names.
|
163
|
-
If no experiment names are provided (None), it defaults to searching all runs
|
164
|
-
for the currently active experiment. If an empty list is provided, the function
|
165
|
-
will search all runs for all experiments except the "Default" experiment.
|
166
|
-
The function returns the results as a list of `Path` objects.
|
167
|
-
|
168
|
-
Note:
|
169
|
-
The returned paths are in directory-listing order; they are not sorted by start time.
|
170
|
-
|
171
|
-
Args:
|
172
|
-
experiment_names (str | list[str] | None): Name or list of names of experiments to search
|
173
|
-
for runs. If None or an empty list is provided, the function will
|
174
|
-
search the currently active experiment or all experiments except
|
175
|
-
the "Default" experiment.
|
176
|
-
other (str): The parts of the run directory to join.
|
177
|
-
|
178
|
-
Returns:
|
179
|
-
list[Path]: A list of run paths for the specified experiments.
|
180
|
-
|
181
|
-
"""
|
182
|
-
if isinstance(experiment_names, str):
|
183
|
-
experiment_names = [experiment_names]
|
184
|
-
|
185
|
-
elif experiment_names == []:
|
186
|
-
experiments = mlflow.search_experiments()
|
187
|
-
experiment_names = [e.name for e in experiments if e.name != "Default"]
|
188
|
-
|
189
|
-
if experiment_names is None:
|
190
|
-
experiment_id = _get_experiment_id()
|
191
|
-
experiment_names = [mlflow.get_experiment(experiment_id).name]
|
192
|
-
|
193
|
-
run_paths: list[Path] = []
|
194
|
-
|
195
|
-
for name in experiment_names:
|
196
|
-
if experiment := mlflow.get_experiment_by_name(name):
|
197
|
-
uri = experiment.artifact_location
|
198
|
-
|
199
|
-
if isinstance(uri, str):
|
200
|
-
path = get_artifact_dir(uri=uri)
|
201
|
-
run_paths.extend(p for p in path.iterdir() if p.is_dir())
|
202
|
-
|
203
|
-
if other:
|
204
|
-
return [p.joinpath(*other) for p in run_paths]
|
205
|
-
|
206
|
-
return run_paths
|
207
|
-
|
208
|
-
|
209
|
-
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
|
210
|
-
"""List all run IDs for the specified experiments.
|
211
|
-
|
212
|
-
This function retrieves all runs for the given list of experiment names.
|
213
|
-
If no experiment names are provided (None), it defaults to searching all runs
|
214
|
-
for the currently active experiment. If an empty list is provided, the function
|
215
|
-
will search all runs for all experiments except the "Default" experiment.
|
216
|
-
The function returns the results as a list of strings.
|
217
|
-
|
218
|
-
Note:
|
219
|
-
The returned run IDs are in directory-listing order; they are not sorted by start time.
|
220
|
-
|
221
|
-
Args:
|
222
|
-
experiment_names (str | list[str] | None): Name or list of names of experiments to search
|
223
|
-
for runs. If None or an empty list is provided, the function will
|
224
|
-
search the currently active experiment or all experiments except
|
225
|
-
the "Default" experiment.
|
226
|
-
|
227
|
-
Returns:
|
228
|
-
list[str]: A list of run IDs for the specified experiments.
|
229
|
-
|
230
|
-
"""
|
231
|
-
return [run_dir.stem for run_dir in list_run_paths(experiment_names)]
|
232
|
-
|
233
|
-
|
234
|
-
def list_runs(
|
235
|
-
experiment_names: str | list[str] | None = None,
|
236
|
-
n_jobs: int = 0,
|
237
|
-
status: str | list[str] | int | list[int] | None = None,
|
238
|
-
) -> RunCollection:
|
239
|
-
"""List all runs for the specified experiments.
|
240
|
-
|
241
|
-
This function retrieves all runs for the given list of experiment names.
|
242
|
-
If no experiment names are provided (None), it defaults to searching all runs
|
243
|
-
for the currently active experiment. If an empty list is provided, the function
|
244
|
-
will search all runs for all experiments except the "Default" experiment.
|
245
|
-
The function returns the results as a `RunCollection` object.
|
246
|
-
|
247
|
-
Note:
|
248
|
-
The returned runs are sorted by their start time in ascending order.
|
249
|
-
|
250
|
-
Args:
|
251
|
-
experiment_names (str | list[str] | None): Name or list of names of experiments to search
|
252
|
-
for runs. If None or an empty list is provided, the function will
|
253
|
-
search the currently active experiment or all experiments except
|
254
|
-
the "Default" experiment.
|
255
|
-
n_jobs (int): The number of jobs to run in parallel. If 0, the function
|
256
|
-
will search runs sequentially.
|
257
|
-
status (str | list[str] | int | list[int] | None): The status of the runs
|
258
|
-
to filter.
|
259
|
-
|
260
|
-
Returns:
|
261
|
-
RunCollection: A `RunCollection` instance containing the runs for the
|
262
|
-
specified experiments.
|
263
|
-
|
264
|
-
"""
|
265
|
-
run_ids = list_run_ids(experiment_names)
|
266
|
-
|
267
|
-
if n_jobs == 0:
|
268
|
-
runs = [mlflow.get_run(run_id) for run_id in run_ids]
|
269
|
-
|
270
|
-
else:
|
271
|
-
it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
|
272
|
-
runs = joblib.Parallel(n_jobs, prefer="threads")(it)
|
273
|
-
|
274
|
-
runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
|
275
|
-
rc = RunCollection(runs) # type: ignore
|
276
|
-
|
277
|
-
if status is None:
|
278
|
-
return rc
|
279
|
-
|
280
|
-
return rc.filter(status=status)
|
hydraflow-0.7.5.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
hydraflow/__init__.py,sha256=0HJOiiKhfH3MFbuoL_BLaBaruVSb53Scimt2_2rRI28,995
|
2
|
-
hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
|
3
|
-
hydraflow/config.py,sha256=MNX9da5bPVDcjnpji7Cm9ndK6ura92pt361m4PRh6_E,4326
|
4
|
-
hydraflow/context.py,sha256=3xfKhMozkKFqtWeOp9Gie0A5o5URMta4US6iVD5TcLU,6002
|
5
|
-
hydraflow/main.py,sha256=hroncI_SNpNgEtdxLgzI397J5S2Amv7J0atnPxwBePM,1314
|
6
|
-
hydraflow/mlflow.py,sha256=lKpY5tPJRXXlvT5ZFVz1kROHsuvzGhp5kp8RiT2jlX8,10912
|
7
|
-
hydraflow/param.py,sha256=yu1aMNXRLegXGDL-68vwIkfeDF9CaU784WZENGLwl7Q,4572
|
8
|
-
hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
hydraflow/run_collection.py,sha256=YCWg5Dz1j49xB2LA75onq5wsAeQQbifXpG4yPUwRN4I,24776
|
10
|
-
hydraflow/run_data.py,sha256=dpyyfnuH9mCtIZeigMo1iFQo9bafMdEL4i4uI2l0UqY,1525
|
11
|
-
hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
|
12
|
-
hydraflow/utils.py,sha256=a9i5PEJn8Ssowv9dqHadAihZXlsqtVjHZ9MZvkPq1bY,4747
|
13
|
-
hydraflow-0.7.5.dist-info/METADATA,sha256=oSBWEevJs2RI55hqrxzW3k9ArtwRrvnk1kBl7oJNohg,4767
|
14
|
-
hydraflow-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
15
|
-
hydraflow-0.7.5.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
|
16
|
-
hydraflow-0.7.5.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
|
17
|
-
hydraflow-0.7.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|