hydraflow 0.7.5__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydraflow/mlflow.py DELETED
@@ -1,280 +0,0 @@
1
- """Provide functionality to log parameters from Hydra configuration objects.
2
-
3
- This module provides functions to log parameters from Hydra configuration objects
4
- to MLflow, set experiments, and manage tracking URIs. It integrates Hydra's
5
- configuration management with MLflow's experiment tracking capabilities.
6
-
7
- Key Features:
8
- - **Experiment Management**: Set experiment names and tracking URIs using Hydra
9
- configuration details.
10
- - **Parameter Logging**: Log parameters from Hydra configuration objects to MLflow,
11
- supporting both synchronous and asynchronous logging.
12
- - **Run Collection**: Utilize the `RunCollection` class to manage and interact with
13
- multiple MLflow runs, providing methods to filter and retrieve runs based on
14
- various criteria.
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- from typing import TYPE_CHECKING
20
-
21
- import joblib
22
- import mlflow
23
- import mlflow.artifacts
24
- from hydra.core.hydra_config import HydraConfig
25
- from mlflow.entities import ViewType
26
- from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
27
-
28
- from hydraflow.config import iter_params
29
- from hydraflow.run_collection import RunCollection
30
- from hydraflow.utils import get_artifact_dir
31
-
32
- if TYPE_CHECKING:
33
- from pathlib import Path
34
-
35
- from mlflow.entities.experiment import Experiment
36
-
37
-
38
def set_experiment(
    prefix: str = "",
    suffix: str = "",
    uri: str | Path | None = None,
    name: str | None = None,
) -> Experiment:
    """Activate an MLflow experiment, optionally switching the tracking URI.

    When ``name`` is supplied it is used verbatim and ``prefix``/``suffix``
    are ignored. Otherwise the experiment name is assembled as
    ``prefix + <Hydra job name> + suffix``, with the job name read from the
    current ``HydraConfig``.

    Args:
        prefix (str): Text placed before the Hydra job name.
        suffix (str): Text placed after the Hydra job name.
        uri (str | Path | None): Tracking URI to set before selecting the
            experiment. Left untouched when None.
        name (str | None): Explicit experiment name. When None the name is
            derived from the Hydra job name.

    Returns:
        Experiment: The object returned by ``mlflow.set_experiment``.

    """
    if uri is not None:
        mlflow.set_tracking_uri(uri)

    if name is None:
        # No explicit name was given: fall back to the Hydra job name.
        job_name = HydraConfig.get().job.name
        name = f"{prefix}{job_name}{suffix}"

    return mlflow.set_experiment(name)
70
-
71
-
72
def log_params(config: object, *, synchronous: bool | None = None) -> None:
    """Log every parameter of a configuration object to MLflow.

    The ``(key, value)`` pairs produced by ``iter_params(config)`` are sent
    one at a time through ``mlflow.log_param``.

    Args:
        config (object): Configuration object whose parameters are logged.
        synchronous (bool | None): Forwarded unchanged to
            ``mlflow.log_param`` for each parameter. Defaults to None.

    """
    for param_name, param_value in iter_params(config):
        mlflow.log_param(param_name, param_value, synchronous=synchronous)
87
-
88
-
89
def search_runs(  # noqa: PLR0913
    *,
    experiment_ids: list[str] | None = None,
    filter_string: str = "",
    run_view_type: int = ViewType.ACTIVE_ONLY,
    max_results: int = SEARCH_MAX_RESULTS_PANDAS,
    order_by: list[str] | None = None,
    search_all_experiments: bool = False,
    experiment_names: list[str] | None = None,
) -> RunCollection:
    """Search for runs matching the given criteria.

    Thin wrapper around ``mlflow.search_runs``: every argument is forwarded
    as-is, the result is requested in ``"list"`` output format, and the runs
    are wrapped in a `RunCollection`.

    Note:
        Whatever ``order_by`` is passed, the returned runs are re-sorted by
        their start time in ascending order before being wrapped.

    Args:
        experiment_ids (list[str] | None): List of experiment IDs. Search
            works with experiment IDs or experiment names, but not both in
            the same call. ``None`` defaults to the active experiment if
            ``experiment_names`` is ``None`` or ``[]``.
        filter_string (str): Filter query string; the default searches all
            runs.
        run_view_type (int): One of the enum values ``ACTIVE_ONLY``,
            ``DELETED_ONLY``, or ``ALL`` defined in
            :py:class:`mlflow.entities.ViewType`.
        max_results (int): The maximum number of runs to retrieve.
        order_by (list[str] | None): List of columns to order by (e.g.,
            "metrics.rmse"), each optionally followed by ``DESC`` or
            ``ASC``. Forwarded to ``mlflow.search_runs``.
        search_all_experiments (bool): Whether all experiments should be
            searched. Only honored if ``experiment_ids`` is ``[]`` or
            ``None``.
        experiment_names (list[str] | None): List of experiment names.
            Search works with experiment IDs or experiment names, but not
            both in the same call. ``None`` defaults to the active
            experiment if ``experiment_ids`` is ``None`` or ``[]``.

    Returns:
        A `RunCollection` object containing the search results.

    """
    found = mlflow.search_runs(
        output_format="list",
        experiment_ids=experiment_ids,
        experiment_names=experiment_names,
        search_all_experiments=search_all_experiments,
        filter_string=filter_string,
        run_view_type=run_view_type,
        max_results=max_results,
        order_by=order_by,
    )
    # Copy, then sort in place: oldest run first.
    ordered = list(found)  # type: ignore
    ordered.sort(key=lambda r: r.info.start_time)
    return RunCollection(ordered)  # type: ignore
154
-
155
-
156
def list_run_paths(
    experiment_names: str | list[str] | None = None,
    *other: str,
) -> list[Path]:
    """List the run directories of the specified experiments.

    The experiments to inspect are chosen as follows:

    - a single string is treated as a one-element list;
    - ``None`` selects the currently active experiment;
    - an empty list selects every experiment except "Default";
    - any other list is used as given.

    For each experiment that exists and has a string artifact location, the
    sub-directories of the directory returned by ``get_artifact_dir`` are
    collected.

    Note:
        Paths appear in the order the experiments are processed and, within
        an experiment, in the order ``Path.iterdir`` yields them. No sorting
        is applied here.

    Args:
        experiment_names (str | list[str] | None): Experiment name(s) to
            search for runs; see above for the handling of ``None`` and
            ``[]``.
        other (str): Path parts joined onto every run directory.

    Returns:
        list[Path]: The run directories, each extended by ``other`` when
        any parts are given.

    """
    if isinstance(experiment_names, str):
        names = [experiment_names]
    elif experiment_names is None:
        active = mlflow.get_experiment(_get_experiment_id())
        names = [active.name]
    elif experiment_names == []:
        names = [e.name for e in mlflow.search_experiments() if e.name != "Default"]
    else:
        names = experiment_names

    run_dirs: list[Path] = []

    for exp_name in names:
        experiment = mlflow.get_experiment_by_name(exp_name)
        if not experiment:
            # Unknown experiment names are skipped.
            continue

        location = experiment.artifact_location
        if not isinstance(location, str):
            continue

        root = get_artifact_dir(uri=location)
        run_dirs.extend(child for child in root.iterdir() if child.is_dir())

    if not other:
        return run_dirs

    return [run_dir.joinpath(*other) for run_dir in run_dirs]
207
-
208
-
209
def list_run_ids(experiment_names: str | list[str] | None = None) -> list[str]:
    """List the run IDs of the specified experiments.

    Each ID is the ``stem`` of a run directory returned by
    ``list_run_paths(experiment_names)``, so experiment selection (``None``
    for the active experiment, ``[]`` for all experiments except "Default")
    and result order are exactly those of `list_run_paths`.

    Args:
        experiment_names (str | list[str] | None): Experiment name(s) to
            search for runs.

    Returns:
        list[str]: One run ID per run directory found.

    """
    run_dirs = list_run_paths(experiment_names)
    return [run_dir.stem for run_dir in run_dirs]
232
-
233
-
234
def list_runs(
    experiment_names: str | list[str] | None = None,
    n_jobs: int = 0,
    status: str | list[str] | int | list[int] | None = None,
) -> RunCollection:
    """Load the runs of the specified experiments into a `RunCollection`.

    Run IDs come from ``list_run_ids(experiment_names)``; each one is
    resolved with ``mlflow.get_run``, either sequentially or through a
    thread-preferring ``joblib.Parallel`` pool.

    Note:
        The returned runs are sorted by their start time in ascending order.

    Args:
        experiment_names (str | list[str] | None): Experiment name(s) to
            search for runs. ``None`` selects the active experiment and an
            empty list selects all experiments except "Default".
        n_jobs (int): Passed to ``joblib.Parallel`` as the number of jobs.
            If 0, runs are fetched sequentially without joblib.
        status (str | list[str] | int | list[int] | None): When not None,
            the collection is narrowed with ``filter(status=status)``.

    Returns:
        RunCollection: The runs of the specified experiments, optionally
        filtered by status.

    """
    run_ids = list_run_ids(experiment_names)

    if n_jobs != 0:
        tasks = (joblib.delayed(mlflow.get_run)(rid) for rid in run_ids)
        fetched = joblib.Parallel(n_jobs, prefer="threads")(tasks)
    else:
        fetched = [mlflow.get_run(rid) for rid in run_ids]

    ordered = sorted(fetched, key=lambda r: r.info.start_time)  # type: ignore
    collection = RunCollection(ordered)  # type: ignore

    return collection if status is None else collection.filter(status=status)
@@ -1,17 +0,0 @@
1
- hydraflow/__init__.py,sha256=0HJOiiKhfH3MFbuoL_BLaBaruVSb53Scimt2_2rRI28,995
2
- hydraflow/cli.py,sha256=jxqFppNeJWAr2Tb-C_MQXEJtegJ6TXcd3C1CT7Jdb1A,1559
3
- hydraflow/config.py,sha256=MNX9da5bPVDcjnpji7Cm9ndK6ura92pt361m4PRh6_E,4326
4
- hydraflow/context.py,sha256=3xfKhMozkKFqtWeOp9Gie0A5o5URMta4US6iVD5TcLU,6002
5
- hydraflow/main.py,sha256=hroncI_SNpNgEtdxLgzI397J5S2Amv7J0atnPxwBePM,1314
6
- hydraflow/mlflow.py,sha256=lKpY5tPJRXXlvT5ZFVz1kROHsuvzGhp5kp8RiT2jlX8,10912
7
- hydraflow/param.py,sha256=yu1aMNXRLegXGDL-68vwIkfeDF9CaU784WZENGLwl7Q,4572
8
- hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- hydraflow/run_collection.py,sha256=YCWg5Dz1j49xB2LA75onq5wsAeQQbifXpG4yPUwRN4I,24776
10
- hydraflow/run_data.py,sha256=dpyyfnuH9mCtIZeigMo1iFQo9bafMdEL4i4uI2l0UqY,1525
11
- hydraflow/run_info.py,sha256=Jf5wrIjRLIV1-k-obHDqwKHa6j_ZonrY8od-rXlbtMo,1024
12
- hydraflow/utils.py,sha256=a9i5PEJn8Ssowv9dqHadAihZXlsqtVjHZ9MZvkPq1bY,4747
13
- hydraflow-0.7.5.dist-info/METADATA,sha256=oSBWEevJs2RI55hqrxzW3k9ArtwRrvnk1kBl7oJNohg,4767
14
- hydraflow-0.7.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
15
- hydraflow-0.7.5.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
16
- hydraflow-0.7.5.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
17
- hydraflow-0.7.5.dist-info/RECORD,,