hydraflow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hydraflow/__init__.py CHANGED
@@ -2,25 +2,29 @@ from .context import Info, chdir_artifact, log_run, watch
2
2
  from .mlflow import set_experiment
3
3
  from .runs import (
4
4
  Run,
5
- Runs,
5
+ RunCollection,
6
6
  filter_runs,
7
7
  get_param_dict,
8
8
  get_param_names,
9
9
  get_run,
10
+ list_runs,
10
11
  load_config,
12
+ search_runs,
11
13
  )
12
14
 
13
15
  __all__ = [
14
16
  "Info",
15
17
  "Run",
16
- "Runs",
18
+ "RunCollection",
17
19
  "chdir_artifact",
18
20
  "filter_runs",
19
21
  "get_param_dict",
20
22
  "get_param_names",
21
23
  "get_run",
24
+ "list_runs",
22
25
  "load_config",
23
26
  "log_run",
27
+ "search_runs",
24
28
  "set_experiment",
25
29
  "watch",
26
30
  ]
hydraflow/asyncio.py ADDED
@@ -0,0 +1,199 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from asyncio.subprocess import PIPE
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ import watchfiles
10
+
11
+ if TYPE_CHECKING:
12
+ from asyncio.streams import StreamReader
13
+ from collections.abc import Callable
14
+
15
+ from watchfiles import Change
16
+
17
+
18
+ # Set up logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
async def execute_command(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    stop_event: asyncio.Event,
) -> int:
    """
    Run a command asynchronously and pass its output to callback functions.

    A pipe is opened only for a stream that has a callback. A stream without
    a callback is redirected to the null device, so the child process can
    never block on a pipe that nobody reads.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for standard output.
        stderr (Callable[[str], None] | None): Callback for standard error.
        stop_event (asyncio.Event): Event that is set when this coroutine
            finishes, whether it succeeded, failed or was cancelled.

    Returns:
        int: The return code of the process, or 1 if the command could not
        be started or an error occurred while its output was being handled.
    """
    process = None
    try:
        process = await asyncio.create_subprocess_exec(
            program,
            *args,
            stdout=PIPE if stdout is not None else asyncio.subprocess.DEVNULL,
            stderr=PIPE if stderr is not None else asyncio.subprocess.DEVNULL,
        )
        await asyncio.gather(
            process_stream(process.stdout, stdout),
            process_stream(process.stderr, stderr),
        )
        returncode = await process.wait()

    except Exception as e:
        logger.error(f"Error running command: {e}")
        returncode = 1

    finally:
        try:
            # Reached with a live child only on failure or cancellation:
            # do not leave the subprocess running and un-reaped.
            if process is not None and process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass  # the child exited on its own in the meantime
                await process.wait()
        finally:
            stop_event.set()

    return returncode
59
+
60
+
61
async def process_stream(
    stream: StreamReader | None,
    callback: Callable[[str], None] | None,
) -> None:
    """
    Read a stream line by line and pass each line to a callback function.

    The stream is always read until end of stream, even when no callback is
    given, so that a process writing to the other end of a pipe cannot stall
    on a full pipe buffer. Each line is decoded (undecodable bytes are
    replaced rather than raising) and stripped of surrounding whitespace
    before it is handed to the callback.

    Args:
        stream (StreamReader | None): The stream to read from. Nothing is
            done when it is None.
        callback (Callable[[str], None] | None): The callback function to
            handle each line. When None, the lines are read and discarded.
    """
    if stream is None:
        return

    while True:
        line = await stream.readline()
        if not line:
            # An empty bytes object means end of stream.
            break

        if callback is not None:
            callback(line.decode(errors="replace").strip())
82
+
83
+
84
async def monitor_file_changes(
    paths: list[str | Path],
    callback: Callable[[set[tuple[Change, str]]], None],
    stop_event: asyncio.Event,
    **awatch_kwargs,
) -> None:
    """
    Watch the given paths and hand every batch of file changes to a callback.

    Any exception raised while watching (including one raised by the
    callback) is logged and ends the watch; it is not re-raised.

    Args:
        paths (list[str | Path]): Paths to monitor for changes. Each one is
            converted to a string before being given to `watchfiles.awatch`.
        callback (Callable[[set[tuple[Change, str]]], None]): Function
            called with each set of changes.
        stop_event (asyncio.Event): Event forwarded to `watchfiles.awatch`
            as its `stop_event` to signal when to stop watching.
        **awatch_kwargs: Additional keyword arguments forwarded to
            `watchfiles.awatch`.
    """
    targets = list(map(str, paths))
    try:
        watcher = watchfiles.awatch(*targets, stop_event=stop_event, **awatch_kwargs)
        async for batch in watcher:
            callback(batch)
    except Exception as exc:
        logger.error(f"Error watching files: {exc}")
107
+
108
+
109
async def run_and_monitor(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
    paths: list[str | Path] | None = None,
    **awatch_kwargs,
) -> int:
    """
    Run a command and optionally watch for file changes concurrently.

    File watching is started only when both `watch` and `paths` are given.
    The command and the watcher share one stop event, so the watcher is told
    to stop as soon as the command finishes.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for standard output.
        stderr (Callable[[str], None] | None): Callback for standard error.
        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
            file changes.
        paths (list[str | Path] | None): List of paths to monitor for changes.
        **awatch_kwargs: Additional keyword arguments forwarded to
            `monitor_file_changes`.

    Returns:
        int: The result of the command task, i.e. the value returned by
        `execute_command`.
    """
    stop_event = asyncio.Event()
    run_task = asyncio.create_task(
        execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
    )
    tasks = [run_task]
    if watch and paths:
        tasks.append(
            asyncio.create_task(
                monitor_file_changes(paths, watch, stop_event, **awatch_kwargs)
            )
        )

    try:
        await asyncio.gather(*tasks)
    except Exception as e:
        logger.error(f"Error in run_and_monitor: {e}")
    finally:
        stop_event.set()
        # Let every task settle, even if one of them failed, so that no task
        # is left un-awaited and an already-logged watcher error does not
        # mask the command's result.
        await asyncio.gather(*tasks, return_exceptions=True)

    # Re-raises here if the command task itself failed: there is no return
    # code to report in that case.
    return run_task.result()
156
+
157
+
158
def run(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
    paths: list[str | Path] | None = None,
    **awatch_kwargs,
) -> int:
    """
    Run a command to completion, optionally watching for file changes.

    Synchronous entry point for `run_and_monitor`: the coroutine is driven
    with `asyncio.run`. When a `watch` callback is given without any paths,
    the current working directory is watched.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for handling standard output lines.
        stderr (Callable[[str], None] | None): Callback for handling standard error lines.
        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for handling file changes.
        paths (list[str | Path] | None): List of paths to monitor for file changes.
        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.

    Returns:
        int: The return code of the process.
    """
    watch_paths = paths
    if watch and not watch_paths:
        # A watcher without explicit targets falls back to the working directory.
        watch_paths = [Path.cwd()]

    coroutine = run_and_monitor(
        program,
        *args,
        stdout=stdout,
        stderr=stderr,
        watch=watch,
        paths=watch_paths,
        **awatch_kwargs,
    )
    return asyncio.run(coroutine)
hydraflow/config.py CHANGED
@@ -30,6 +30,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
30
30
  Yields:
31
31
  Key-value pairs representing the parameters in the configuration object.
32
32
  """
33
+ if config is None:
34
+ return
35
+
33
36
  if not isinstance(config, (DictConfig, ListConfig)):
34
37
  config = OmegaConf.create(config) # type: ignore
35
38
 
hydraflow/runs.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
- This module provides functionality for managing and interacting with MLflow runs.
3
- It includes the `Runs` class and various methods to filter runs, retrieve run information,
4
- log artifacts, and load configurations.
2
+ This module provides functionality for managing and interacting with MLflow
3
+ runs. It includes the `RunCollection` class and various methods to filter
4
+ runs, retrieve run information, log artifacts, and load configurations.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -9,9 +9,10 @@ from __future__ import annotations
9
9
  from dataclasses import dataclass
10
10
  from functools import cache
11
11
  from itertools import chain
12
- from typing import TYPE_CHECKING, Any
12
+ from typing import TYPE_CHECKING, Any, TypeVar
13
13
 
14
14
  import mlflow
15
+ from mlflow.artifacts import download_artifacts
15
16
  from mlflow.entities import ViewType
16
17
  from mlflow.entities.run import Run
17
18
  from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
@@ -20,6 +21,7 @@ from omegaconf import DictConfig, OmegaConf
20
21
  from hydraflow.config import iter_params
21
22
 
22
23
  if TYPE_CHECKING:
24
+ from collections.abc import Callable, Iterator
23
25
  from typing import Any
24
26
 
25
27
 
@@ -31,41 +33,46 @@ def search_runs(
31
33
  order_by: list[str] | None = None,
32
34
  search_all_experiments: bool = False,
33
35
  experiment_names: list[str] | None = None,
34
- ) -> Runs:
36
+ ) -> RunCollection:
35
37
  """
36
38
  Search for Runs that fit the specified criteria.
37
39
 
38
- This function wraps the `mlflow.search_runs` function and returns the results
39
- as a `Runs` object. It allows for flexible searching of MLflow runs based on
40
- various criteria.
40
+ This function wraps the `mlflow.search_runs` function and returns the
41
+ results as a `RunCollection` object. It allows for flexible searching of
42
+ MLflow runs based on various criteria.
43
+
44
+ Note:
45
+ The returned runs are sorted by their start time in ascending order.
41
46
 
42
47
  Args:
43
- experiment_ids: List of experiment IDs. Search can work with experiment IDs or
44
- experiment names, but not both in the same call. Values other than
45
- ``None`` or ``[]`` will result in error if ``experiment_names`` is
46
- also not ``None`` or ``[]``. ``None`` will default to the active
47
- experiment if ``experiment_names`` is ``None`` or ``[]``.
48
+ experiment_ids: List of experiment IDs. Search can work with experiment
49
+ IDs or experiment names, but not both in the same call. Values
50
+ other than ``None`` or ``[]`` will result in error if
51
+ ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
52
+ default to the active experiment if ``experiment_names`` is ``None``
53
+ or ``[]``.
48
54
  filter_string: Filter query string, defaults to searching all runs.
49
- run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or ``ALL`` runs
50
- defined in :py:class:`mlflow.entities.ViewType`.
51
- max_results: The maximum number of runs to put in the dataframe. Default is 100,000
52
- to avoid causing out-of-memory issues on the user's machine.
53
- order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
54
- can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
55
- The default ordering is to sort by ``start_time DESC``, then ``run_id``.
56
- output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
57
- is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
58
- is returned.
59
- search_all_experiments: Boolean specifying whether all experiments should be searched.
60
- Only honored if ``experiment_ids`` is ``[]`` or ``None``.
61
- experiment_names: List of experiment names. Search can work with experiment IDs or
62
- experiment names, but not both in the same call. Values other
63
- than ``None`` or ``[]`` will result in error if ``experiment_ids``
64
- is also not ``None`` or ``[]``. ``None`` will default to the active
65
- experiment if ``experiment_ids`` is ``None`` or ``[]``.
55
+ run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or
56
+ ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
57
+ max_results: The maximum number of runs to put in the dataframe. Default
58
+ is 100,000 to avoid causing out-of-memory issues on the user's
59
+ machine.
60
+ order_by: List of columns to order by (e.g., "metrics.rmse"). The
61
+ ``order_by`` column can contain an optional ``DESC`` or ``ASC``
62
+ value. The default is ``ASC``. The default ordering is to sort by
63
+ ``start_time DESC``, then ``run_id``.
64
+ search_all_experiments: Boolean specifying whether all experiments
65
+ should be searched. Only honored if ``experiment_ids`` is ``[]`` or
66
+ ``None``.
67
+ experiment_names: List of experiment names. Search can work with
68
+ experiment IDs or experiment names, but not both in the same call.
69
+ Values other than ``None`` or ``[]`` will result in error if
70
+ ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
71
+ default to the active experiment if ``experiment_ids`` is ``None``
72
+ or ``[]``.
66
73
 
67
74
  Returns:
68
- A `Runs` object containing the search results.
75
+ A `RunCollection` object containing the search results.
69
76
  """
70
77
  runs = mlflow.search_runs(
71
78
  experiment_ids=experiment_ids,
@@ -77,11 +84,44 @@ def search_runs(
77
84
  search_all_experiments=search_all_experiments,
78
85
  experiment_names=experiment_names,
79
86
  )
80
- return Runs(runs) # type: ignore
87
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
88
+ return RunCollection(runs) # type: ignore
89
+
90
+
91
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
    """
    List all runs for the specified experiments.

    This function retrieves all runs for the given list of experiment names.
    If no experiment names are provided (None), it defaults to searching all
    runs for the currently active experiment. If an empty list is provided,
    the function searches all runs for all experiments except the "Default"
    experiment; when no such experiment exists, an empty collection is
    returned.

    Note:
        The returned runs are sorted by their start time in ascending order.

    Args:
        experiment_names: List of experiment names to search for runs.
            None selects the currently active experiment; an empty list
            selects every experiment except "Default".

    Returns:
        A `RunCollection` object containing the runs for the specified
        experiments.
    """
    if experiment_names == []:
        experiment_names = [
            experiment.name
            for experiment in mlflow.search_experiments()
            if experiment.name != "Default"
        ]
        if not experiment_names:
            # Passing an empty list on would make search_runs fall back to
            # the active experiment instead of returning nothing.
            return RunCollection([])

    return search_runs(experiment_names=experiment_names)
118
+
119
+
120
+ T = TypeVar("T")
81
121
 
82
122
 
83
123
  @dataclass
84
- class Runs:
124
+ class RunCollection:
85
125
  """
86
126
  A class to represent a collection of MLflow runs.
87
127
 
@@ -89,133 +129,414 @@ class Runs:
89
129
  retrieving specific runs, and accessing run information.
90
130
  """
91
131
 
92
- runs: list[Run]
132
+ _runs: list[Run]
133
+ """A list of MLflow Run objects."""
93
134
 
94
135
  def __repr__(self) -> str:
95
136
  return f"{self.__class__.__name__}({len(self)})"
96
137
 
97
138
  def __len__(self) -> int:
98
- return len(self.runs)
139
+ return len(self._runs)
140
+
141
+ def first(self) -> Run:
142
+ """
143
+ Get the first run in the collection.
144
+
145
+ Returns:
146
+ The first run object in the collection.
147
+
148
+ Raises:
149
+ ValueError: If the collection is empty.
150
+ """
151
+ if not self._runs:
152
+ raise ValueError("The collection is empty.")
153
+
154
+ return self._runs[0]
155
+
156
+ def try_first(self) -> Run | None:
157
+ """
158
+ Try to get the first run in the collection.
159
+
160
+ Returns:
161
+ The first run object in the collection, or None if the collection
162
+ is empty.
163
+ """
164
+ return self._runs[0] if self._runs else None
165
+
166
+ def last(self) -> Run:
167
+ """
168
+ Get the last run in the collection.
99
169
 
100
- def filter(self, config: object) -> Runs:
170
+ Returns:
171
+ The last run object in the collection.
172
+
173
+ Raises:
174
+ ValueError: If the collection is empty.
175
+ """
176
+ if not self._runs:
177
+ raise ValueError("The collection is empty.")
178
+
179
+ return self._runs[-1]
180
+
181
+ def try_last(self) -> Run | None:
182
+ """
183
+ Try to get the last run in the collection.
184
+
185
+ Returns:
186
+ The last run object in the collection, or None if the collection is
187
+ empty.
188
+ """
189
+ return self._runs[-1] if self._runs else None
190
+
191
+ def filter(self, config: object | None = None, **kwargs) -> RunCollection:
101
192
  """
102
193
  Filter the runs based on the provided configuration.
103
194
 
104
195
  This method filters the runs in the collection according to the
105
- specified configuration object. The configuration object should
106
- contain key-value pairs that correspond to the parameters of the
107
- runs. Only the runs that match all the specified parameters will
108
- be included in the returned `Runs` object.
196
+ specified configuration object and additional key-value pairs. The
197
+ configuration object and key-value pairs should contain key-value pairs
198
+ that correspond to the parameters of the runs. Only the runs that match
199
+ all the specified parameters will be included in the returned
200
+ `RunCollection` object.
201
+
202
+ The filtering supports:
203
+ - Exact matches for single values.
204
+ - Membership checks for lists of values.
205
+ - Range checks for tuples of two values (inclusive of the lower bound
206
+ and exclusive of the upper bound).
109
207
 
110
208
  Args:
111
- config: The configuration object to filter the runs.
209
+ config: The configuration object to filter the runs. This can be
210
+ any object that provides key-value pairs through the
211
+ `iter_params` function.
212
+ **kwargs: Additional key-value pairs to filter the runs.
112
213
 
113
214
  Returns:
114
- A new `Runs` object containing the filtered runs.
215
+ A new `RunCollection` object containing the filtered runs.
115
216
  """
116
- return Runs(filter_runs(self.runs, config))
217
+ return RunCollection(filter_runs(self._runs, config, **kwargs))
117
218
 
118
- def get(self, config: object) -> Run | None:
219
+ def find(self, config: object | None = None, **kwargs) -> Run:
119
220
  """
120
- Retrieve a specific run based on the provided configuration.
221
+ Find the first run based on the provided configuration.
222
+
223
+ This method filters the runs in the collection according to the
224
+ specified configuration object and returns the first run that matches
225
+ the provided parameters. If no run matches the criteria, a `ValueError`
226
+ is raised.
227
+
228
+ Args:
229
+ config: The configuration object to identify the run.
230
+ **kwargs: Additional key-value pairs to filter the runs.
231
+
232
+ Returns:
233
+ The first run object that matches the provided configuration.
234
+
235
+ Raises:
236
+ ValueError: If no run matches the criteria.
237
+
238
+ See Also:
239
+ RunCollection.filter: The method that performs the actual filtering
240
+ logic.
241
+ """
242
+ return find_run(self._runs, config, **kwargs)
243
+
244
+ def try_find(self, config: object | None = None, **kwargs) -> Run | None:
245
+ """
246
+ Find the first run based on the provided configuration.
247
+
248
+ This method filters the runs in the collection according to the
249
+ specified configuration object and returns the first run that matches
250
+ the provided parameters. If no run matches the criteria, None is
251
+ returned.
252
+
253
+ Args:
254
+ config: The configuration object to identify the run.
255
+ **kwargs: Additional key-value pairs to filter the runs.
256
+
257
+ Returns:
258
+ The first run object that matches the provided configuration, or
259
+ None if no runs match the criteria.
260
+
261
+ See Also:
262
+ RunCollection.filter: The method that performs the actual filtering
263
+ logic.
264
+ """
265
+ return try_find_run(self._runs, config, **kwargs)
266
+
267
+ def find_last(self, config: object | None = None, **kwargs) -> Run:
268
+ """
269
+ Find the last run based on the provided configuration.
121
270
 
122
271
  This method filters the runs in the collection according to the
123
- specified configuration object and returns the run that matches
124
- the provided parameters. If more than one run matches the criteria,
125
- a `ValueError` is raised.
272
+ specified configuration object and returns the last run that matches
273
+ the provided parameters. If no run matches the criteria, a `ValueError`
274
+ is raised.
126
275
 
127
276
  Args:
128
277
  config: The configuration object to identify the run.
278
+ **kwargs: Additional key-value pairs to filter the runs.
129
279
 
130
280
  Returns:
131
- Run: The run object that matches the provided configuration.
132
- None, if the runs are not in a DataFrame format.
281
+ The last run object that matches the provided configuration.
133
282
 
134
283
  Raises:
135
- ValueError: If the number of filtered runs is not exactly one.
284
+ ValueError: If no run matches the criteria.
285
+
286
+ See Also:
287
+ RunCollection.filter: The method that performs the actual filtering
288
+ logic.
136
289
  """
137
- return get_run(self.runs, config)
290
+ return find_last_run(self._runs, config, **kwargs)
138
291
 
139
- def get_earliest_run(self, config: object | None = None, **kwargs) -> Run | None:
292
+ def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
140
293
  """
141
- Get the earliest run from the list of runs based on the start time.
294
+ Find the last run based on the provided configuration.
142
295
 
143
- This method filters the runs based on the configuration if provided
144
- and returns the run with the earliest start time.
296
+ This method filters the runs in the collection according to the
297
+ specified configuration object and returns the last run that matches
298
+ the provided parameters. If no run matches the criteria, None is
299
+ returned.
145
300
 
146
301
  Args:
147
- config: The configuration object to filter the runs.
148
- If None, no filtering is applied.
302
+ config: The configuration object to identify the run.
149
303
  **kwargs: Additional key-value pairs to filter the runs.
150
304
 
151
305
  Returns:
152
- The run with the earliest start time, or None if no runs match the criteria.
306
+ The last run object that matches the provided configuration, or
307
+ None if no runs match the criteria.
308
+
309
+ See Also:
310
+ RunCollection.filter: The method that performs the actual filtering
311
+ logic.
312
+ """
313
+ return try_find_last_run(self._runs, config, **kwargs)
314
+
315
+ def get(self, config: object | None = None, **kwargs) -> Run:
316
+ """
317
+ Retrieve a specific run based on the provided configuration.
318
+
319
+ This method filters the runs in the collection according to the
320
+ specified configuration object and returns the run that matches the
321
+ provided parameters. If no run matches the criteria, or if more than
322
+ one run matches the criteria, a `ValueError` is raised.
323
+
324
+ Args:
325
+ config: The configuration object to identify the run.
326
+ **kwargs: Additional key-value pairs to filter the runs.
327
+
328
+ Returns:
329
+ The run object that matches the provided configuration.
330
+
331
+ Raises:
332
+ ValueError: If no run matches the criteria or if more than one run
333
+ matches the criteria.
334
+
335
+ See Also:
336
+ RunCollection.filter: The method that performs the actual filtering
337
+ logic.
153
338
  """
154
- return get_earliest_run(self.runs, config, **kwargs)
339
+ return get_run(self._runs, config, **kwargs)
155
340
 
156
- def get_latest_run(self, config: object | None = None, **kwargs) -> Run | None:
341
+ def try_get(self, config: object | None = None, **kwargs) -> Run | None:
157
342
  """
158
- Get the latest run from the list of runs based on the start time.
343
+ Retrieve a specific run based on the provided configuration.
344
+
345
+ This method filters the runs in the collection according to the
346
+ specified configuration object and returns the run that matches the
347
+ provided parameters. If no run matches the criteria, None is returned.
348
+ If more than one run matches the criteria, a `ValueError` is raised.
159
349
 
160
350
  Args:
161
- config: The configuration object to filter the runs.
162
- If None, no filtering is applied.
351
+ config: The configuration object to identify the run.
163
352
  **kwargs: Additional key-value pairs to filter the runs.
164
353
 
165
354
  Returns:
166
- The run with the latest start time, or None if no runs match the criteria.
355
+ The run object that matches the provided configuration, or None if
356
+ no runs match the criteria.
357
+
358
+ Raises:
359
+ ValueError: If more than one run matches the criteria.
360
+
361
+ See Also:
362
+ RunCollection.filter: The method that performs the actual filtering
363
+ logic.
167
364
  """
168
- return get_latest_run(self.runs, config, **kwargs)
365
+ return try_get_run(self._runs, config, **kwargs)
169
366
 
170
367
  def get_param_names(self) -> list[str]:
171
368
  """
172
369
  Get the parameter names from the runs.
173
370
 
174
- This method extracts the unique parameter names from the provided list of runs.
175
- It iterates through each run and collects the parameter names into a set to
176
- ensure uniqueness.
371
+ This method extracts the unique parameter names from the provided list
372
+ of runs. It iterates through each run and collects the parameter names
373
+ into a set to ensure uniqueness.
177
374
 
178
375
  Returns:
179
376
  A list of unique parameter names.
180
377
  """
181
- return get_param_names(self.runs)
378
+ return get_param_names(self._runs)
182
379
 
183
380
  def get_param_dict(self) -> dict[str, list[str]]:
184
381
  """
185
382
  Get the parameter dictionary from the list of runs.
186
383
 
187
384
  This method extracts the parameter names and their corresponding values
188
- from the provided list of runs. It iterates through each run and collects
189
- the parameter values into a dictionary where the keys are parameter names
190
- and the values are lists of parameter values.
385
+ from the provided list of runs. It iterates through each run and
386
+ collects the parameter values into a dictionary where the keys are
387
+ parameter names and the values are lists of parameter values.
191
388
 
192
389
  Returns:
193
- A dictionary where the keys are parameter names and the values are lists
194
- of parameter values.
390
+ A dictionary where the keys are parameter names and the values are
391
+ lists of parameter values.
392
+ """
393
+ return get_param_dict(self._runs)
394
+
395
+ def map(self, func: Callable[[Run], T]) -> Iterator[T]:
396
+ """
397
+ Apply a function to each run in the collection and return an iterator of
398
+ results.
399
+
400
+ Args:
401
+ func: A function that takes a run and returns a result.
402
+
403
+ Yields:
404
+ Results obtained by applying the function to each run in the
405
+ collection.
406
+ """
407
+ return (func(run) for run in self._runs)
408
+
409
+ def map_run_id(self, func: Callable[[str], T]) -> Iterator[T]:
410
+ """
411
+ Apply a function to each run id in the collection and return an iterator
412
+ of results.
413
+
414
+ Args:
415
+ func: A function that takes a run id and returns a result.
416
+
417
+ Yields:
418
+ Results obtained by applying the function to each run id in the
419
+ collection.
420
+ """
421
+ return (func(run.info.run_id) for run in self._runs)
422
+
423
+ def map_config(self, func: Callable[[DictConfig], T]) -> Iterator[T]:
424
+ """
425
+ Apply a function to each run configuration in the collection and return
426
+ an iterator of results.
427
+
428
+ Args:
429
+ func: A function that takes a run configuration and returns a
430
+ result.
431
+
432
+ Yields:
433
+ Results obtained by applying the function to each run configuration
434
+ in the collection.
435
+ """
436
+ return (func(load_config(run)) for run in self._runs)
437
+
438
+ def map_uri(self, func: Callable[[str | None], T]) -> Iterator[T]:
439
+ """
440
+ Apply a function to each artifact URI in the collection and return an
441
+ iterator of results.
442
+
443
+ This method iterates over each run in the collection, retrieves the
444
+ artifact URI, and applies the provided function to it. If a run does not
445
+ have an artifact URI, None is passed to the function.
446
+
447
+ Args:
448
+ func: A function that takes an artifact URI (string or None) and
449
+ returns a result.
450
+
451
+ Yields:
452
+ Results obtained by applying the function to each artifact URI in the
453
+ collection.
195
454
  """
196
- return get_param_dict(self.runs)
455
+ return (func(run.info.artifact_uri) for run in self._runs)
456
+
457
+ def map_dir(self, func: Callable[[str], T]) -> Iterator[T]:
458
+ """
459
+ Apply a function to each artifact directory in the collection and return
460
+ an iterator of results.
461
+
462
+ This method iterates over each run in the collection, downloads the
463
+ artifact directory, and applies the provided function to the directory
464
+ path.
465
+
466
+ Args:
467
+ func: A function that takes an artifact directory path (string) and
468
+ returns a result.
469
+
470
+ Yields:
471
+ Results obtained by applying the function to each artifact directory
472
+ in the collection.
473
+ """
474
+ return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
475
+
476
+
477
+ def _param_matches(run: Run, key: str, value: Any) -> bool:
478
+ """
479
+ Check if the run's parameter matches the specified key-value pair.
480
+
481
+ This function checks if the run's parameters contain the specified
482
+ key-value pair. It handles different types of values, including lists
483
+ and tuples.
484
+
485
+ Args:
486
+ run: The run object to check.
487
+ key: The parameter key to check.
488
+ value: The parameter value to check.
489
+
490
+ Returns:
491
+ True if the run's parameter matches the specified key-value pair,
492
+ False otherwise.
493
+ """
494
+ param = run.data.params.get(key, value)
495
+
496
+ if param is None:
497
+ return False
498
+
499
+ if param == "None":
500
+ return value is None
501
+
502
+ if isinstance(value, list) and value:
503
+ return type(value[0])(param) in value
504
+
505
+ if isinstance(value, tuple) and len(value) == 2:
506
+ return value[0] <= type(value[0])(param) < value[1]
507
+
508
+ return type(value)(param) == value
197
509
 
198
510
 
199
- def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
511
+ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list[Run]:
200
512
  """
201
513
  Filter the runs based on the provided configuration.
202
514
 
203
515
  This method filters the runs in the collection according to the
204
- specified configuration object. The configuration object should
205
- contain key-value pairs that correspond to the parameters of the
206
- runs. Only the runs that match all the specified parameters will
516
+ specified configuration object and additional key-value pairs.
517
+ The configuration object and key-value pairs should contain
518
+ key-value pairs that correspond to the parameters of the runs.
519
+ Only the runs that match all the specified parameters will
207
520
  be included in the returned list of runs.
208
521
 
522
+ The filtering supports:
523
+ - Exact matches for single values.
524
+ - Membership checks for lists of values.
525
+ - Range checks for tuples of two values (inclusive of the lower bound and
526
+ exclusive of the upper bound).
527
+
209
528
  Args:
210
- runs: The runs to filter.
211
- config: The configuration object to filter the runs.
529
+ runs: The list of runs to filter.
530
+ config: The configuration object to filter the runs. This can be any
531
+ object that provides key-value pairs through the `iter_params`
532
+ function.
212
533
  **kwargs: Additional key-value pairs to filter the runs.
213
534
 
214
535
  Returns:
215
- A filtered list of runs.
536
+ A list of runs that match the specified configuration and key-value pairs.
216
537
  """
217
538
  for key, value in chain(iter_params(config), kwargs.items()):
218
- runs = [run for run in runs if _is_equal(run, key, value)]
539
+ runs = [run for run in runs if _param_matches(run, key, value)]
219
540
 
220
541
  if len(runs) == 0:
221
542
  return []
@@ -223,23 +544,44 @@ def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
223
544
  return runs
224
545
 
225
546
 
226
- def _is_equal(run: Run, key: str, value: Any) -> bool:
227
- param = run.data.params.get(key, value)
547
+ def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
548
+ """
549
+ Find the first run based on the provided configuration.
228
550
 
229
- if param is None:
230
- return False
551
+ This method filters the runs in the collection according to the
552
+ specified configuration object and returns the first run that matches
553
+ the provided parameters. If no run matches the criteria, a `ValueError` is
554
+ raised.
231
555
 
232
- return type(value)(param) == value
556
+ Args:
557
+ runs: The runs to filter.
558
+ config: The configuration object to identify the run.
559
+ **kwargs: Additional key-value pairs to filter the runs.
233
560
 
561
+ Returns:
562
+ The first run object that matches the provided configuration.
563
+
564
+ Raises:
565
+ ValueError: If no run matches the criteria.
234
566
 
235
- def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
567
+ See Also:
568
+ filter_runs: The function that performs the actual filtering logic.
236
569
  """
237
- Retrieve a specific run based on the provided configuration.
570
+ filtered_runs = filter_runs(runs, config, **kwargs)
571
+
572
+ if len(filtered_runs) == 0:
573
+ raise ValueError("No run matches the provided configuration.")
574
+
575
+ return filtered_runs[0]
576
+
577
+
578
+ def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
579
+ """
580
+ Find the first run based on the provided configuration.
238
581
 
239
582
  This method filters the runs in the collection according to the
240
- specified configuration object and returns the run that matches
241
- the provided parameters. If more than one run matches the criteria,
242
- a `ValueError` is raised.
583
+ specified configuration object and returns the first run that matches
584
+ the provided parameters. If no run matches the criteria, None is returned.
243
585
 
244
586
  Args:
245
587
  runs: The runs to filter.
@@ -247,75 +589,161 @@ def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
247
589
  **kwargs: Additional key-value pairs to filter the runs.
248
590
 
249
591
  Returns:
250
- The run object that matches the provided configuration, or None
592
+ The first run object that matches the provided configuration, or None
251
593
  if no runs match the criteria.
594
+ """
595
+ filtered_runs = filter_runs(runs, config, **kwargs)
596
+
597
+ if len(filtered_runs) == 0:
598
+ return None
599
+
600
+ return filtered_runs[0]
601
+
602
+
603
+ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
604
+ """
605
+ Find the last run based on the provided configuration.
606
+
607
+ This method filters the runs in the collection according to the
608
+ specified configuration object and returns the last run that matches
609
+ the provided parameters. If no run matches the criteria, a `ValueError`
610
+ is raised.
611
+
612
+ Args:
613
+ runs: The runs to filter.
614
+ config: The configuration object to identify the run.
615
+ **kwargs: Additional key-value pairs to filter the runs.
616
+
617
+ Returns:
618
+ The last run object that matches the provided configuration.
252
619
 
253
620
  Raises:
254
- ValueError: If more than one run matches the criteria.
621
+ ValueError: If no run matches the criteria.
622
+
623
+ See Also:
624
+ filter_runs: The function that performs the actual filtering
625
+ logic.
255
626
  """
256
- runs = filter_runs(runs, config, **kwargs)
627
+ filtered_runs = filter_runs(runs, config, **kwargs)
257
628
 
258
- if len(runs) == 0:
259
- return None
629
+ if len(filtered_runs) == 0:
630
+ raise ValueError("No run matches the provided configuration.")
260
631
 
261
- if len(runs) == 1:
262
- return runs[0]
632
+ return filtered_runs[-1]
263
633
 
264
- msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
265
- raise ValueError(msg)
266
634
 
635
+ def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
636
+ """
637
+ Find the last run based on the provided configuration.
638
+
639
+ This method filters the runs in the collection according to the
640
+ specified configuration object and returns the last run that matches
641
+ the provided parameters. If no run matches the criteria, None is returned.
642
+
643
+ Args:
644
+ runs: The runs to filter.
645
+ config: The configuration object to identify the run.
646
+ **kwargs: Additional key-value pairs to filter the runs.
647
+
648
+ Returns:
649
+ The last run object that matches the provided configuration, or None
650
+ if no runs match the criteria.
651
+ """
652
+ filtered_runs = filter_runs(runs, config, **kwargs)
653
+
654
+ if len(filtered_runs) == 0:
655
+ return None
656
+
657
+ return filtered_runs[-1]
267
658
 
268
- def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
659
+
660
+ def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
269
661
  """
270
- Get the earliest run from the list of runs based on the start time.
662
+ Retrieve a specific run based on the provided configuration.
271
663
 
272
- This method filters the runs based on the configuration if provided
273
- and returns the run with the earliest start time.
664
+ This method filters the runs in the collection according to the
665
+ specified configuration object and returns the run that matches
666
+ the provided parameters. If no run matches the criteria, or if more
667
+ than one run matches the criteria, a `ValueError` is raised.
274
668
 
275
669
  Args:
276
- runs: The list of runs.
277
- config: The configuration object to filter the runs.
278
- If None, no filtering is applied.
670
+ runs: The runs to filter.
671
+ config: The configuration object to identify the run.
279
672
  **kwargs: Additional key-value pairs to filter the runs.
280
673
 
281
674
  Returns:
282
- The run with the earliest start time, or None if no runs match the criteria.
675
+ The run object that matches the provided configuration.
676
+
677
+ Raises:
678
+ ValueError: If no run matches the criteria or if more than one run
679
+ matches the criteria.
680
+
681
+ See Also:
682
+ filter_runs: The function that performs the actual filtering
683
+ logic.
283
684
  """
284
- if config is not None or kwargs:
285
- runs = filter_runs(runs, config or {}, **kwargs)
685
+ filtered_runs = filter_runs(runs, config, **kwargs)
686
+
687
+ if len(filtered_runs) == 0:
688
+ raise ValueError("No run matches the provided configuration.")
286
689
 
287
- return min(runs, key=lambda run: run.info.start_time, default=None)
690
+ if len(filtered_runs) == 1:
691
+ return filtered_runs[0]
288
692
 
693
+ msg = (
694
+ f"Multiple runs were filtered. Expected number of runs is 1, "
695
+ f"but found {len(filtered_runs)} runs."
696
+ )
697
+ raise ValueError(msg)
289
698
 
290
- def get_latest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
699
+
700
+ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
291
701
  """
292
- Get the latest run from the list of runs based on the start time.
702
+ Retrieve a specific run based on the provided configuration.
293
703
 
294
- This method filters the runs based on the configuration if provided
295
- and returns the run with the latest start time.
704
+ This method filters the runs in the collection according to the
705
+ specified configuration object and returns the run that matches
706
+ the provided parameters. If no run matches the criteria, None is returned.
707
+ If more than one run matches the criteria, a `ValueError` is raised.
296
708
 
297
709
  Args:
298
- runs: The list of runs.
299
- config: The configuration object to filter the runs.
300
- If None, no filtering is applied.
710
+ runs: The runs to filter.
711
+ config: The configuration object to identify the run.
301
712
  **kwargs: Additional key-value pairs to filter the runs.
302
713
 
303
714
  Returns:
304
- The run with the latest start time, or None if no runs match the criteria.
715
+ The run object that matches the provided configuration, or None
716
+ if no runs match the criteria.
717
+
718
+ Raises:
719
+ ValueError: If more than one run matches the criteria.
720
+
721
+ See Also:
722
+ filter_runs: The function that performs the actual filtering
723
+ logic.
305
724
  """
306
- if config is not None or kwargs:
307
- runs = filter_runs(runs, config or {}, **kwargs)
725
+ filtered_runs = filter_runs(runs, config, **kwargs)
726
+
727
+ if len(filtered_runs) == 0:
728
+ return None
308
729
 
309
- return max(runs, key=lambda run: run.info.start_time, default=None)
730
+ if len(filtered_runs) == 1:
731
+ return filtered_runs[0]
732
+
733
+ msg = (
734
+ "Multiple runs were filtered. Expected number of runs is 1, "
735
+ f"but found {len(filtered_runs)} runs."
736
+ )
737
+ raise ValueError(msg)
310
738
 
311
739
 
312
740
  def get_param_names(runs: list[Run]) -> list[str]:
313
741
  """
314
742
  Get the parameter names from the runs.
315
743
 
316
- This method extracts the unique parameter names from the provided list of runs.
317
- It iterates through each run and collects the parameter names into a set to
318
- ensure uniqueness.
744
+ This method extracts the unique parameter names from the provided list of
745
+ runs. It iterates through each run and collects the parameter names into a
746
+ set to ensure uniqueness.
319
747
 
320
748
  Args:
321
749
  runs: The list of runs from which to extract parameter names.
@@ -363,13 +791,15 @@ def load_config(run: Run) -> DictConfig:
363
791
 
364
792
  This function loads the configuration for the provided Run instance
365
793
  by downloading the configuration file from the MLflow artifacts and
366
- loading it using OmegaConf.
794
+ loading it using OmegaConf. It returns an empty config if
795
+ `.hydra/config.yaml` is not found in the run's artifact directory.
367
796
 
368
797
  Args:
369
- run: The Run instance to load the configuration for.
798
+ run: The Run instance for which to load the configuration.
370
799
 
371
800
  Returns:
372
- The loaded configuration.
801
+ The loaded configuration as a DictConfig object. Returns an empty
802
+ DictConfig if the configuration file is not found.
373
803
  """
374
804
  run_id = run.info.run_id
375
805
  return _load_config(run_id)
@@ -378,10 +808,7 @@ def load_config(run: Run) -> DictConfig:
378
808
  @cache
379
809
  def _load_config(run_id: str) -> DictConfig:
380
810
  try:
381
- path = mlflow.artifacts.download_artifacts(
382
- run_id=run_id,
383
- artifact_path=".hydra/config.yaml",
384
- )
811
+ path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
385
812
  except OSError:
386
813
  return DictConfig({})
387
814
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -20,7 +20,9 @@ Requires-Dist: hydra-core>1.3
20
20
  Requires-Dist: mlflow>2.15
21
21
  Requires-Dist: setuptools
22
22
  Requires-Dist: watchdog
23
+ Requires-Dist: watchfiles
23
24
  Provides-Extra: dev
25
+ Requires-Dist: pytest-asyncio; extra == 'dev'
24
26
  Requires-Dist: pytest-clarity; extra == 'dev'
25
27
  Requires-Dist: pytest-cov; extra == 'dev'
26
28
  Requires-Dist: pytest-randomly; extra == 'dev'
@@ -46,14 +48,23 @@ Description-Content-Type: text/markdown
46
48
 
47
49
  ## Overview
48
50
 
49
- Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
51
+ Hydraflow is a powerful library designed to seamlessly integrate
52
+ [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
53
+ manage and track machine learning experiments. By combining the flexibility of
54
+ Hydra's configuration management with the robust experiment tracking capabilities
55
+ of MLflow, Hydraflow provides a comprehensive solution for managing complex
56
+ machine learning workflows.
50
57
 
51
58
  ## Key Features
52
59
 
53
- - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
54
- - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
55
- - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
56
- - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
60
+ - **Configuration Management**: Utilize Hydra's advanced configuration management
61
+ to handle complex parameter sweeps and experiment setups.
62
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
63
+ metrics, and artifacts for each run.
64
+ - **Artifact Management**: Automatically log and manage artifacts, such as model
65
+ checkpoints and configuration files, with MLflow.
66
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
67
+ projects with minimal setup.
57
68
 
58
69
  ## Installation
59
70
 
@@ -0,0 +1,10 @@
1
+ hydraflow/__init__.py,sha256=9v7p2ezUd_LMoRJQS0ay8c7fpaqPZ6Ofq7YPT0rSO5I,528
2
+ hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
3
+ hydraflow/config.py,sha256=FNTuCppjCMrZKVByJMrWKbgj3HeMWWwAmQNoyFe029Y,2087
4
+ hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
5
+ hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
6
+ hydraflow/runs.py,sha256=0BXSBbNkELP3CzaCGBkejOkpyk5uQUxrdknJPRwR400,29022
7
+ hydraflow-0.2.3.dist-info/METADATA,sha256=h5Pxy6EnxTlyyGL8NRr14ZHtLhA9ldmM9GP5sES6KWU,4304
8
+ hydraflow-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ hydraflow-0.2.3.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
10
+ hydraflow-0.2.3.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
2
- hydraflow/config.py,sha256=wI8uNuD2D-hIf4BAhEYJaMC6EyO-erKopy_ia_b1pYA,2048
3
- hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
4
- hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
5
- hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
6
- hydraflow-0.2.1.dist-info/METADATA,sha256=4C_hnw1gMb8WUQXyqj4q8eA1IVbp0wZuLGGthIk1G7U,4224
7
- hydraflow-0.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
8
- hydraflow-0.2.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
9
- hydraflow-0.2.1.dist-info/RECORD,,