hydraflow 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
hydraflow/__init__.py CHANGED
@@ -2,25 +2,29 @@ from .context import Info, chdir_artifact, log_run, watch
2
2
  from .mlflow import set_experiment
3
3
  from .runs import (
4
4
  Run,
5
- Runs,
5
+ RunCollection,
6
6
  filter_runs,
7
7
  get_param_dict,
8
8
  get_param_names,
9
9
  get_run,
10
+ list_runs,
10
11
  load_config,
12
+ search_runs,
11
13
  )
12
14
 
13
15
  __all__ = [
14
16
  "Info",
15
17
  "Run",
16
- "Runs",
18
+ "RunCollection",
17
19
  "chdir_artifact",
18
20
  "filter_runs",
19
21
  "get_param_dict",
20
22
  "get_param_names",
21
23
  "get_run",
24
+ "list_runs",
22
25
  "load_config",
23
26
  "log_run",
27
+ "search_runs",
24
28
  "set_experiment",
25
29
  "watch",
26
30
  ]
hydraflow/asyncio.py ADDED
@@ -0,0 +1,199 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ from asyncio.subprocess import PIPE
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ import watchfiles
10
+
11
+ if TYPE_CHECKING:
12
+ from asyncio.streams import StreamReader
13
+ from collections.abc import Callable
14
+
15
+ from watchfiles import Change
16
+
17
+
18
+ # Set up logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
async def execute_command(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    stop_event: asyncio.Event,
) -> int:
    """
    Runs a command asynchronously and passes its output to callback functions.

    A stream is only piped back to this process when a callback is supplied
    for it. A stream without a callback is sent to the null device, so its
    output is discarded exactly as before, but the child can no longer block
    on a full pipe that nobody reads.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for standard output.
        stderr (Callable[[str], None] | None): Callback for standard error.
        stop_event (asyncio.Event): Event that is always set when this
            coroutine finishes, whether the command succeeded or failed.

    Returns:
        int: The return code of the process, or 1 if the command could not be
        started or an error occurred while handling its output.
    """
    process = None
    try:
        process = await asyncio.create_subprocess_exec(
            program,
            *args,
            # Never open a pipe that will not be drained: an unread pipe can
            # fill up and make `process.wait()` hang forever.
            stdout=PIPE if stdout is not None else asyncio.subprocess.DEVNULL,
            stderr=PIPE if stderr is not None else asyncio.subprocess.DEVNULL,
        )
        await asyncio.gather(
            process_stream(process.stdout, stdout),
            process_stream(process.stderr, stderr),
        )
        returncode = await process.wait()

    except Exception as e:
        logger.error(f"Error running command: {e}")
        returncode = 1
        # Do not leave an orphaned child behind when a callback (or anything
        # else after the spawn) failed while the process was still running.
        if process is not None and process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
            await process.wait()

    finally:
        stop_event.set()

    return returncode
59
+
60
+
61
async def process_stream(
    stream: StreamReader | None,
    callback: Callable[[str], None] | None,
) -> None:
    """
    Reads a stream asynchronously and passes each line to a callback function.

    Lines are read until end-of-stream (an empty read). Each line is decoded,
    stripped of leading and trailing whitespace, and handed to `callback`.
    Bytes that cannot be decoded are replaced with U+FFFD instead of raising,
    so one malformed line cannot abort processing of the remaining output.

    Args:
        stream (StreamReader | None): The stream to read from. If None,
            nothing is read.
        callback (Callable[[str], None] | None): The callback function to
            handle each line. If None, nothing is read.
    """
    if stream is None or callback is None:
        return

    # `readline()` returns b"" only at end-of-stream; a blank line is b"\n".
    while line := await stream.readline():
        callback(line.decode(errors="replace").strip())
82
+
83
+
84
async def monitor_file_changes(
    paths: list[str | Path],
    callback: Callable[[set[tuple[Change, str]]], None],
    stop_event: asyncio.Event,
    **awatch_kwargs,
) -> None:
    """
    Watches the given paths and forwards every batch of changes to a callback.

    Any exception raised while watching (including one raised by the
    callback) is logged and ends the monitoring; it is not re-raised.

    Args:
        paths (list[str | Path]): Paths to monitor for changes.
        callback (Callable[[set[tuple[Change, str]]], None]): Function called
            with each set of changes reported by `watchfiles.awatch`.
        stop_event (asyncio.Event): Event passed to `watchfiles.awatch` to
            signal when to stop watching.
        **awatch_kwargs: Additional keyword arguments forwarded to
            `watchfiles.awatch`.
    """
    # awatch takes the paths as positional string arguments.
    watched = tuple(map(str, paths))
    try:
        change_stream = watchfiles.awatch(*watched, stop_event=stop_event, **awatch_kwargs)
        async for batch in change_stream:
            callback(batch)
    except Exception as e:
        logger.error(f"Error watching files: {e}")
107
+
108
+
109
async def run_and_monitor(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
    paths: list[str | Path] | None = None,
    **awatch_kwargs,
) -> int:
    """
    Runs a command and, optionally, watches for file changes at the same time.

    The command and the file watcher share one stop event. The watcher is
    only started when both `watch` and `paths` are given.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for standard output.
        stderr (Callable[[str], None] | None): Callback for standard error.
        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for
            file changes.
        paths (list[str | Path] | None): List of paths to monitor for changes.
        **awatch_kwargs: Additional keyword arguments forwarded to
            `monitor_file_changes`.

    Returns:
        int: The value returned by `execute_command` for the command.
    """
    stop_event = asyncio.Event()

    command = execute_command(program, *args, stop_event=stop_event, stdout=stdout, stderr=stderr)
    run_task = asyncio.create_task(command)

    monitor_task = None
    if watch and paths:
        watcher = monitor_file_changes(paths, watch, stop_event, **awatch_kwargs)
        monitor_task = asyncio.create_task(watcher)

    try:
        if monitor_task is None:
            await run_task
        else:
            await asyncio.gather(run_task, monitor_task)

    except Exception as e:
        logger.error(f"Error in run_and_monitor: {e}")
    finally:
        # Make sure the watcher is told to stop, then wait for both tasks so
        # nothing is left pending when this coroutine returns.
        stop_event.set()
        await run_task
        if monitor_task is not None:
            await monitor_task

    return run_task.result()
156
+
157
+
158
def run(
    program: str,
    *args: str,
    stdout: Callable[[str], None] | None = None,
    stderr: Callable[[str], None] | None = None,
    watch: Callable[[set[tuple[Change, str]]], None] | None = None,
    paths: list[str | Path] | None = None,
    **awatch_kwargs,
) -> int:
    """
    Run a command synchronously and optionally watch for file changes.

    This is a blocking wrapper that drives the asynchronous `run_and_monitor`
    coroutine with `asyncio.run`. When a `watch` callback is given without
    any `paths`, the current working directory is watched.

    Args:
        program (str): The program to run.
        *args (str): Arguments for the program.
        stdout (Callable[[str], None] | None): Callback for handling standard output lines.
        stderr (Callable[[str], None] | None): Callback for handling standard error lines.
        watch (Callable[[set[tuple[Change, str]]], None] | None): Callback for handling file changes.
        paths (list[str | Path] | None): List of paths to monitor for file changes.
        **awatch_kwargs: Additional keyword arguments to pass to `watchfiles.awatch`.

    Returns:
        int: The return code of the process.
    """
    # Fall back to the working directory only when watching was requested
    # but no paths were supplied.
    watch_paths = paths if (paths or not watch) else [Path.cwd()]

    monitored = run_and_monitor(
        program,
        *args,
        stdout=stdout,
        stderr=stderr,
        watch=watch,
        paths=watch_paths,
        **awatch_kwargs,
    )
    return asyncio.run(monitored)
hydraflow/config.py CHANGED
@@ -30,6 +30,9 @@ def iter_params(config: object, prefix: str = "") -> Iterator[tuple[str, Any]]:
30
30
  Yields:
31
31
  Key-value pairs representing the parameters in the configuration object.
32
32
  """
33
+ if config is None:
34
+ return
35
+
33
36
  if not isinstance(config, (DictConfig, ListConfig)):
34
37
  config = OmegaConf.create(config) # type: ignore
35
38
 
hydraflow/runs.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
- This module provides functionality for managing and interacting with MLflow runs.
3
- It includes the `Runs` class and various methods to filter runs, retrieve run information,
4
- log artifacts, and load configurations.
2
+ This module provides functionality for managing and interacting with MLflow
3
+ runs. It includes the `RunCollection` class and various methods to filter
4
+ runs, retrieve run information, log artifacts, and load configurations.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
@@ -9,9 +9,10 @@ from __future__ import annotations
9
9
  from dataclasses import dataclass
10
10
  from functools import cache
11
11
  from itertools import chain
12
- from typing import TYPE_CHECKING, Any
12
+ from typing import TYPE_CHECKING, Any, TypeVar
13
13
 
14
14
  import mlflow
15
+ from mlflow.artifacts import download_artifacts
15
16
  from mlflow.entities import ViewType
16
17
  from mlflow.entities.run import Run
17
18
  from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
@@ -20,6 +21,7 @@ from omegaconf import DictConfig, OmegaConf
20
21
  from hydraflow.config import iter_params
21
22
 
22
23
  if TYPE_CHECKING:
24
+ from collections.abc import Callable, Iterator
23
25
  from typing import Any
24
26
 
25
27
 
@@ -31,41 +33,46 @@ def search_runs(
31
33
  order_by: list[str] | None = None,
32
34
  search_all_experiments: bool = False,
33
35
  experiment_names: list[str] | None = None,
34
- ) -> Runs:
36
+ ) -> RunCollection:
35
37
  """
36
38
  Search for Runs that fit the specified criteria.
37
39
 
38
- This function wraps the `mlflow.search_runs` function and returns the results
39
- as a `Runs` object. It allows for flexible searching of MLflow runs based on
40
- various criteria.
40
+ This function wraps the `mlflow.search_runs` function and returns the
41
+ results as a `RunCollection` object. It allows for flexible searching of
42
+ MLflow runs based on various criteria.
43
+
44
+ Note:
45
+ The returned runs are sorted by their start time in ascending order.
41
46
 
42
47
  Args:
43
- experiment_ids: List of experiment IDs. Search can work with experiment IDs or
44
- experiment names, but not both in the same call. Values other than
45
- ``None`` or ``[]`` will result in error if ``experiment_names`` is
46
- also not ``None`` or ``[]``. ``None`` will default to the active
47
- experiment if ``experiment_names`` is ``None`` or ``[]``.
48
+ experiment_ids: List of experiment IDs. Search can work with experiment
49
+ IDs or experiment names, but not both in the same call. Values
50
+ other than ``None`` or ``[]`` will result in error if
51
+ ``experiment_names`` is also not ``None`` or ``[]``. ``None`` will
52
+ default to the active experiment if ``experiment_names`` is ``None``
53
+ or ``[]``.
48
54
  filter_string: Filter query string, defaults to searching all runs.
49
- run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or ``ALL`` runs
50
- defined in :py:class:`mlflow.entities.ViewType`.
51
- max_results: The maximum number of runs to put in the dataframe. Default is 100,000
52
- to avoid causing out-of-memory issues on the user's machine.
53
- order_by: List of columns to order by (e.g., "metrics.rmse"). The ``order_by`` column
54
- can contain an optional ``DESC`` or ``ASC`` value. The default is ``ASC``.
55
- The default ordering is to sort by ``start_time DESC``, then ``run_id``.
56
- output_format: The output format to be returned. If ``pandas``, a ``pandas.DataFrame``
57
- is returned and, if ``list``, a list of :py:class:`mlflow.entities.Run`
58
- is returned.
59
- search_all_experiments: Boolean specifying whether all experiments should be searched.
60
- Only honored if ``experiment_ids`` is ``[]`` or ``None``.
61
- experiment_names: List of experiment names. Search can work with experiment IDs or
62
- experiment names, but not both in the same call. Values other
63
- than ``None`` or ``[]`` will result in error if ``experiment_ids``
64
- is also not ``None`` or ``[]``. ``None`` will default to the active
65
- experiment if ``experiment_ids`` is ``None`` or ``[]``.
55
+ run_view_type: one of enum values ``ACTIVE_ONLY``, ``DELETED_ONLY``, or
56
+ ``ALL`` runs defined in :py:class:`mlflow.entities.ViewType`.
57
+ max_results: The maximum number of runs to put in the dataframe. Default
58
+ is 100,000 to avoid causing out-of-memory issues on the user's
59
+ machine.
60
+ order_by: List of columns to order by (e.g., "metrics.rmse"). The
61
+ ``order_by`` column can contain an optional ``DESC`` or ``ASC``
62
+ value. The default is ``ASC``. The default ordering is to sort by
63
+ ``start_time DESC``, then ``run_id``.
64
+ search_all_experiments: Boolean specifying whether all experiments
65
+ should be searched. Only honored if ``experiment_ids`` is ``[]`` or
66
+ ``None``.
67
+ experiment_names: List of experiment names. Search can work with
68
+ experiment IDs or experiment names, but not both in the same call.
69
+ Values other than ``None`` or ``[]`` will result in error if
70
+ ``experiment_ids`` is also not ``None`` or ``[]``. ``None`` will
71
+ default to the active experiment if ``experiment_ids`` is ``None``
72
+ or ``[]``.
66
73
 
67
74
  Returns:
68
- A `Runs` object containing the search results.
75
+ A `RunCollection` object containing the search results.
69
76
  """
70
77
  runs = mlflow.search_runs(
71
78
  experiment_ids=experiment_ids,
@@ -77,11 +84,44 @@ def search_runs(
77
84
  search_all_experiments=search_all_experiments,
78
85
  experiment_names=experiment_names,
79
86
  )
80
- return Runs(runs) # type: ignore
87
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
88
+ return RunCollection(runs) # type: ignore
89
+
90
+
91
def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
    """
    List all runs for the specified experiments.

    This function retrieves all runs for the given list of experiment names.
    If no experiment names are provided (None), it defaults to searching all
    runs for the currently active experiment. If an empty list is provided,
    the function searches all runs for all experiments except the "Default"
    experiment; when no such experiment exists, an empty `RunCollection` is
    returned.

    Note:
        The returned runs are sorted by their start time in ascending order.

    Args:
        experiment_names: List of experiment names to search for runs.
            None selects the currently active experiment; an empty list
            selects every experiment except "Default".

    Returns:
        A `RunCollection` object containing the runs for the specified
        experiments.
    """
    if experiment_names == []:
        experiments = mlflow.search_experiments()
        experiment_names = [e.name for e in experiments if e.name != "Default"]

        if not experiment_names:
            # Passing an empty list on to `search_runs` would make it fall
            # back to the active experiment, returning runs the caller did
            # not ask for. There is nothing to search, so return no runs.
            return RunCollection([])

    return search_runs(experiment_names=experiment_names)
118
+
119
+
120
+ T = TypeVar("T")
81
121
 
82
122
 
83
123
  @dataclass
84
- class Runs:
124
+ class RunCollection:
85
125
  """
86
126
  A class to represent a collection of MLflow runs.
87
127
 
@@ -89,133 +129,414 @@ class Runs:
89
129
  retrieving specific runs, and accessing run information.
90
130
  """
91
131
 
92
- runs: list[Run]
132
+ _runs: list[Run]
133
+ """A list of MLflow Run objects."""
93
134
 
94
135
  def __repr__(self) -> str:
95
136
  return f"{self.__class__.__name__}({len(self)})"
96
137
 
97
138
  def __len__(self) -> int:
98
- return len(self.runs)
139
+ return len(self._runs)
140
+
141
+ def first(self) -> Run:
142
+ """
143
+ Get the first run in the collection.
144
+
145
+ Returns:
146
+ The first run object in the collection.
147
+
148
+ Raises:
149
+ ValueError: If the collection is empty.
150
+ """
151
+ if not self._runs:
152
+ raise ValueError("The collection is empty.")
153
+
154
+ return self._runs[0]
155
+
156
+ def try_first(self) -> Run | None:
157
+ """
158
+ Try to get the first run in the collection.
159
+
160
+ Returns:
161
+ The first run object in the collection, or None if the collection
162
+ is empty.
163
+ """
164
+ return self._runs[0] if self._runs else None
165
+
166
+ def last(self) -> Run:
167
+ """
168
+ Get the last run in the collection.
99
169
 
100
- def filter(self, config: object) -> Runs:
170
+ Returns:
171
+ The last run object in the collection.
172
+
173
+ Raises:
174
+ ValueError: If the collection is empty.
175
+ """
176
+ if not self._runs:
177
+ raise ValueError("The collection is empty.")
178
+
179
+ return self._runs[-1]
180
+
181
+ def try_last(self) -> Run | None:
182
+ """
183
+ Try to get the last run in the collection.
184
+
185
+ Returns:
186
+ The last run object in the collection, or None if the collection is
187
+ empty.
188
+ """
189
+ return self._runs[-1] if self._runs else None
190
+
191
+ def filter(self, config: object | None = None, **kwargs) -> RunCollection:
101
192
  """
102
193
  Filter the runs based on the provided configuration.
103
194
 
104
195
  This method filters the runs in the collection according to the
105
- specified configuration object. The configuration object should
106
- contain key-value pairs that correspond to the parameters of the
107
- runs. Only the runs that match all the specified parameters will
108
- be included in the returned `Runs` object.
196
+ specified configuration object and additional key-value pairs. The
197
+ configuration object and key-value pairs should contain key-value pairs
198
+ that correspond to the parameters of the runs. Only the runs that match
199
+ all the specified parameters will be included in the returned
200
+ `RunCollection` object.
201
+
202
+ The filtering supports:
203
+ - Exact matches for single values.
204
+ - Membership checks for lists of values.
205
+ - Range checks for tuples of two values (inclusive of the lower bound
206
+ and exclusive of the upper bound).
109
207
 
110
208
  Args:
111
- config: The configuration object to filter the runs.
209
+ config: The configuration object to filter the runs. This can be
210
+ any object that provides key-value pairs through the
211
+ `iter_params` function.
212
+ **kwargs: Additional key-value pairs to filter the runs.
112
213
 
113
214
  Returns:
114
- A new `Runs` object containing the filtered runs.
215
+ A new `RunCollection` object containing the filtered runs.
115
216
  """
116
- return Runs(filter_runs(self.runs, config))
217
+ return RunCollection(filter_runs(self._runs, config, **kwargs))
117
218
 
118
- def get(self, config: object) -> Run | None:
219
+ def find(self, config: object | None = None, **kwargs) -> Run:
119
220
  """
120
- Retrieve a specific run based on the provided configuration.
221
+ Find the first run based on the provided configuration.
222
+
223
+ This method filters the runs in the collection according to the
224
+ specified configuration object and returns the first run that matches
225
+ the provided parameters. If no run matches the criteria, a `ValueError`
226
+ is raised.
227
+
228
+ Args:
229
+ config: The configuration object to identify the run.
230
+ **kwargs: Additional key-value pairs to filter the runs.
231
+
232
+ Returns:
233
+ The first run object that matches the provided configuration.
234
+
235
+ Raises:
236
+ ValueError: If no run matches the criteria.
237
+
238
+ See Also:
239
+ RunCollection.filter: The method that performs the actual filtering
240
+ logic.
241
+ """
242
+ return find_run(self._runs, config, **kwargs)
243
+
244
+ def try_find(self, config: object | None = None, **kwargs) -> Run | None:
245
+ """
246
+ Find the first run based on the provided configuration.
247
+
248
+ This method filters the runs in the collection according to the
249
+ specified configuration object and returns the first run that matches
250
+ the provided parameters. If no run matches the criteria, None is
251
+ returned.
252
+
253
+ Args:
254
+ config: The configuration object to identify the run.
255
+ **kwargs: Additional key-value pairs to filter the runs.
256
+
257
+ Returns:
258
+ The first run object that matches the provided configuration, or
259
+ None if no runs match the criteria.
260
+
261
+ See Also:
262
+ RunCollection.filter: The method that performs the actual filtering
263
+ logic.
264
+ """
265
+ return try_find_run(self._runs, config, **kwargs)
266
+
267
+ def find_last(self, config: object | None = None, **kwargs) -> Run:
268
+ """
269
+ Find the last run based on the provided configuration.
121
270
 
122
271
  This method filters the runs in the collection according to the
123
- specified configuration object and returns the run that matches
124
- the provided parameters. If more than one run matches the criteria,
125
- a `ValueError` is raised.
272
+ specified configuration object and returns the last run that matches
273
+ the provided parameters. If no run matches the criteria, a `ValueError`
274
+ is raised.
126
275
 
127
276
  Args:
128
277
  config: The configuration object to identify the run.
278
+ **kwargs: Additional key-value pairs to filter the runs.
129
279
 
130
280
  Returns:
131
- Run: The run object that matches the provided configuration.
132
- None, if the runs are not in a DataFrame format.
281
+ The last run object that matches the provided configuration.
133
282
 
134
283
  Raises:
135
- ValueError: If the number of filtered runs is not exactly one.
284
+ ValueError: If no run matches the criteria.
285
+
286
+ See Also:
287
+ RunCollection.filter: The method that performs the actual filtering
288
+ logic.
136
289
  """
137
- return get_run(self.runs, config)
290
+ return find_last_run(self._runs, config, **kwargs)
138
291
 
139
- def get_earliest_run(self, config: object | None = None, **kwargs) -> Run | None:
292
+ def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
140
293
  """
141
- Get the earliest run from the list of runs based on the start time.
294
+ Find the last run based on the provided configuration.
142
295
 
143
- This method filters the runs based on the configuration if provided
144
- and returns the run with the earliest start time.
296
+ This method filters the runs in the collection according to the
297
+ specified configuration object and returns the last run that matches
298
+ the provided parameters. If no run matches the criteria, None is
299
+ returned.
145
300
 
146
301
  Args:
147
- config: The configuration object to filter the runs.
148
- If None, no filtering is applied.
302
+ config: The configuration object to identify the run.
149
303
  **kwargs: Additional key-value pairs to filter the runs.
150
304
 
151
305
  Returns:
152
- The run with the earliest start time, or None if no runs match the criteria.
306
+ The last run object that matches the provided configuration, or
307
+ None if no runs match the criteria.
308
+
309
+ See Also:
310
+ RunCollection.filter: The method that performs the actual filtering
311
+ logic.
312
+ """
313
+ return try_find_last_run(self._runs, config, **kwargs)
314
+
315
+ def get(self, config: object | None = None, **kwargs) -> Run:
316
+ """
317
+ Retrieve a specific run based on the provided configuration.
318
+
319
+ This method filters the runs in the collection according to the
320
+ specified configuration object and returns the run that matches the
321
+ provided parameters. If no run matches the criteria, or if more than
322
+ one run matches the criteria, a `ValueError` is raised.
323
+
324
+ Args:
325
+ config: The configuration object to identify the run.
326
+ **kwargs: Additional key-value pairs to filter the runs.
327
+
328
+ Returns:
329
+ The run object that matches the provided configuration.
330
+
331
+ Raises:
332
+ ValueError: If no run matches the criteria or if more than one run
333
+ matches the criteria.
334
+
335
+ See Also:
336
+ RunCollection.filter: The method that performs the actual filtering
337
+ logic.
153
338
  """
154
- return get_earliest_run(self.runs, config, **kwargs)
339
+ return get_run(self._runs, config, **kwargs)
155
340
 
156
- def get_latest_run(self, config: object | None = None, **kwargs) -> Run | None:
341
+ def try_get(self, config: object | None = None, **kwargs) -> Run | None:
157
342
  """
158
- Get the latest run from the list of runs based on the start time.
343
+ Retrieve a specific run based on the provided configuration.
344
+
345
+ This method filters the runs in the collection according to the
346
+ specified configuration object and returns the run that matches the
347
+ provided parameters. If no run matches the criteria, None is returned.
348
+ If more than one run matches the criteria, a `ValueError` is raised.
159
349
 
160
350
  Args:
161
- config: The configuration object to filter the runs.
162
- If None, no filtering is applied.
351
+ config: The configuration object to identify the run.
163
352
  **kwargs: Additional key-value pairs to filter the runs.
164
353
 
165
354
  Returns:
166
- The run with the latest start time, or None if no runs match the criteria.
355
+ The run object that matches the provided configuration, or None if
356
+ no runs match the criteria.
357
+
358
+ Raises:
359
+ ValueError: If more than one run matches the criteria.
360
+
361
+ See Also:
362
+ RunCollection.filter: The method that performs the actual filtering
363
+ logic.
167
364
  """
168
- return get_latest_run(self.runs, config, **kwargs)
365
+ return try_get_run(self._runs, config, **kwargs)
169
366
 
170
367
  def get_param_names(self) -> list[str]:
171
368
  """
172
369
  Get the parameter names from the runs.
173
370
 
174
- This method extracts the unique parameter names from the provided list of runs.
175
- It iterates through each run and collects the parameter names into a set to
176
- ensure uniqueness.
371
+ This method extracts the unique parameter names from the provided list
372
+ of runs. It iterates through each run and collects the parameter names
373
+ into a set to ensure uniqueness.
177
374
 
178
375
  Returns:
179
376
  A list of unique parameter names.
180
377
  """
181
- return get_param_names(self.runs)
378
+ return get_param_names(self._runs)
182
379
 
183
380
  def get_param_dict(self) -> dict[str, list[str]]:
184
381
  """
185
382
  Get the parameter dictionary from the list of runs.
186
383
 
187
384
  This method extracts the parameter names and their corresponding values
188
- from the provided list of runs. It iterates through each run and collects
189
- the parameter values into a dictionary where the keys are parameter names
190
- and the values are lists of parameter values.
385
+ from the provided list of runs. It iterates through each run and
386
+ collects the parameter values into a dictionary where the keys are
387
+ parameter names and the values are lists of parameter values.
191
388
 
192
389
  Returns:
193
- A dictionary where the keys are parameter names and the values are lists
194
- of parameter values.
390
+ A dictionary where the keys are parameter names and the values are
391
+ lists of parameter values.
392
+ """
393
+ return get_param_dict(self._runs)
394
+
395
+ def map(self, func: Callable[[Run], T]) -> Iterator[T]:
396
+ """
397
+ Apply a function to each run in the collection and return an iterator of
398
+ results.
399
+
400
+ Args:
401
+ func: A function that takes a run and returns a result.
402
+
403
+ Yields:
404
+ Results obtained by applying the function to each run in the
405
+ collection.
406
+ """
407
+ return (func(run) for run in self._runs)
408
+
409
+ def map_run_id(self, func: Callable[[str], T]) -> Iterator[T]:
410
+ """
411
+ Apply a function to each run id in the collection and return an iterator
412
+ of results.
413
+
414
+ Args:
415
+ func: A function that takes a run id and returns a result.
416
+
417
+ Yields:
418
+ Results obtained by applying the function to each run id in the
419
+ collection.
420
+ """
421
+ return (func(run.info.run_id) for run in self._runs)
422
+
423
+ def map_config(self, func: Callable[[DictConfig], T]) -> Iterator[T]:
424
+ """
425
+ Apply a function to each run configuration in the collection and return
426
+ an iterator of results.
427
+
428
+ Args:
429
+ func: A function that takes a run configuration and returns a
430
+ result.
431
+
432
+ Yields:
433
+ Results obtained by applying the function to each run configuration
434
+ in the collection.
435
+ """
436
+ return (func(load_config(run)) for run in self._runs)
437
+
438
+ def map_uri(self, func: Callable[[str | None], T]) -> Iterator[T]:
439
+ """
440
+ Apply a function to each artifact URI in the collection and return an
441
+ iterator of results.
442
+
443
+ This method iterates over each run in the collection, retrieves the
444
+ artifact URI, and applies the provided function to it. If a run does not
445
+ have an artifact URI, None is passed to the function.
446
+
447
+ Args:
448
+ func: A function that takes an artifact URI (string or None) and
449
+ returns a result.
450
+
451
+ Yields:
452
+ Results obtained by applying the function to each artifact URI in the
453
+ collection.
195
454
  """
196
- return get_param_dict(self.runs)
455
+ return (func(run.info.artifact_uri) for run in self._runs)
456
+
457
+ def map_dir(self, func: Callable[[str], T]) -> Iterator[T]:
458
+ """
459
+ Apply a function to each artifact directory in the collection and return
460
+ an iterator of results.
461
+
462
+ This method iterates over each run in the collection, downloads the
463
+ artifact directory, and applies the provided function to the directory
464
+ path.
465
+
466
+ Args:
467
+ func: A function that takes an artifact directory path (string) and
468
+ returns a result.
469
+
470
+ Yields:
471
+ Results obtained by applying the function to each artifact directory
472
+ in the collection.
473
+ """
474
+ return (func(download_artifacts(run_id=run.info.run_id)) for run in self._runs)
475
+
476
+
477
+ def _param_matches(run: Run, key: str, value: Any) -> bool:
478
+ """
479
+ Check if the run's parameter matches the specified key-value pair.
480
+
481
+ This function checks if the run's parameters contain the specified
482
+ key-value pair. It handles different types of values, including lists
483
+ and tuples.
484
+
485
+ Args:
486
+ run: The run object to check.
487
+ key: The parameter key to check.
488
+ value: The parameter value to check.
489
+
490
+ Returns:
491
+ True if the run's parameter matches the specified key-value pair,
492
+ False otherwise.
493
+ """
494
+ param = run.data.params.get(key, value)
495
+
496
+ if param is None:
497
+ return False
498
+
499
+ if param == "None":
500
+ return value is None
501
+
502
+ if isinstance(value, list) and value:
503
+ return type(value[0])(param) in value
504
+
505
+ if isinstance(value, tuple) and len(value) == 2:
506
+ return value[0] <= type(value[0])(param) < value[1]
507
+
508
+ return type(value)(param) == value
197
509
 
198
510
 
199
- def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
511
+ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list[Run]:
200
512
  """
201
513
  Filter the runs based on the provided configuration.
202
514
 
203
515
  This method filters the runs in the collection according to the
204
- specified configuration object. The configuration object should
205
- contain key-value pairs that correspond to the parameters of the
206
- runs. Only the runs that match all the specified parameters will
516
+ specified configuration object and additional key-value pairs.
517
+ The configuration object and key-value pairs should contain
518
+ key-value pairs that correspond to the parameters of the runs.
519
+ Only the runs that match all the specified parameters will
207
520
  be included in the returned list of runs.
208
521
 
522
+ The filtering supports:
523
+ - Exact matches for single values.
524
+ - Membership checks for lists of values.
525
+ - Range checks for tuples of two values (inclusive of the lower bound and
526
+ exclusive of the upper bound).
527
+
209
528
  Args:
210
- runs: The runs to filter.
211
- config: The configuration object to filter the runs.
529
+ runs: The list of runs to filter.
530
+ config: The configuration object to filter the runs. This can be any
531
+ object that provides key-value pairs through the `iter_params`
532
+ function.
212
533
  **kwargs: Additional key-value pairs to filter the runs.
213
534
 
214
535
  Returns:
215
- A filtered list of runs.
536
+ A list of runs that match the specified configuration and key-value pairs.
216
537
  """
217
538
  for key, value in chain(iter_params(config), kwargs.items()):
218
- runs = [run for run in runs if _is_equal(run, key, value)]
539
+ runs = [run for run in runs if _param_matches(run, key, value)]
219
540
 
220
541
  if len(runs) == 0:
221
542
  return []
@@ -223,23 +544,44 @@ def filter_runs(runs: list[Run], config: object, **kwargs) -> list[Run]:
223
544
  return runs
224
545
 
225
546
 
226
- def _is_equal(run: Run, key: str, value: Any) -> bool:
227
- param = run.data.params.get(key, value)
547
+ def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
548
+ """
549
+ Find the first run based on the provided configuration.
228
550
 
229
- if param is None:
230
- return False
551
+ This method filters the runs in the collection according to the
552
+ specified configuration object and returns the first run that matches
553
+ the provided parameters. If no run matches the criteria, a `ValueError` is
554
+ raised.
231
555
 
232
- return type(value)(param) == value
556
+ Args:
557
+ runs: The runs to filter.
558
+ config: The configuration object to identify the run.
559
+ **kwargs: Additional key-value pairs to filter the runs.
233
560
 
561
+ Returns:
562
+ The first run object that matches the provided configuration.
563
+
564
+ Raises:
565
+ ValueError: If no run matches the criteria.
234
566
 
235
- def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
567
+ See Also:
568
+ RunCollection.filter: The method that performs the actual filtering logic.
236
569
  """
237
- Retrieve a specific run based on the provided configuration.
570
+ filtered_runs = filter_runs(runs, config, **kwargs)
571
+
572
+ if len(filtered_runs) == 0:
573
+ raise ValueError("No run matches the provided configuration.")
574
+
575
+ return filtered_runs[0]
576
+
577
+
578
+ def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
579
+ """
580
+ Find the first run based on the provided configuration.
238
581
 
239
582
  This method filters the runs in the collection according to the
240
- specified configuration object and returns the run that matches
241
- the provided parameters. If more than one run matches the criteria,
242
- a `ValueError` is raised.
583
+ specified configuration object and returns the first run that matches
584
+ the provided parameters. If no run matches the criteria, None is returned.
243
585
 
244
586
  Args:
245
587
  runs: The runs to filter.
@@ -247,75 +589,161 @@ def get_run(runs: list[Run], config: object, **kwargs) -> Run | None:
247
589
  **kwargs: Additional key-value pairs to filter the runs.
248
590
 
249
591
  Returns:
250
- The run object that matches the provided configuration, or None
592
+ The first run object that matches the provided configuration, or None
251
593
  if no runs match the criteria.
594
+ """
595
+ filtered_runs = filter_runs(runs, config, **kwargs)
596
+
597
+ if len(filtered_runs) == 0:
598
+ return None
599
+
600
+ return filtered_runs[0]
601
+
602
+
603
+ def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
604
+ """
605
+ Find the last run based on the provided configuration.
606
+
607
+ This method filters the runs in the collection according to the
608
+ specified configuration object and returns the last run that matches
609
+ the provided parameters. If no run matches the criteria, a `ValueError`
610
+ is raised.
611
+
612
+ Args:
613
+ runs: The runs to filter.
614
+ config: The configuration object to identify the run.
615
+ **kwargs: Additional key-value pairs to filter the runs.
616
+
617
+ Returns:
618
+ The last run object that matches the provided configuration.
252
619
 
253
620
  Raises:
254
- ValueError: If more than one run matches the criteria.
621
+ ValueError: If no run matches the criteria.
622
+
623
+ See Also:
624
+ RunCollection.filter: The method that performs the actual filtering
625
+ logic.
255
626
  """
256
- runs = filter_runs(runs, config, **kwargs)
627
+ filtered_runs = filter_runs(runs, config, **kwargs)
257
628
 
258
- if len(runs) == 0:
259
- return None
629
+ if len(filtered_runs) == 0:
630
+ raise ValueError("No run matches the provided configuration.")
260
631
 
261
- if len(runs) == 1:
262
- return runs[0]
632
+ return filtered_runs[-1]
263
633
 
264
- msg = f"Multiple runs were filtered. Expected number of runs is 1, but found {len(runs)} runs."
265
- raise ValueError(msg)
266
634
 
635
+ def try_find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
636
+ """
637
+ Find the last run based on the provided configuration.
638
+
639
+ This method filters the runs in the collection according to the
640
+ specified configuration object and returns the last run that matches
641
+ the provided parameters. If no run matches the criteria, None is returned.
642
+
643
+ Args:
644
+ runs: The runs to filter.
645
+ config: The configuration object to identify the run.
646
+ **kwargs: Additional key-value pairs to filter the runs.
647
+
648
+ Returns:
649
+ The last run object that matches the provided configuration, or None
650
+ if no runs match the criteria.
651
+ """
652
+ filtered_runs = filter_runs(runs, config, **kwargs)
653
+
654
+ if len(filtered_runs) == 0:
655
+ return None
656
+
657
+ return filtered_runs[-1]
267
658
 
268
- def get_earliest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
659
+
660
+ def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
269
661
  """
270
- Get the earliest run from the list of runs based on the start time.
662
+ Retrieve a specific run based on the provided configuration.
271
663
 
272
- This method filters the runs based on the configuration if provided
273
- and returns the run with the earliest start time.
664
+ This method filters the runs in the collection according to the
665
+ specified configuration object and returns the run that matches
666
+ the provided parameters. If no run matches the criteria, or if more
667
+ than one run matches the criteria, a `ValueError` is raised.
274
668
 
275
669
  Args:
276
- runs: The list of runs.
277
- config: The configuration object to filter the runs.
278
- If None, no filtering is applied.
670
+ runs: The runs to filter.
671
+ config: The configuration object to identify the run.
279
672
  **kwargs: Additional key-value pairs to filter the runs.
280
673
 
281
674
  Returns:
282
- The run with the earliest start time, or None if no runs match the criteria.
675
+ The run object that matches the provided configuration.
676
+
677
+ Raises:
678
+ ValueError: If no run matches the criteria or if more than one run
679
+ matches the criteria.
680
+
681
+ See Also:
682
+ RunCollection.filter: The method that performs the actual filtering
683
+ logic.
283
684
  """
284
- if config is not None or kwargs:
285
- runs = filter_runs(runs, config or {}, **kwargs)
685
+ filtered_runs = filter_runs(runs, config, **kwargs)
686
+
687
+ if len(filtered_runs) == 0:
688
+ raise ValueError("No run matches the provided configuration.")
286
689
 
287
- return min(runs, key=lambda run: run.info.start_time, default=None)
690
+ if len(filtered_runs) == 1:
691
+ return filtered_runs[0]
288
692
 
693
+ msg = (
694
+ f"Multiple runs were filtered. Expected number of runs is 1, "
695
+ f"but found {len(filtered_runs)} runs."
696
+ )
697
+ raise ValueError(msg)
289
698
 
290
- def get_latest_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
699
+
700
+ def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
291
701
  """
292
- Get the latest run from the list of runs based on the start time.
702
+ Retrieve a specific run based on the provided configuration.
293
703
 
294
- This method filters the runs based on the configuration if provided
295
- and returns the run with the latest start time.
704
+ This method filters the runs in the collection according to the
705
+ specified configuration object and returns the run that matches
706
+ the provided parameters. If no run matches the criteria, None is returned.
707
+ If more than one run matches the criteria, a `ValueError` is raised.
296
708
 
297
709
  Args:
298
- runs: The list of runs.
299
- config: The configuration object to filter the runs.
300
- If None, no filtering is applied.
710
+ runs: The runs to filter.
711
+ config: The configuration object to identify the run.
301
712
  **kwargs: Additional key-value pairs to filter the runs.
302
713
 
303
714
  Returns:
304
- The run with the latest start time, or None if no runs match the criteria.
715
+ The run object that matches the provided configuration, or None
716
+ if no runs match the criteria.
717
+
718
+ Raises:
719
+ ValueError: If more than one run matches the criteria.
720
+
721
+ See Also:
722
+ RunCollection.filter: The method that performs the actual filtering
723
+ logic.
305
724
  """
306
- if config is not None or kwargs:
307
- runs = filter_runs(runs, config or {}, **kwargs)
725
+ filtered_runs = filter_runs(runs, config, **kwargs)
726
+
727
+ if len(filtered_runs) == 0:
728
+ return None
308
729
 
309
- return max(runs, key=lambda run: run.info.start_time, default=None)
730
+ if len(filtered_runs) == 1:
731
+ return filtered_runs[0]
732
+
733
+ msg = (
734
+ "Multiple runs were filtered. Expected number of runs is 1, "
735
+ f"but found {len(filtered_runs)} runs."
736
+ )
737
+ raise ValueError(msg)
310
738
 
311
739
 
312
740
  def get_param_names(runs: list[Run]) -> list[str]:
313
741
  """
314
742
  Get the parameter names from the runs.
315
743
 
316
- This method extracts the unique parameter names from the provided list of runs.
317
- It iterates through each run and collects the parameter names into a set to
318
- ensure uniqueness.
744
+ This method extracts the unique parameter names from the provided list of
745
+ runs. It iterates through each run and collects the parameter names into a
746
+ set to ensure uniqueness.
319
747
 
320
748
  Args:
321
749
  runs: The list of runs from which to extract parameter names.
@@ -363,13 +791,15 @@ def load_config(run: Run) -> DictConfig:
363
791
 
364
792
  This function loads the configuration for the provided Run instance
365
793
  by downloading the configuration file from the MLflow artifacts and
366
- loading it using OmegaConf.
794
+ loading it using OmegaConf. It returns an empty config if
795
+ `.hydra/config.yaml` is not found in the run's artifact directory.
367
796
 
368
797
  Args:
369
- run: The Run instance to load the configuration for.
798
+ run: The Run instance for which to load the configuration.
370
799
 
371
800
  Returns:
372
- The loaded configuration.
801
+ The loaded configuration as a DictConfig object. Returns an empty
802
+ DictConfig if the configuration file is not found.
373
803
  """
374
804
  run_id = run.info.run_id
375
805
  return _load_config(run_id)
@@ -378,10 +808,7 @@ def load_config(run: Run) -> DictConfig:
378
808
  @cache
379
809
  def _load_config(run_id: str) -> DictConfig:
380
810
  try:
381
- path = mlflow.artifacts.download_artifacts(
382
- run_id=run_id,
383
- artifact_path=".hydra/config.yaml",
384
- )
811
+ path = download_artifacts(run_id=run_id, artifact_path=".hydra/config.yaml")
385
812
  except OSError:
386
813
  return DictConfig({})
387
814
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -20,7 +20,9 @@ Requires-Dist: hydra-core>1.3
20
20
  Requires-Dist: mlflow>2.15
21
21
  Requires-Dist: setuptools
22
22
  Requires-Dist: watchdog
23
+ Requires-Dist: watchfiles
23
24
  Provides-Extra: dev
25
+ Requires-Dist: pytest-asyncio; extra == 'dev'
24
26
  Requires-Dist: pytest-clarity; extra == 'dev'
25
27
  Requires-Dist: pytest-cov; extra == 'dev'
26
28
  Requires-Dist: pytest-randomly; extra == 'dev'
@@ -46,14 +48,23 @@ Description-Content-Type: text/markdown
46
48
 
47
49
  ## Overview
48
50
 
49
- Hydraflow is a powerful library designed to seamlessly integrate [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to manage and track machine learning experiments. By combining the flexibility of Hydra's configuration management with the robust experiment tracking capabilities of MLflow, Hydraflow provides a comprehensive solution for managing complex machine learning workflows.
51
+ Hydraflow is a powerful library designed to seamlessly integrate
52
+ [Hydra](https://hydra.cc/) and [MLflow](https://mlflow.org/), making it easier to
53
+ manage and track machine learning experiments. By combining the flexibility of
54
+ Hydra's configuration management with the robust experiment tracking capabilities
55
+ of MLflow, Hydraflow provides a comprehensive solution for managing complex
56
+ machine learning workflows.
50
57
 
51
58
  ## Key Features
52
59
 
53
- - **Configuration Management**: Utilize Hydra's advanced configuration management to handle complex parameter sweeps and experiment setups.
54
- - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters, metrics, and artifacts for each run.
55
- - **Artifact Management**: Automatically log and manage artifacts, such as model checkpoints and configuration files, with MLflow.
56
- - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning projects with minimal setup.
60
+ - **Configuration Management**: Utilize Hydra's advanced configuration management
61
+ to handle complex parameter sweeps and experiment setups.
62
+ - **Experiment Tracking**: Leverage MLflow's tracking capabilities to log parameters,
63
+ metrics, and artifacts for each run.
64
+ - **Artifact Management**: Automatically log and manage artifacts, such as model
65
+ checkpoints and configuration files, with MLflow.
66
+ - **Seamless Integration**: Easily integrate Hydra and MLflow in your machine learning
67
+ projects with minimal setup.
57
68
 
58
69
  ## Installation
59
70
 
@@ -0,0 +1,10 @@
1
+ hydraflow/__init__.py,sha256=9v7p2ezUd_LMoRJQS0ay8c7fpaqPZ6Ofq7YPT0rSO5I,528
2
+ hydraflow/asyncio.py,sha256=yh851L315QHzRBwq6r-uwO2oZKgz1JawHp-fswfxT1E,6175
3
+ hydraflow/config.py,sha256=FNTuCppjCMrZKVByJMrWKbgj3HeMWWwAmQNoyFe029Y,2087
4
+ hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
5
+ hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
6
+ hydraflow/runs.py,sha256=0BXSBbNkELP3CzaCGBkejOkpyk5uQUxrdknJPRwR400,29022
7
+ hydraflow-0.2.3.dist-info/METADATA,sha256=h5Pxy6EnxTlyyGL8NRr14ZHtLhA9ldmM9GP5sES6KWU,4304
8
+ hydraflow-0.2.3.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
9
+ hydraflow-0.2.3.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
10
+ hydraflow-0.2.3.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- hydraflow/__init__.py,sha256=PzziOG9RnGAVbl9Yz4ScvsL8nfkjsuN0alMKRvZT-_Y,442
2
- hydraflow/config.py,sha256=wI8uNuD2D-hIf4BAhEYJaMC6EyO-erKopy_ia_b1pYA,2048
3
- hydraflow/context.py,sha256=MqkEhKEZL_N3eb3v5u9D4EqKkiSmiPyXXafhPkALRlg,5129
4
- hydraflow/mlflow.py,sha256=_Los9E38eG8sTiN8bGwZmvjCrS0S-wSGiA4fyhQM3Zw,2251
5
- hydraflow/runs.py,sha256=NT7IzE-Pf7T2Ey-eWEPZzQQaX4Gt_RKDKSn2pj2yzGc,14304
6
- hydraflow-0.2.1.dist-info/METADATA,sha256=4C_hnw1gMb8WUQXyqj4q8eA1IVbp0wZuLGGthIk1G7U,4224
7
- hydraflow-0.2.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
8
- hydraflow-0.2.1.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
9
- hydraflow-0.2.1.dist-info/RECORD,,