hydraflow 0.2.11__tar.gz → 0.2.14__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. {hydraflow-0.2.11 → hydraflow-0.2.14}/PKG-INFO +1 -3
  2. hydraflow-0.2.14/mkdocs.yml +53 -0
  3. {hydraflow-0.2.11 → hydraflow-0.2.14}/pyproject.toml +1 -3
  4. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/context.py +7 -2
  5. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/mlflow.py +32 -5
  6. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/progress.py +3 -5
  7. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/run_collection.py +144 -317
  8. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_app.py +42 -0
  9. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_run_collection.py +43 -121
  10. {hydraflow-0.2.11 → hydraflow-0.2.14}/.devcontainer/devcontainer.json +0 -0
  11. {hydraflow-0.2.11 → hydraflow-0.2.14}/.devcontainer/postCreate.sh +0 -0
  12. {hydraflow-0.2.11 → hydraflow-0.2.14}/.devcontainer/starship.toml +0 -0
  13. {hydraflow-0.2.11 → hydraflow-0.2.14}/.gitattributes +0 -0
  14. {hydraflow-0.2.11 → hydraflow-0.2.14}/.gitignore +0 -0
  15. {hydraflow-0.2.11 → hydraflow-0.2.14}/LICENSE +0 -0
  16. {hydraflow-0.2.11 → hydraflow-0.2.14}/README.md +0 -0
  17. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/__init__.py +0 -0
  18. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/asyncio.py +0 -0
  19. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/config.py +0 -0
  20. {hydraflow-0.2.11 → hydraflow-0.2.14}/src/hydraflow/info.py +0 -0
  21. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/scripts/__init__.py +0 -0
  22. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/scripts/app.py +0 -0
  23. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/scripts/progress.py +0 -0
  24. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/scripts/watch.py +0 -0
  25. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_asyncio.py +0 -0
  26. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_config.py +0 -0
  27. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_context.py +0 -0
  28. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_info.py +0 -0
  29. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_log_run.py +0 -0
  30. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_mlflow.py +0 -0
  31. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_progress.py +0 -0
  32. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_version.py +0 -0
  33. {hydraflow-0.2.11 → hydraflow-0.2.14}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: hydraflow
3
- Version: 0.2.11
3
+ Version: 0.2.14
4
4
  Summary: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
5
5
  Project-URL: Documentation, https://github.com/daizutabi/hydraflow
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -13,8 +13,6 @@ Classifier: Programming Language :: Python
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
- Classifier: Topic :: Documentation
17
- Classifier: Topic :: Software Development :: Documentation
18
16
  Requires-Python: >=3.10
19
17
  Requires-Dist: hydra-core>1.3
20
18
  Requires-Dist: joblib
@@ -0,0 +1,53 @@
1
+ site_name: hydraflow
2
+ site_url: https://daizutabi.github.io/hydraflow/
3
+ site_description: Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments.
4
+ site_author: daizutabi
5
+ repo_url: https://github.com/daizutabi/hydraflow/
6
+ repo_name: daizutabi/hydraflow
7
+ edit_uri: edit/main/docs/
8
+ theme:
9
+ name: material
10
+ font:
11
+ text: Fira Sans
12
+ code: Fira Code
13
+ icon:
14
+ repo: fontawesome/brands/github
15
+ palette:
16
+ - scheme: default
17
+ primary: indigo
18
+ accent: indigo
19
+ toggle:
20
+ icon: material/weather-sunny
21
+ name: Switch to dark mode
22
+ - scheme: slate
23
+ primary: black
24
+ accent: black
25
+ toggle:
26
+ icon: material/weather-night
27
+ name: Switch to light mode
28
+ features:
29
+ - content.code.annotate
30
+ - content.tooltips
31
+ - navigation.expand
32
+ - navigation.footer
33
+ - navigation.indexes
34
+ - navigation.sections
35
+ - navigation.tabs
36
+ - navigation.tabs.sticky
37
+ - navigation.top
38
+ - navigation.tracking
39
+ plugins:
40
+ - search
41
+ - mkapi
42
+ markdown_extensions:
43
+ - pymdownx.magiclink
44
+ - pymdownx.highlight:
45
+ use_pygments: true
46
+ - pymdownx.inlinehilite
47
+ - pymdownx.snippets
48
+ - pymdownx.superfences
49
+ - pymdownx.tabbed:
50
+ alternate_style: true
51
+ nav:
52
+ - Home: index.md
53
+ - Reference: $api/hydraflow.**
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hydraflow"
7
- version = "0.2.11"
7
+ version = "0.2.14"
8
8
  description = "Hydraflow integrates Hydra and MLflow to manage and track machine learning experiments."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -15,8 +15,6 @@ classifiers = [
15
15
  "Programming Language :: Python :: 3.10",
16
16
  "Programming Language :: Python :: 3.11",
17
17
  "Programming Language :: Python :: 3.12",
18
- "Topic :: Documentation",
19
- "Topic :: Software Development :: Documentation",
20
18
  ]
21
19
  requires-python = ">=3.10"
22
20
  dependencies = [
@@ -51,9 +51,11 @@ def log_run(
51
51
  None
52
52
 
53
53
  Example:
54
+ ```python
54
55
  with log_run(config):
55
56
  # Perform operations within the MLflow run context
56
57
  pass
58
+ ```
57
59
  """
58
60
  log_params(config, synchronous=synchronous)
59
61
 
@@ -122,8 +124,9 @@ def start_run(
122
124
  pass
123
125
 
124
126
  See Also:
125
- `mlflow.start_run`: The MLflow function to start a run directly.
126
- `log_run`: A context manager to log parameters and manage the MLflow run context.
127
+ - `mlflow.start_run`: The MLflow function to start a run directly.
128
+ - `log_run`: A context manager to log parameters and manage the MLflow
129
+ run context.
127
130
  """
128
131
  with mlflow.start_run(
129
132
  run_id=run_id,
@@ -169,9 +172,11 @@ def watch(
169
172
  None
170
173
 
171
174
  Example:
175
+ ```python
172
176
  with watch(log_artifact, "/path/to/dir"):
173
177
  # Perform operations while watching the directory for changes
174
178
  pass
179
+ ```
175
180
  """
176
181
  dir = dir or get_artifact_dir()
177
182
  if isinstance(dir, Path):
@@ -22,10 +22,11 @@ from __future__ import annotations
22
22
  from pathlib import Path
23
23
  from typing import TYPE_CHECKING
24
24
 
25
+ import joblib
25
26
  import mlflow
26
27
  from hydra.core.hydra_config import HydraConfig
27
28
  from mlflow.entities import ViewType
28
- from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS
29
+ from mlflow.tracking.fluent import SEARCH_MAX_RESULTS_PANDAS, _get_experiment_id
29
30
 
30
31
  from hydraflow.config import iter_params
31
32
  from hydraflow.run_collection import RunCollection
@@ -146,7 +147,9 @@ def search_runs(
146
147
  return RunCollection(runs) # type: ignore
147
148
 
148
149
 
149
- def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
150
+ def list_runs(
151
+ experiment_names: str | list[str] | None = None, n_jobs: int = 0
152
+ ) -> RunCollection:
150
153
  """
151
154
  List all runs for the specified experiments.
152
155
 
@@ -166,10 +169,34 @@ def list_runs(experiment_names: list[str] | None = None) -> RunCollection:
166
169
  the "Default" experiment.
167
170
 
168
171
  Returns:
169
- A `RunCollection` object containing the runs for the specified experiments.
172
+ RunCollection: A `RunCollection` instance containing the runs for the
173
+ specified experiments.
170
174
  """
171
- if experiment_names == []:
175
+ if isinstance(experiment_names, str):
176
+ experiment_names = [experiment_names]
177
+
178
+ elif experiment_names == []:
172
179
  experiments = mlflow.search_experiments()
173
180
  experiment_names = [e.name for e in experiments if e.name != "Default"]
174
181
 
175
- return search_runs(experiment_names=experiment_names)
182
+ if n_jobs == 0:
183
+ return search_runs(experiment_names=experiment_names)
184
+
185
+ if experiment_names is None:
186
+ experiment_id = _get_experiment_id()
187
+ experiment_names = [mlflow.get_experiment(experiment_id).name]
188
+
189
+ run_ids = []
190
+
191
+ for name in experiment_names:
192
+ if experiment := mlflow.get_experiment_by_name(name):
193
+ loc = experiment.artifact_location
194
+
195
+ if isinstance(loc, str) and loc.startswith("file://"):
196
+ path = Path(mlflow.artifacts.download_artifacts(loc))
197
+ run_ids.extend(file.stem for file in path.iterdir() if file.is_dir())
198
+
199
+ it = (joblib.delayed(mlflow.get_run)(run_id) for run_id in run_ids)
200
+ runs = joblib.Parallel(n_jobs, prefer="threads")(it)
201
+ runs = sorted(runs, key=lambda run: run.info.start_time) # type: ignore
202
+ return RunCollection(runs) # type: ignore
@@ -7,16 +7,12 @@ of tasks in parallel while displaying progress updates.
7
7
 
8
8
  The following key components are provided:
9
9
 
10
- - JoblibProgress: A context manager for tracking progress with Rich's Progress
10
+ - JoblibProgress: A context manager for tracking progress with Rich's progress
11
11
  bar.
12
12
  - parallel_progress: A function to execute a given function in parallel over
13
13
  an iterable with progress tracking.
14
14
  - multi_tasks_progress: A function to render auto-updating progress bars for
15
15
  multiple tasks concurrently.
16
-
17
- Usage:
18
- Import the necessary functions and use them to manage progress in your
19
- parallel processing tasks.
20
16
  """
21
17
 
22
18
  from __future__ import annotations
@@ -56,8 +52,10 @@ def JoblibProgress(
56
52
  Progress: A Progress instance for managing the progress bar.
57
53
 
58
54
  Example:
55
+ ```python
59
56
  with JoblibProgress("task", total=100) as progress:
60
57
  # Your parallel processing code here
58
+ ```
61
59
  """
62
60
  if not columns:
63
61
  columns = Progress.get_default_columns()
@@ -1,31 +1,27 @@
1
1
  """
2
- This module provides functionality for managing and interacting with MLflow runs.
3
- It includes the `RunCollection` class, which serves as a container for multiple MLflow
4
- run objects, and various methods to filter, retrieve, and manipulate these runs.
2
+ Provide functionality for managing and interacting with MLflow runs.
3
+ It includes the `RunCollection` class, which serves as a container
4
+ for multiple MLflow `Run` instances, and various methods to filter and
5
+ retrieve these runs.
5
6
 
6
7
  Key Features:
7
- - **Run Management**: The `RunCollection` class allows for easy management of multiple
8
- MLflow runs, providing methods to access, filter, and sort runs based on various
9
- criteria.
10
- - **Filtering**: The module supports filtering runs based on specific configurations
8
+ - **Run Management**: The `RunCollection` class allows for easy management of
9
+ multiple MLflow runs, providing methods to filter and retrieve runs based
10
+ on various criteria.
11
+ - **Filtering**: Support filtering runs based on specific configurations
11
12
  and parameters, enabling users to easily find runs that match certain conditions.
12
13
  - **Retrieval**: Users can retrieve specific runs, including the first, last, or any
13
14
  run that matches a given configuration.
14
- - **Artifact Handling**: The module provides methods to access and manipulate the
15
- artifacts associated with each run, including retrieving artifact URIs and directories.
16
-
17
- The `RunCollection` class is designed to work seamlessly with the MLflow tracking
18
- API, providing a robust solution for managing machine learning experiment runs and
19
- their associated metadata. This module is particularly useful for data scientists and
20
- machine learning engineers who need to track and analyze the results of their experiments
21
- efficiently.
15
+ - **Artifact Handling**: Provide methods to access and manipulate the
16
+ artifacts associated with each run, including retrieving artifact URIs and
17
+ directories.
22
18
  """
23
19
 
24
20
  from __future__ import annotations
25
21
 
26
22
  from dataclasses import dataclass, field
27
23
  from itertools import chain
28
- from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar
24
+ from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar, overload
29
25
 
30
26
  from mlflow.entities.run import Run
31
27
 
@@ -47,9 +43,9 @@ P = ParamSpec("P")
47
43
  @dataclass
48
44
  class RunCollection:
49
45
  """
50
- A class to represent a collection of MLflow runs.
46
+ Represent a collection of MLflow runs.
51
47
 
52
- This class provides methods to interact with the runs, such as filtering,
48
+ Provide methods to interact with the runs, such as filtering,
53
49
  retrieving specific runs, and accessing run information.
54
50
 
55
51
  Key Features:
@@ -59,10 +55,10 @@ class RunCollection:
59
55
  """
60
56
 
61
57
  _runs: list[Run]
62
- """A list of MLflow Run objects."""
58
+ """A list of MLflow `Run` instances."""
63
59
 
64
60
  _info: RunCollectionInfo = field(init=False)
65
- """A list of MLflow Run objects."""
61
+ """An instance of `RunCollectionInfo`."""
66
62
 
67
63
  def __post_init__(self):
68
64
  self._info = RunCollectionInfo(self)
@@ -76,16 +72,51 @@ class RunCollection:
76
72
  def __iter__(self) -> Iterator[Run]:
77
73
  return iter(self._runs)
78
74
 
79
- def __getitem__(self, index: int) -> Run:
75
+ @overload
76
+ def __getitem__(self, index: int) -> Run: ...
77
+
78
+ @overload
79
+ def __getitem__(self, index: slice) -> RunCollection: ...
80
+
81
+ def __getitem__(self, index: int | slice) -> Run | RunCollection:
82
+ if isinstance(index, slice):
83
+ return self.__class__(self._runs[index])
84
+
80
85
  return self._runs[index]
81
86
 
82
87
  def __contains__(self, run: Run) -> bool:
83
88
  return run in self._runs
84
89
 
90
+ @classmethod
91
+ def from_list(cls, runs: list[Run]) -> RunCollection:
92
+ """Create a new `RunCollection` instance from a list of MLflow `Run` instances."""
93
+
94
+ return cls(runs)
95
+
85
96
  @property
86
97
  def info(self) -> RunCollectionInfo:
98
+ """An instance of `RunCollectionInfo`."""
87
99
  return self._info
88
100
 
101
+ def take(self, n: int) -> RunCollection:
102
+ """Take the first n runs from the collection.
103
+
104
+ If n is negative, the method returns the last n runs
105
+ from the collection.
106
+
107
+ Args:
108
+ n (int): The number of runs to take. If n is negative, the method
109
+ returns the last n runs from the collection.
110
+
111
+ Returns:
112
+ A new `RunCollection` instance containing the first n runs if n is
113
+ positive, or the last n runs if n is negative.
114
+ """
115
+ if n < 0:
116
+ return self.__class__(self._runs[n:])
117
+
118
+ return self.__class__(self._runs[:n])
119
+
89
120
  def sort(
90
121
  self,
91
122
  key: Callable[[Run], Any] | None = None,
@@ -93,12 +124,37 @@ class RunCollection:
93
124
  ) -> None:
94
125
  self._runs.sort(key=key or (lambda x: x.info.start_time), reverse=reverse)
95
126
 
127
+ def one(self) -> Run:
128
+ """
129
+ Get the only `Run` instance in the collection.
130
+
131
+ Returns:
132
+ The only `Run` instance in the collection.
133
+
134
+ Raises:
135
+ ValueError: If the collection does not contain exactly one run.
136
+ """
137
+ if len(self._runs) != 1:
138
+ raise ValueError("The collection does not contain exactly one run.")
139
+
140
+ return self._runs[0]
141
+
142
+ def try_one(self) -> Run | None:
143
+ """
144
+ Try to get the only `Run` instance in the collection.
145
+
146
+ Returns:
147
+ The only `Run` instance in the collection, or None if the collection
148
+ does not contain exactly one run.
149
+ """
150
+ return self._runs[0] if len(self._runs) == 1 else None
151
+
96
152
  def first(self) -> Run:
97
153
  """
98
- Get the first run in the collection.
154
+ Get the first `Run` instance in the collection.
99
155
 
100
156
  Returns:
101
- The first run object in the collection.
157
+ The first `Run` instance in the collection.
102
158
 
103
159
  Raises:
104
160
  ValueError: If the collection is empty.
@@ -110,20 +166,20 @@ class RunCollection:
110
166
 
111
167
  def try_first(self) -> Run | None:
112
168
  """
113
- Try to get the first run in the collection.
169
+ Try to get the first `Run` instance in the collection.
114
170
 
115
171
  Returns:
116
- The first run object in the collection, or None if the collection
172
+ The first `Run` instance in the collection, or None if the collection
117
173
  is empty.
118
174
  """
119
175
  return self._runs[0] if self._runs else None
120
176
 
121
177
  def last(self) -> Run:
122
178
  """
123
- Get the last run in the collection.
179
+ Get the last `Run` instance in the collection.
124
180
 
125
181
  Returns:
126
- The last run object in the collection.
182
+ The last `Run` instance in the collection.
127
183
 
128
184
  Raises:
129
185
  ValueError: If the collection is empty.
@@ -135,17 +191,17 @@ class RunCollection:
135
191
 
136
192
  def try_last(self) -> Run | None:
137
193
  """
138
- Try to get the last run in the collection.
194
+ Try to get the last `Run` instance in the collection.
139
195
 
140
196
  Returns:
141
- The last run object in the collection, or None if the collection is
142
- empty.
197
+ The last `Run` instance in the collection, or None if the collection
198
+ is empty.
143
199
  """
144
200
  return self._runs[-1] if self._runs else None
145
201
 
146
202
  def filter(self, config: object | None = None, **kwargs) -> RunCollection:
147
203
  """
148
- Filter the runs based on the provided configuration.
204
+ Filter the `Run` instances based on the provided configuration.
149
205
 
150
206
  This method filters the runs in the collection according to the
151
207
  specified configuration object and additional key-value pairs. The
@@ -173,7 +229,7 @@ class RunCollection:
173
229
 
174
230
  def find(self, config: object | None = None, **kwargs) -> Run:
175
231
  """
176
- Find the first run based on the provided configuration.
232
+ Find the first `Run` instance based on the provided configuration.
177
233
 
178
234
  This method filters the runs in the collection according to the
179
235
  specified configuration object and returns the first run that matches
@@ -185,20 +241,22 @@ class RunCollection:
185
241
  **kwargs: Additional key-value pairs to filter the runs.
186
242
 
187
243
  Returns:
188
- The first run object that matches the provided configuration.
244
+ The first `Run` instance that matches the provided configuration.
189
245
 
190
246
  Raises:
191
247
  ValueError: If no run matches the criteria.
192
248
 
193
249
  See Also:
194
- RunCollection.filter: The method that performs the actual filtering
195
- logic.
250
+ `filter`: Perform the actual filtering logic.
196
251
  """
197
- return find_run(self._runs, config, **kwargs)
252
+ try:
253
+ return self.filter(config, **kwargs).first()
254
+ except ValueError:
255
+ raise ValueError("No run matches the provided configuration.")
198
256
 
199
257
  def try_find(self, config: object | None = None, **kwargs) -> Run | None:
200
258
  """
201
- Find the first run based on the provided configuration.
259
+ Try to find the first `Run` instance based on the provided configuration.
202
260
 
203
261
  This method filters the runs in the collection according to the
204
262
  specified configuration object and returns the first run that matches
@@ -210,18 +268,17 @@ class RunCollection:
210
268
  **kwargs: Additional key-value pairs to filter the runs.
211
269
 
212
270
  Returns:
213
- The first run object that matches the provided configuration, or
271
+ The first `Run` instance that matches the provided configuration, or
214
272
  None if no runs match the criteria.
215
273
 
216
274
  See Also:
217
- RunCollection.filter: The method that performs the actual filtering
218
- logic.
275
+ `filter`: Perform the actual filtering logic.
219
276
  """
220
- return try_find_run(self._runs, config, **kwargs)
277
+ return self.filter(config, **kwargs).try_first()
221
278
 
222
279
  def find_last(self, config: object | None = None, **kwargs) -> Run:
223
280
  """
224
- Find the last run based on the provided configuration.
281
+ Find the last `Run` instance based on the provided configuration.
225
282
 
226
283
  This method filters the runs in the collection according to the
227
284
  specified configuration object and returns the last run that matches
@@ -233,20 +290,22 @@ class RunCollection:
233
290
  **kwargs: Additional key-value pairs to filter the runs.
234
291
 
235
292
  Returns:
236
- The last run object that matches the provided configuration.
293
+ The last `Run` instance that matches the provided configuration.
237
294
 
238
295
  Raises:
239
296
  ValueError: If no run matches the criteria.
240
297
 
241
298
  See Also:
242
- RunCollection.filter: The method that performs the actual filtering
243
- logic.
299
+ `filter`: Perform the actual filtering logic.
244
300
  """
245
- return find_last_run(self._runs, config, **kwargs)
301
+ try:
302
+ return self.filter(config, **kwargs).last()
303
+ except ValueError:
304
+ raise ValueError("No run matches the provided configuration.")
246
305
 
247
306
  def try_find_last(self, config: object | None = None, **kwargs) -> Run | None:
248
307
  """
249
- Find the last run based on the provided configuration.
308
+ Try to find the last `Run` instance based on the provided configuration.
250
309
 
251
310
  This method filters the runs in the collection according to the
252
311
  specified configuration object and returns the last run that matches
@@ -258,18 +317,17 @@ class RunCollection:
258
317
  **kwargs: Additional key-value pairs to filter the runs.
259
318
 
260
319
  Returns:
261
- The last run object that matches the provided configuration, or
320
+ The last `Run` instance that matches the provided configuration, or
262
321
  None if no runs match the criteria.
263
322
 
264
323
  See Also:
265
- RunCollection.filter: The method that performs the actual filtering
266
- logic.
324
+ `filter`: Perform the actual filtering logic.
267
325
  """
268
- return try_find_last_run(self._runs, config, **kwargs)
326
+ return self.filter(config, **kwargs).try_last()
269
327
 
270
328
  def get(self, config: object | None = None, **kwargs) -> Run:
271
329
  """
272
- Retrieve a specific run based on the provided configuration.
330
+ Retrieve a specific `Run` instance based on the provided configuration.
273
331
 
274
332
  This method filters the runs in the collection according to the
275
333
  specified configuration object and returns the run that matches the
@@ -281,21 +339,24 @@ class RunCollection:
281
339
  **kwargs: Additional key-value pairs to filter the runs.
282
340
 
283
341
  Returns:
284
- The run object that matches the provided configuration.
342
+ The `Run` instance that matches the provided configuration.
285
343
 
286
344
  Raises:
287
345
  ValueError: If no run matches the criteria or if more than one run
288
346
  matches the criteria.
289
347
 
290
348
  See Also:
291
- RunCollection.filter: The method that performs the actual filtering
292
- logic.
349
+ `filter`: Perform the actual filtering logic.
293
350
  """
294
- return get_run(self._runs, config, **kwargs)
351
+ try:
352
+ return self.filter(config, **kwargs).one()
353
+ except ValueError:
354
+ msg = "The filtered collection does not contain exactly one run."
355
+ raise ValueError(msg)
295
356
 
296
357
  def try_get(self, config: object | None = None, **kwargs) -> Run | None:
297
358
  """
298
- Retrieve a specific run based on the provided configuration.
359
+ Try to retrieve a specific `Run` instance based on the provided configuration.
299
360
 
300
361
  This method filters the runs in the collection according to the
301
362
  specified configuration object and returns the run that matches the
@@ -307,17 +368,16 @@ class RunCollection:
307
368
  **kwargs: Additional key-value pairs to filter the runs.
308
369
 
309
370
  Returns:
310
- The run object that matches the provided configuration, or None if
311
- no runs match the criteria.
371
+ The `Run` instance that matches the provided configuration, or None
372
+ if no runs match the criteria.
312
373
 
313
374
  Raises:
314
375
  ValueError: If more than one run matches the criteria.
315
376
 
316
377
  See Also:
317
- RunCollection.filter: The method that performs the actual filtering
318
- logic.
378
+ `filter`: Perform the actual filtering logic.
319
379
  """
320
- return try_get_run(self._runs, config, **kwargs)
380
+ return self.filter(config, **kwargs).try_one()
321
381
 
322
382
  def get_param_names(self) -> list[str]:
323
383
  """
@@ -330,7 +390,13 @@ class RunCollection:
330
390
  Returns:
331
391
  A list of unique parameter names.
332
392
  """
333
- return get_param_names(self._runs)
393
+ param_names = set()
394
+
395
+ for run in self:
396
+ for param in run.data.params.keys():
397
+ param_names.add(param)
398
+
399
+ return list(param_names)
334
400
 
335
401
  def get_param_dict(self) -> dict[str, list[str]]:
336
402
  """
@@ -345,7 +411,13 @@ class RunCollection:
345
411
  A dictionary where the keys are parameter names and the values are
346
412
  lists of parameter values.
347
413
  """
348
- return get_param_dict(self._runs)
414
+ params = {}
415
+
416
+ for name in self.get_param_names():
417
+ it = (run.data.params[name] for run in self if name in run.data.params)
418
+ params[name] = sorted(set(it))
419
+
420
+ return params
349
421
 
350
422
  def map(
351
423
  self,
@@ -426,9 +498,9 @@ class RunCollection:
426
498
  Apply a function to each artifact URI in the collection and return an
427
499
  iterator of results.
428
500
 
429
- This method iterates over each run in the collection, retrieves the
430
- artifact URI, and applies the provided function to it. If a run does not
431
- have an artifact URI, None is passed to the function.
501
+ Iterate over each run in the collection, retrieve the artifact URI, and
502
+ apply the provided function to it. If a run does not have an artifact
503
+ URI, None is passed to the function.
432
504
 
433
505
  Args:
434
506
  func (Callable[[str | None, P], T]): A function that takes an
@@ -452,9 +524,8 @@ class RunCollection:
452
524
  Apply a function to each artifact directory in the collection and return
453
525
  an iterator of results.
454
526
 
455
- This method iterates over each run in the collection, downloads the
456
- artifact directory, and applies the provided function to the directory
457
- path.
527
+ Iterate over each run in the collection, download the artifact
528
+ directory, and apply the provided function to the directory path.
458
529
 
459
530
  Args:
460
531
  func (Callable[[Path, P], T]): A function that takes an artifact directory
@@ -474,7 +545,7 @@ class RunCollection:
474
545
  """
475
546
  Group runs by specified parameter names.
476
547
 
477
- This method groups the runs in the collection based on the values of the
548
+ Group the runs in the collection based on the values of the
478
549
  specified parameters. Each unique combination of parameter values will
479
550
  form a key in the returned dictionary.
480
551
 
@@ -500,7 +571,7 @@ def _param_matches(run: Run, key: str, value: Any) -> bool:
500
571
  """
501
572
  Check if the run's parameter matches the specified key-value pair.
502
573
 
503
- This function checks if the run's parameters contain the specified
574
+ Check if the run's parameters contain the specified
504
575
  key-value pair. It handles different types of values, including lists
505
576
  and tuples.
506
577
 
@@ -534,7 +605,7 @@ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list
534
605
  """
535
606
  Filter the runs based on the provided configuration.
536
607
 
537
- This method filters the runs in the collection according to the
608
+ Filter the runs in the collection according to the
538
609
  specified configuration object and additional key-value pairs.
539
610
  The configuration object and key-value pairs should contain
540
611
  key-value pairs that correspond to the parameters of the runs.
@@ -566,201 +637,6 @@ def filter_runs(runs: list[Run], config: object | None = None, **kwargs) -> list
566
637
  return runs
567
638
 
568
639
 
569
- def find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
570
- """
571
- Find the first run based on the provided configuration.
572
-
573
- This method filters the runs in the collection according to the
574
- specified configuration object and returns the first run that matches
575
- the provided parameters. If no run matches the criteria, a `ValueError` is
576
- raised.
577
-
578
- Args:
579
- runs (list[Run]): The runs to filter.
580
- config (object | None): The configuration object to identify the run.
581
- **kwargs: Additional key-value pairs to filter the runs.
582
-
583
- Returns:
584
- The first run object that matches the provided configuration.
585
-
586
- Raises:
587
- ValueError: If no run matches the criteria.
588
-
589
- See Also:
590
- RunCollection.filter: The method that performs the actual filtering logic.
591
- """
592
- filtered_runs = filter_runs(runs, config, **kwargs)
593
-
594
- if len(filtered_runs) == 0:
595
- raise ValueError("No run matches the provided configuration.")
596
-
597
- return filtered_runs[0]
598
-
599
-
600
- def try_find_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
601
- """
602
- Find the first run based on the provided configuration.
603
-
604
- This method filters the runs in the collection according to the
605
- specified configuration object and returns the first run that matches
606
- the provided parameters. If no run matches the criteria, None is returned.
607
-
608
- Args:
609
- runs (list[Run]): The runs to filter.
610
- config (object | None): The configuration object to identify the run.
611
- **kwargs: Additional key-value pairs to filter the runs.
612
-
613
- Returns:
614
- The first run object that matches the provided configuration, or None
615
- if no runs match the criteria.
616
- """
617
- filtered_runs = filter_runs(runs, config, **kwargs)
618
-
619
- if len(filtered_runs) == 0:
620
- return None
621
-
622
- return filtered_runs[0]
623
-
624
-
625
- def find_last_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
626
- """
627
- Find the last run based on the provided configuration.
628
-
629
- This method filters the runs in the collection according to the
630
- specified configuration object and returns the last run that matches
631
- the provided parameters. If no run matches the criteria, a `ValueError`
632
- is raised.
633
-
634
- Args:
635
- runs (list[Run]): The runs to filter.
636
- config (object | None): The configuration object to identify the run.
637
- **kwargs: Additional key-value pairs to filter the runs.
638
-
639
- Returns:
640
- The last run object that matches the provided configuration.
641
-
642
- Raises:
643
- ValueError: If no run matches the criteria.
644
-
645
- See Also:
646
- RunCollection.filter: The method that performs the actual filtering
647
- logic.
648
- """
649
- filtered_runs = filter_runs(runs, config, **kwargs)
650
-
651
- if len(filtered_runs) == 0:
652
- raise ValueError("No run matches the provided configuration.")
653
-
654
- return filtered_runs[-1]
655
-
656
-
657
- def try_find_last_run(
658
- runs: list[Run], config: object | None = None, **kwargs
659
- ) -> Run | None:
660
- """
661
- Find the last run based on the provided configuration.
662
-
663
- This method filters the runs in the collection according to the
664
- specified configuration object and returns the last run that matches
665
- the provided parameters. If no run matches the criteria, None is returned.
666
-
667
- Args:
668
- runs (list[Run]): The runs to filter.
669
- config (object | None): The configuration object to identify the run.
670
- **kwargs: Additional key-value pairs to filter the runs.
671
-
672
- Returns:
673
- The last run object that matches the provided configuration, or None
674
- if no runs match the criteria.
675
- """
676
- filtered_runs = filter_runs(runs, config, **kwargs)
677
-
678
- if len(filtered_runs) == 0:
679
- return None
680
-
681
- return filtered_runs[-1]
682
-
683
-
684
- def get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run:
685
- """
686
- Retrieve a specific run based on the provided configuration.
687
-
688
- This method filters the runs in the collection according to the
689
- specified configuration object and returns the run that matches
690
- the provided parameters. If no run matches the criteria, or if more
691
- than one run matches the criteria, a `ValueError` is raised.
692
-
693
- Args:
694
- runs (list[Run]): The runs to filter.
695
- config (object | None): The configuration object to identify the run.
696
- **kwargs: Additional key-value pairs to filter the runs.
697
-
698
- Returns:
699
- The run object that matches the provided configuration.
700
-
701
- Raises:
702
- ValueError: If no run matches the criteria or if more than one run
703
- matches the criteria.
704
-
705
- See Also:
706
- RunCollection.filter: The method that performs the actual filtering
707
- logic.
708
- """
709
- filtered_runs = filter_runs(runs, config, **kwargs)
710
-
711
- if len(filtered_runs) == 0:
712
- raise ValueError("No run matches the provided configuration.")
713
-
714
- if len(filtered_runs) == 1:
715
- return filtered_runs[0]
716
-
717
- msg = (
718
- f"Multiple runs were filtered. Expected number of runs is 1, "
719
- f"but found {len(filtered_runs)} runs."
720
- )
721
- raise ValueError(msg)
722
-
723
-
724
- def try_get_run(runs: list[Run], config: object | None = None, **kwargs) -> Run | None:
725
- """
726
- Retrieve a specific run based on the provided configuration.
727
-
728
- This method filters the runs in the collection according to the
729
- specified configuration object and returns the run that matches
730
- the provided parameters. If no run matches the criteria, None is returned.
731
- If more than one run matches the criteria, a `ValueError` is raised.
732
-
733
- Args:
734
- runs (list[Run]): The runs to filter.
735
- config (object | None): The configuration object to identify the run.
736
- **kwargs: Additional key-value pairs to filter the runs.
737
-
738
- Returns:
739
- The run object that matches the provided configuration, or None
740
- if no runs match the criteria.
741
-
742
- Raises:
743
- ValueError: If more than one run matches the criteria.
744
-
745
- See Also:
746
- RunCollection.filter: The method that performs the actual filtering
747
- logic.
748
- """
749
- filtered_runs = filter_runs(runs, config, **kwargs)
750
-
751
- if len(filtered_runs) == 0:
752
- return None
753
-
754
- if len(filtered_runs) == 1:
755
- return filtered_runs[0]
756
-
757
- msg = (
758
- "Multiple runs were filtered. Expected number of runs is 1, "
759
- f"but found {len(filtered_runs)} runs."
760
- )
761
- raise ValueError(msg)
762
-
763
-
764
640
  def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
765
641
  """
766
642
  Retrieve the values of specified parameters from the given run.
@@ -787,52 +663,3 @@ def get_params(run: Run, *names: str | list[str]) -> tuple[str | None, ...]:
787
663
  names_.append(name)
788
664
 
789
665
  return tuple(run.data.params.get(name) for name in names_)
790
-
791
-
792
- def get_param_names(runs: list[Run]) -> list[str]:
793
- """
794
- Get the parameter names from the runs.
795
-
796
- This method extracts the unique parameter names from the provided list of
797
- runs. It iterates through each run and collects the parameter names into a
798
- set to ensure uniqueness.
799
-
800
- Args:
801
- runs (list[Run]): The list of runs from which to extract parameter names.
802
-
803
- Returns:
804
- A list of unique parameter names.
805
- """
806
- param_names = set()
807
-
808
- for run in runs:
809
- for param in run.data.params.keys():
810
- param_names.add(param)
811
-
812
- return list(param_names)
813
-
814
-
815
- def get_param_dict(runs: list[Run]) -> dict[str, list[str]]:
816
- """
817
- Get the parameter dictionary from the list of runs.
818
-
819
- This method extracts the parameter names and their corresponding values
820
- from the provided list of runs. It iterates through each run and collects
821
- the parameter values into a dictionary where the keys are parameter names
822
- and the values are lists of parameter values.
823
-
824
- Args:
825
- runs (list[Run]): The list of runs from which to extract parameter names
826
- and values.
827
-
828
- Returns:
829
- A dictionary where the keys are parameter names and the values are lists
830
- of parameter values.
831
- """
832
- params = {}
833
-
834
- for name in get_param_names(runs):
835
- it = (run.data.params[name] for run in runs if name in run.data.params)
836
- params[name] = sorted(set(it))
837
-
838
- return params
@@ -26,6 +26,48 @@ def rc(monkeypatch, tmp_path):
26
26
  yield hydraflow.list_runs()
27
27
 
28
28
 
29
+ def test_list_runs_all(rc: RunCollection):
30
+ from hydraflow.mlflow import list_runs
31
+
32
+ rc_ = list_runs([])
33
+ assert len(rc) == len(rc_)
34
+
35
+ for a, b in zip(rc, rc_):
36
+ assert a.info.run_id == b.info.run_id
37
+ assert a.info.start_time == b.info.start_time
38
+ assert a.info.status == b.info.status
39
+ assert a.info.artifact_uri == b.info.artifact_uri
40
+
41
+
42
+ @pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
43
+ def test_list_runs_parallel(rc: RunCollection, n_jobs: int):
44
+ from hydraflow.mlflow import list_runs
45
+
46
+ rc_ = list_runs("_info_", n_jobs=n_jobs)
47
+ assert len(rc) == len(rc_)
48
+
49
+ for a, b in zip(rc, rc_):
50
+ assert a.info.run_id == b.info.run_id
51
+ assert a.info.start_time == b.info.start_time
52
+ assert a.info.status == b.info.status
53
+ assert a.info.artifact_uri == b.info.artifact_uri
54
+
55
+
56
+ @pytest.mark.parametrize("n_jobs", [0, 1, 2, 4, -1])
57
+ def test_list_runs_parallel_active(rc: RunCollection, n_jobs: int):
58
+ from hydraflow.mlflow import list_runs
59
+
60
+ mlflow.set_experiment("_info_")
61
+ rc_ = list_runs(n_jobs=n_jobs)
62
+ assert len(rc) == len(rc_)
63
+
64
+ for a, b in zip(rc, rc_):
65
+ assert a.info.run_id == b.info.run_id
66
+ assert a.info.start_time == b.info.start_time
67
+ assert a.info.status == b.info.status
68
+ assert a.info.artifact_uri == b.info.artifact_uri
69
+
70
+
29
71
  def test_app_info_run_id(rc: RunCollection):
30
72
  assert len(rc.info.run_id) == 4
31
73
 
@@ -33,6 +33,12 @@ def run_list(runs: RunCollection):
33
33
  return runs._runs
34
34
 
35
35
 
36
+ def test_from_list(run_list: list[Run]):
37
+ rc = RunCollection.from_list(run_list)
38
+ assert len(rc) == len(run_list)
39
+ assert all(run in rc for run in run_list)
40
+
41
+
36
42
  def test_search_runs_sorted(run_list: list[Run]):
37
43
  assert [run.data.params["p"] for run in run_list] == ["0", "1", "2", "3", "4", "5"]
38
44
 
@@ -84,91 +90,6 @@ def test_filter_invalid_param(run_list: list[Run]):
84
90
  assert len(x) == 6
85
91
 
86
92
 
87
- def test_find_run(run_list: list[Run]):
88
- from hydraflow.run_collection import find_run, try_find_run
89
-
90
- x = find_run(run_list, {"r": 1})
91
- assert isinstance(x, Run)
92
- assert x.data.params["p"] == "1"
93
- x = find_run(run_list, r=2)
94
- assert isinstance(x, Run)
95
- assert x.data.params["p"] == "2"
96
- x = try_find_run(run_list, r=2)
97
- assert isinstance(x, Run)
98
- assert x.data.params["p"] == "2"
99
-
100
-
101
- def test_find_run_none(run_list: list[Run]):
102
- from hydraflow.run_collection import find_run
103
-
104
- with pytest.raises(ValueError):
105
- find_run(run_list, {"r": 10})
106
-
107
-
108
- def test_try_find_run_none_empty(run_list: list[Run]):
109
- from hydraflow.run_collection import try_find_run
110
-
111
- assert try_find_run([]) is None
112
-
113
-
114
- def test_find_last_run(run_list: list[Run]):
115
- from hydraflow.run_collection import find_last_run, try_find_last_run
116
-
117
- x = find_last_run(run_list, {"r": 1})
118
- assert isinstance(x, Run)
119
- assert x.data.params["p"] == "4"
120
- x = find_last_run(run_list, r=2)
121
- assert isinstance(x, Run)
122
- assert x.data.params["p"] == "5"
123
- x = try_find_last_run(run_list, r=2)
124
- assert isinstance(x, Run)
125
- assert x.data.params["p"] == "5"
126
-
127
-
128
- def test_find_last_run_none(run_list: list[Run]):
129
- from hydraflow.run_collection import find_last_run
130
-
131
- with pytest.raises(ValueError):
132
- find_last_run(run_list, {"r": 10})
133
-
134
-
135
- def test_try_find_last_run_none(run_list: list[Run]):
136
- from hydraflow.run_collection import try_find_last_run
137
-
138
- assert try_find_last_run([]) is None
139
-
140
-
141
- def test_get_run(run_list: list[Run]):
142
- from hydraflow.run_collection import get_run
143
-
144
- run = get_run(run_list, {"p": 4})
145
- assert isinstance(run, Run)
146
- assert run.data.params["p"] == "4"
147
-
148
-
149
- def test_get_run_error(run_list: list[Run]):
150
- from hydraflow.run_collection import get_run
151
-
152
- with pytest.raises(ValueError):
153
- get_run(run_list, {"q": 0})
154
-
155
- with pytest.raises(ValueError):
156
- get_run(run_list, {"q": -1})
157
-
158
-
159
- def test_try_get_run_none(run_list: list[Run]):
160
- from hydraflow.run_collection import try_get_run
161
-
162
- assert try_get_run(run_list, {"q": -1}) is None
163
-
164
-
165
- def test_try_get_run_error(run_list: list[Run]):
166
- from hydraflow.run_collection import try_get_run
167
-
168
- with pytest.raises(ValueError):
169
- try_get_run(run_list, {"q": 0})
170
-
171
-
172
93
  def test_get_params(run_list: list[Run]):
173
94
  from hydraflow.run_collection import get_params
174
95
 
@@ -179,24 +100,6 @@ def test_get_params(run_list: list[Run]):
179
100
  assert get_params(run_list[5], ["a", "q"], "r") == (None, "None", "2")
180
101
 
181
102
 
182
- def test_get_param_names(run_list: list[Run]):
183
- from hydraflow.run_collection import get_param_names
184
-
185
- params = get_param_names(run_list)
186
- assert len(params) == 3
187
- assert "p" in params
188
- assert "q" in params
189
- assert "r" in params
190
-
191
-
192
- def test_get_param_dict(run_list: list[Run]):
193
- from hydraflow.run_collection import get_param_dict
194
-
195
- params = get_param_dict(run_list)
196
- assert len(params["p"]) == 6
197
- assert len(params["q"]) == 2
198
-
199
-
200
103
  @pytest.mark.parametrize("i", range(6))
201
104
  def test_chdir_artifact_list(i: int, run_list: list[Run]):
202
105
  from hydraflow.context import chdir_artifact
@@ -364,14 +267,14 @@ def test_list_runs_empty_list(runs, runs2):
364
267
  def test_list_runs_list(runs, runs2, name, n):
365
268
  from hydraflow.mlflow import list_runs
366
269
 
367
- filtered_runs = list_runs(experiment_names=[name])
270
+ filtered_runs = list_runs(name)
368
271
  assert len(filtered_runs) == n
369
272
 
370
273
 
371
274
  def test_list_runs_none(runs, runs2):
372
275
  from hydraflow.mlflow import list_runs
373
276
 
374
- no_runs = list_runs(experiment_names=["non_existent_experiment"])
277
+ no_runs = list_runs(["non_existent_experiment"])
375
278
  assert len(no_runs) == 0
376
279
 
377
280
 
@@ -427,6 +330,33 @@ def test_run_collection_getitem(runs: RunCollection, i: int):
427
330
  assert runs[i] == runs._runs[i]
428
331
 
429
332
 
333
+ @pytest.mark.parametrize("i", range(6))
334
+ def test_run_collection_getitem_slice(runs: RunCollection, i: int):
335
+ assert runs[i : i + 2]._runs == runs._runs[i : i + 2]
336
+
337
+
338
+ @pytest.mark.parametrize("i", range(6))
339
+ def test_run_collection_getitem_slice_step(runs: RunCollection, i: int):
340
+ assert runs[i::2]._runs == runs._runs[i::2]
341
+
342
+
343
+ @pytest.mark.parametrize("i", range(6))
344
+ def test_run_collection_getitem_slice_step_neg(runs: RunCollection, i: int):
345
+ assert runs[i::-2]._runs == runs._runs[i::-2]
346
+
347
+
348
+ def test_run_collection_take(runs: RunCollection):
349
+ assert runs.take(3)._runs == runs._runs[:3]
350
+ assert len(runs.take(4)) == 4
351
+ assert runs.take(10)._runs == runs._runs
352
+
353
+
354
+ def test_run_collection_take_neg(runs: RunCollection):
355
+ assert runs.take(-3)._runs == runs._runs[-3:]
356
+ assert len(runs.take(-4)) == 4
357
+ assert runs.take(-10)._runs == runs._runs
358
+
359
+
430
360
  @pytest.mark.parametrize("i", range(6))
431
361
  def test_run_collection_contains(runs: RunCollection, i: int):
432
362
  assert runs[i] in runs
@@ -462,32 +392,24 @@ def test_filter_runs_no_match(run_list: list[Run]):
462
392
  assert x == []
463
393
 
464
394
 
465
- def test_get_run_no_match(run_list: list[Run]):
466
- from hydraflow.run_collection import get_run
467
-
395
+ def test_get_run_no_match(runs: RunCollection):
468
396
  with pytest.raises(ValueError):
469
- get_run(run_list, {"p": 10})
397
+ runs.get({"p": 10})
470
398
 
471
399
 
472
- def test_get_run_multiple_params(run_list: list[Run]):
473
- from hydraflow.run_collection import get_run
474
-
475
- run = get_run(run_list, {"p": 4, "q": 0})
400
+ def test_get_run_multiple_params(runs: RunCollection):
401
+ run = runs.get({"p": 4, "q": 0})
476
402
  assert isinstance(run, Run)
477
403
  assert run.data.params["p"] == "4"
478
404
  assert run.data.params["q"] == "0"
479
405
 
480
406
 
481
- def test_try_get_run_no_match(run_list: list[Run]):
482
- from hydraflow.run_collection import try_get_run
483
-
484
- assert try_get_run(run_list, {"p": 10}) is None
485
-
407
+ def test_try_get_run_no_match(runs: RunCollection):
408
+ assert runs.try_get({"p": 10}) is None
486
409
 
487
- def test_try_get_run_multiple_params(run_list: list[Run]):
488
- from hydraflow.run_collection import try_get_run
489
410
 
490
- run = try_get_run(run_list, {"p": 4, "q": 0})
411
+ def test_try_get_run_multiple_params(runs: RunCollection):
412
+ run = runs.try_get({"p": 4, "q": 0})
491
413
  assert isinstance(run, Run)
492
414
  assert run.data.params["p"] == "4"
493
415
  assert run.data.params["q"] == "0"
File without changes
File without changes
File without changes
File without changes