hydraflow 0.16.2__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -37,25 +37,18 @@ Note:
37
37
 
38
38
  from __future__ import annotations
39
39
 
40
- from collections.abc import Hashable, Iterable, Sequence
41
- from dataclasses import MISSING
40
+ from functools import cached_property
42
41
  from typing import TYPE_CHECKING, overload
43
42
 
44
- import numpy as np
45
- import polars as pl
46
- from omegaconf import OmegaConf
47
- from polars import DataFrame, Series
48
-
43
+ from .collection import Collection
49
44
  from .run import Run
50
45
 
51
46
  if TYPE_CHECKING:
52
- from collections.abc import Callable, Iterator
47
+ from collections.abc import Callable, Iterable
53
48
  from typing import Any, Self
54
49
 
55
- from numpy.typing import NDArray
56
-
57
50
 
58
- class RunCollection[R: Run[Any, Any]](Sequence[R]):
51
+ class RunCollection[R: Run[Any, Any], I = None](Collection[R]):
59
52
  """A collection of Run instances that implements the Sequence protocol.
60
53
 
61
54
  RunCollection provides methods for filtering, sorting, grouping, and analyzing
@@ -67,79 +60,6 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
67
60
 
68
61
  """
69
62
 
70
- runs: list[R]
71
- """A list containing the Run instances in this collection."""
72
-
73
- def __init__(self, runs: Iterable[R]) -> None:
74
- self.runs = list(runs)
75
-
76
- def __repr__(self) -> str:
77
- """Return a string representation of the RunCollection."""
78
- class_name = self.__class__.__name__
79
- if not self:
80
- return f"{class_name}(empty)"
81
-
82
- type_name = repr(self[0])
83
- if "(" in type_name:
84
- type_name = type_name.split("(", 1)[0]
85
- return f"{class_name}({type_name}, n={len(self)})"
86
-
87
- def __len__(self) -> int:
88
- """Return the number of Run instances in the collection.
89
-
90
- Returns:
91
- int: The number of runs.
92
-
93
- """
94
- return len(self.runs)
95
-
96
- def __bool__(self) -> bool:
97
- """Return whether the collection contains any Run instances.
98
-
99
- Returns:
100
- bool: True if the collection is not empty, False otherwise.
101
-
102
- """
103
- return bool(self.runs)
104
-
105
- @overload
106
- def __getitem__(self, index: int) -> R: ...
107
-
108
- @overload
109
- def __getitem__(self, index: slice) -> Self: ...
110
-
111
- @overload
112
- def __getitem__(self, index: Iterable[int]) -> Self: ...
113
-
114
- def __getitem__(self, index: int | slice | Iterable[int]) -> R | Self:
115
- """Get a Run or a new RunCollection based on the provided index.
116
-
117
- Args:
118
- index: Can be one of:
119
- - An integer to get a single Run
120
- - A slice to get a subrange of Runs
121
- - An iterable of integers to get specific Runs
122
-
123
- Returns:
124
- R | Self: A single Run if index is an integer, or a new
125
- RunCollection if index is a slice or iterable of integers.
126
-
127
- """
128
- if isinstance(index, int):
129
- return self.runs[index]
130
- if isinstance(index, slice):
131
- return self.__class__(self.runs[index])
132
- return self.__class__([self.runs[i] for i in index])
133
-
134
- def __iter__(self) -> Iterator[R]:
135
- """Return an iterator over the Runs in the collection.
136
-
137
- Returns:
138
- Iterator[R]: An iterator yielding Run instances.
139
-
140
- """
141
- return iter(self.runs)
142
-
143
63
  def preload(
144
64
  self,
145
65
  *,
@@ -155,15 +75,39 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
155
75
  access these properties, as they will be already loaded in memory.
156
76
 
157
77
  Args:
158
- cfg (bool): Whether to preload the configuration objects
159
- impl (bool): Whether to preload the implementation objects
160
- n_jobs (int): Number of parallel jobs to run
161
- (-1 means using all processors)
78
+ n_jobs (int): Number of parallel jobs to run.
79
+ - 0: Run sequentially (default)
80
+ - -1: Use all available CPU cores
81
+ - >0: Use the specified number of cores
82
+ cfg (bool): Whether to preload the configuration objects.
83
+ Defaults to True.
84
+ impl (bool): Whether to preload the implementation objects.
85
+ Defaults to True.
162
86
 
163
87
  Returns:
164
88
  Self: The same RunCollection instance with preloaded
165
89
  configuration and implementation objects.
166
90
 
91
+ Note:
92
+ The preloading is done using joblib's threading backend,
93
+ which is suitable for I/O-bound tasks like loading
94
+ configuration files and implementation objects.
95
+
96
+ Examples:
97
+ ```python
98
+ # Preload all runs sequentially
99
+ runs.preload()
100
+
101
+ # Preload using all available cores
102
+ runs.preload(n_jobs=-1)
103
+
104
+ # Preload only configurations
105
+ runs.preload(impl=False)
106
+
107
+ # Preload only implementations
108
+ runs.preload(cfg=False)
109
+ ```
110
+
167
111
  """
168
112
 
169
113
  def load(run: R) -> None:
@@ -220,413 +164,12 @@ class RunCollection[R: Run[Any, Any]](Sequence[R]):
220
164
  for run in self:
221
165
  run.update(key, value, force=force)
222
166
 
223
- def filter(
224
- self,
225
- *predicates: Callable[[R], bool] | tuple[str, Any],
226
- **kwargs: Any,
227
- ) -> Self:
228
- """Filter runs based on predicates or key-value conditions.
229
-
230
- This method allows filtering runs using various criteria:
231
- - Callable predicates that take a Run and return a boolean
232
- - Key-value tuples where the key is a string and the value
233
- is compared using the Run.predicate method
234
- - Keyword arguments, where the key is a string and the value
235
- is compared using the Run.predicate method
236
-
237
- Args:
238
- *predicates: Callable predicates or (key, value) tuples
239
- for filtering.
240
- **kwargs: Additional key-value pairs for filtering.
167
+ @cached_property
168
+ def impls(self) -> Collection[I]:
169
+ """Get the implementation object for all runs in the collection.
241
170
 
242
171
  Returns:
243
- Self: A new RunCollection containing only the runs that
244
- match all criteria.
172
+ Collection[Any]: A collection of implementation objects for all runs.
245
173
 
246
174
  """
247
- runs = self.runs
248
-
249
- for predicate in predicates:
250
- if callable(predicate):
251
- runs = [r for r in runs if predicate(r)]
252
- else:
253
- runs = [r for r in runs if r.predicate(*predicate)]
254
-
255
- for key, value in kwargs.items():
256
- runs = [r for r in runs if r.predicate(key, value)]
257
-
258
- return self.__class__(runs)
259
-
260
- def try_get(
261
- self,
262
- *predicates: Callable[[R], bool] | tuple[str, Any],
263
- **kwargs: Any,
264
- ) -> R | None:
265
- """Try to get a single run matching the specified criteria.
266
-
267
- This method applies filters and returns a single matching
268
- run if exactly one is found, None if no runs are found,
269
- or raises ValueError if multiple runs match.
270
-
271
- Args:
272
- *predicates: Callable predicates or (key, value) tuples
273
- for filtering.
274
- **kwargs: Additional key-value pairs for filtering.
275
-
276
- Returns:
277
- R | None: A single Run that matches the criteria, or None if
278
- no matches are found.
279
-
280
- Raises:
281
- ValueError: If multiple runs match the criteria.
282
-
283
- """
284
- runs = self.filter(*predicates, **kwargs)
285
-
286
- n = len(runs)
287
- if n == 0:
288
- return None
289
-
290
- if n == 1:
291
- return runs[0]
292
-
293
- msg = f"Multiple Run ({n}) found matching the criteria, "
294
- msg += "expected exactly one"
295
- raise ValueError(msg)
296
-
297
- def get(
298
- self,
299
- *predicates: Callable[[R], bool] | tuple[str, Any],
300
- **kwargs: Any,
301
- ) -> R:
302
- """Get a single run matching the specified criteria.
303
-
304
- This method applies filters and returns a single matching run,
305
- or raises ValueError if no runs or multiple runs match.
306
-
307
- Args:
308
- *predicates: Callable predicates or (key, value) tuples
309
- for filtering.
310
- **kwargs: Additional key-value pairs for filtering.
311
-
312
- Returns:
313
- R: A single Run that matches the criteria.
314
-
315
- Raises:
316
- ValueError: If no runs match or if multiple runs match
317
- the criteria.
318
-
319
- """
320
- if run := self.try_get(*predicates, **kwargs):
321
- return run
322
-
323
- raise _value_error()
324
-
325
- def first(
326
- self,
327
- *predicates: Callable[[R], bool] | tuple[str, Any],
328
- **kwargs: Any,
329
- ) -> R:
330
- """Get the first run matching the specified criteria.
331
-
332
- This method applies filters and returns the first matching run,
333
- or raises ValueError if no runs match.
334
-
335
- Args:
336
- *predicates: Callable predicates or (key, value) tuples
337
- for filtering.
338
- **kwargs: Additional key-value pairs for filtering.
339
-
340
- Returns:
341
- R: The first Run that matches the criteria.
342
-
343
- Raises:
344
- ValueError: If no runs match the criteria.
345
-
346
- """
347
- if runs := self.filter(*predicates, **kwargs):
348
- return runs[0]
349
-
350
- raise _value_error()
351
-
352
- def last(
353
- self,
354
- *predicates: Callable[[R], bool] | tuple[str, Any],
355
- **kwargs: Any,
356
- ) -> R:
357
- """Get the last run matching the specified criteria.
358
-
359
- This method applies filters and returns the last matching run,
360
- or raises ValueError if no runs match.
361
-
362
- Args:
363
- *predicates: Callable predicates or (key, value) tuples
364
- for filtering.
365
- **kwargs: Additional key-value pairs for filtering.
366
-
367
- Returns:
368
- R: The last Run that matches the criteria.
369
-
370
- Raises:
371
- ValueError: If no runs match the criteria.
372
-
373
- """
374
- if runs := self.filter(*predicates, **kwargs):
375
- return runs[-1]
376
-
377
- raise _value_error()
378
-
379
- def to_list(
380
- self,
381
- key: str,
382
- default: Any | Callable[[R], Any] = MISSING,
383
- ) -> list[Any]:
384
- """Extract a list of values for a specific key from all runs.
385
-
386
- Args:
387
- key: The key to extract from each run.
388
- default: The default value to return if the key is not found.
389
- If a callable, it will be called with the Run instance
390
- and the value returned will be used as the default.
391
-
392
- Returns:
393
- list[Any]: A list containing the values for the
394
- specified key from each run.
395
-
396
- """
397
- return [run.get(key, default) for run in self]
398
-
399
- def to_numpy(
400
- self,
401
- key: str,
402
- default: Any | Callable[[R], Any] = MISSING,
403
- ) -> NDArray:
404
- """Extract values for a specific key from all runs as a NumPy array.
405
-
406
- Args:
407
- key: The key to extract from each run.
408
- default: The default value to return if the key is not found.
409
- If a callable, it will be called with the Run instance
410
- and the value returned will be used as the default.
411
-
412
- Returns:
413
- NDArray: A NumPy array containing the values for the
414
- specified key from each run.
415
-
416
- """
417
- return np.array(self.to_list(key, default))
418
-
419
- def to_series(
420
- self,
421
- key: str,
422
- default: Any | Callable[[R], Any] = MISSING,
423
- *,
424
- name: str | None = None,
425
- ) -> Series:
426
- """Extract values for a specific key from all runs as a Polars series.
427
-
428
- Args:
429
- key: The key to extract from each run.
430
- default: The default value to return if the key is not found.
431
- If a callable, it will be called with the Run instance
432
- and the value returned will be used as the default.
433
- name: The name of the series. If not provided, the key will be used.
434
-
435
- Returns:
436
- Series: A Polars series containing the values for the
437
- specified key from each run.
438
-
439
- """
440
- return Series(name or key, self.to_list(key, default))
441
-
442
- def unique(
443
- self,
444
- key: str,
445
- default: Any | Callable[[R], Any] = MISSING,
446
- ) -> NDArray:
447
- """Get the unique values for a specific key across all runs.
448
-
449
- Args:
450
- key: The key to extract unique values for.
451
- default: The default value to return if the key is not found.
452
- If a callable, it will be called with the Run instance
453
- and the value returned will be used as the default.
454
-
455
- Returns:
456
- NDArray: A NumPy array containing the unique values for the
457
- specified key.
458
-
459
- """
460
- return np.unique(self.to_numpy(key, default), axis=0)
461
-
462
- def n_unique(
463
- self,
464
- key: str,
465
- default: Any | Callable[[R], Any] = MISSING,
466
- ) -> int:
467
- """Count the number of unique values for a specific key across all runs.
468
-
469
- Args:
470
- key: The key to count unique values for.
471
- default: The default value to return if the key is not found.
472
- If a callable, it will be called with the Run instance
473
- and the value returned will be used as the default.
474
-
475
- Returns:
476
- int: The number of unique values for the specified key.
477
-
478
- """
479
- return len(self.unique(key, default))
480
-
481
- def sort(self, *keys: str, reverse: bool = False) -> Self:
482
- """Sort runs based on one or more keys.
483
-
484
- Args:
485
- *keys: The keys to sort by, in order of priority.
486
- reverse: Whether to sort in descending order (default is
487
- ascending).
488
-
489
- Returns:
490
- Self: A new RunCollection with the runs sorted according to
491
- the specified keys.
492
-
493
- """
494
- if not keys:
495
- return self
496
-
497
- arrays = [self.to_numpy(key) for key in keys]
498
- index = np.lexsort(arrays[::-1])
499
-
500
- if reverse:
501
- index = index[::-1]
502
-
503
- return self[index]
504
-
505
- def to_frame(
506
- self,
507
- *keys: str,
508
- defaults: dict[str, Any | Callable[[R], Any]] | None = None,
509
- **kwargs: Callable[[R], Any],
510
- ) -> DataFrame:
511
- """Convert the collection to a Polars DataFrame.
512
-
513
- Args:
514
- *keys (str): The keys to include as columns in the DataFrame.
515
- If not provided, all keys from each run's to_dict() method
516
- will be used.
517
- defaults (dict[str, Any | Callable[[R], Any]] | None): Default
518
- values for the keys. If a callable, it will be called with
519
- the Run instance and the value returned will be used as the
520
- default.
521
- **kwargs (Callable[[R], Any]): Additional columns to compute
522
- using callables that take a Run and return a value.
523
-
524
- Returns:
525
- DataFrame: A Polars DataFrame containing the specified data
526
- from the runs.
527
-
528
- """
529
- if defaults is None:
530
- defaults = {}
531
-
532
- if keys:
533
- df = DataFrame(
534
- {key: self.to_list(key, defaults.get(key, MISSING)) for key in keys},
535
- )
536
- else:
537
- df = DataFrame(r.to_dict() for r in self)
538
-
539
- if not kwargs:
540
- return df
541
-
542
- columns = [Series(k, [v(r) for r in self]) for k, v in kwargs.items()]
543
- return df.with_columns(*columns)
544
-
545
- def _group_by(self, *keys: str) -> dict[Any, Self]:
546
- result: dict[Any, Self] = {}
547
-
548
- for run in self:
549
- keys_ = [to_hashable(run.get(key)) for key in keys]
550
- key = keys_[0] if len(keys) == 1 else tuple(keys_)
551
-
552
- if key not in result:
553
- result[key] = self.__class__([])
554
- result[key].runs.append(run)
555
-
556
- return result
557
-
558
- @overload
559
- def group_by(self, *keys: str) -> dict[Any, Self]: ...
560
-
561
- @overload
562
- def group_by(
563
- self,
564
- *keys: str,
565
- **kwargs: Callable[[Self | Sequence[R]], Any],
566
- ) -> DataFrame: ...
567
-
568
- def group_by(
569
- self,
570
- *keys: str,
571
- **kwargs: Callable[[Self | Sequence[R]], Any],
572
- ) -> dict[Any, Self] | DataFrame:
573
- """Group runs by one or more keys.
574
-
575
- This method can return either:
576
- - A dictionary mapping group keys to RunCollections
577
- (no kwargs provided)
578
- - A Polars DataFrame with group keys and aggregated
579
- values (kwargs provided)
580
-
581
- Args:
582
- *keys (str): The keys to group by.
583
- **kwargs (Callable[[Self | Sequence[R]], Any]): Aggregation
584
- functions to apply to each group. Each function should
585
- accept a RunCollection or Sequence[Run] and return a value.
586
-
587
- Returns:
588
- dict[Any, Self] | DataFrame: Either a dictionary mapping
589
- group keys to RunCollections, or a Polars DataFrame with
590
- group keys and aggregated values.
591
-
592
- """
593
- gp = self._group_by(*keys)
594
- if not kwargs:
595
- return gp
596
-
597
- if len(keys) == 1:
598
- df = DataFrame({keys[0]: list(gp)})
599
- else:
600
- df = DataFrame(dict(zip(keys, k, strict=True)) for k in gp)
601
- columns = [pl.Series(k, [v(r) for r in gp.values()]) for k, v in kwargs.items()]
602
- return df.with_columns(*columns)
603
-
604
-
605
- def to_hashable(value: Any) -> Hashable:
606
- """Convert a value to a hashable instance.
607
-
608
- This function handles various types of values and converts them to
609
- hashable equivalents for use in dictionaries and sets.
610
-
611
- Args:
612
- value: The value to convert to a hashable instance.
613
-
614
- Returns:
615
- A hashable version of the input value.
616
-
617
- """
618
- if OmegaConf.is_list(value): # Is ListConfig hashable?
619
- return tuple(value)
620
- if isinstance(value, Hashable):
621
- return value
622
- if isinstance(value, np.ndarray):
623
- return tuple(value.tolist())
624
- try:
625
- return tuple(value)
626
- except TypeError:
627
- return str(value)
628
-
629
-
630
- def _value_error() -> ValueError:
631
- msg = "No Run found matching the specified criteria"
632
- return ValueError(msg)
175
+ return Collection(run.impl for run in self)
@@ -19,7 +19,6 @@ from .io import get_experiment_name
19
19
 
20
20
  if TYPE_CHECKING:
21
21
  from pathlib import Path
22
- from typing import Any
23
22
 
24
23
 
25
24
  @dataclass
@@ -51,11 +50,3 @@ class RunInfo:
51
50
  contain the expected format).
52
51
  """
53
52
  return get_experiment_name(self.run_dir.parent)
54
-
55
- def to_dict(self) -> dict[str, Any]:
56
- """Convert the RunInfo to a dictionary."""
57
- return {
58
- "run_id": self.run_id,
59
- "run_dir": self.run_dir.as_posix(),
60
- "job_name": self.job_name,
61
- }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hydraflow
3
- Version: 0.16.2
3
+ Version: 0.17.0
4
4
  Summary: HydraFlow seamlessly integrates Hydra and MLflow to streamline ML experiment management, combining Hydra's configuration management with MLflow's tracking capabilities.
5
5
  Project-URL: Documentation, https://daizutabi.github.io/hydraflow/
6
6
  Project-URL: Source, https://github.com/daizutabi/hydraflow
@@ -2,20 +2,22 @@ hydraflow/__init__.py,sha256=8UraqH00Qp0In301ZUmQBRTIGbV1L5zSZACOUlIRPn8,727
2
2
  hydraflow/cli.py,sha256=3rGr___wwp8KazjLGQ7JO_IgAMqLyMlcVSs_QJK7g0Y,3135
3
3
  hydraflow/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hydraflow/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ hydraflow/core/collection.py,sha256=tUdjV_v4vzUHSNET-Z7a_8k5oXoH6nkZ_0OxZ-u8_nI,16791
5
6
  hydraflow/core/context.py,sha256=igE17oQESGjH-sBnICI8HkZbngY_crkHTgx2E-YkmEo,4155
7
+ hydraflow/core/group_by.py,sha256=Pnw-oA5aXHeRG9lMLz-bKc8drqQ8LIRsWzvVn153iyQ,5488
6
8
  hydraflow/core/io.py,sha256=B3-jPuJWttRgpbIpy_XA-Z2qpXzNF1ATwyYEwA7Pv3w,5172
7
9
  hydraflow/core/main.py,sha256=pgr2b9A4VoZuwbApE71NElmV64MFJv8UKda05q4uCqk,6010
8
- hydraflow/core/run.py,sha256=SugX6JLdBqsfz3JTrB66I3muo03rrmwDvITVZQaF48w,12685
9
- hydraflow/core/run_collection.py,sha256=cbaJO68WzE-QNlTc8NhOyQ1pHDNberJs-31qTY7P9Fo,19495
10
- hydraflow/core/run_info.py,sha256=B5sueHKVH9KEwty8fWuYzGC3M0-_g3TF_iwDM_2dyJs,1885
10
+ hydraflow/core/run.py,sha256=VQfS3DkAR2GBWdltmlD0XMStiOUo1YZiRONm-mPW2x4,11948
11
+ hydraflow/core/run_collection.py,sha256=4YjnAmB4lpGxTnlHzZOIwEXNfdI5yU5cj3PRiCW6vuA,5439
12
+ hydraflow/core/run_info.py,sha256=SMOTZXEa7OBV_XjTyctk5gJGrggmYwhePvRF8CLF1kU,1616
11
13
  hydraflow/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
14
  hydraflow/executor/aio.py,sha256=xXsmBPIPdBlopv_1h0FdtOvoKUcuW7PQeKCV2d_lN9I,2122
13
15
  hydraflow/executor/conf.py,sha256=8Xq4UAenRKJIl1NBgNbSfv6VUTJhdwPLayZIEAsiBR0,414
14
16
  hydraflow/executor/io.py,sha256=18wnHpCMQRGYL-oN2841h9W2aSW_X2SmO68Lx-3FIbU,1043
15
17
  hydraflow/executor/job.py,sha256=6QeJ18OMeocXeM04rCYL46GgArfX1SvZs9_4HTomTgE,5436
16
18
  hydraflow/executor/parser.py,sha256=RxP8qpDaJ8VLqZ51VlPFyVitWctObhkE_3iPIsY66Cs,14610
17
- hydraflow-0.16.2.dist-info/METADATA,sha256=3UWuHRuYrTCwXopZeqP9xBDKYn2_pUpL4Q2MBSOJhaA,7535
18
- hydraflow-0.16.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
19
- hydraflow-0.16.2.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
20
- hydraflow-0.16.2.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
21
- hydraflow-0.16.2.dist-info/RECORD,,
19
+ hydraflow-0.17.0.dist-info/METADATA,sha256=f9LHLgsZMEiTl1CusfZQHUSv6rlz8DfL78EoMfheCBA,7535
20
+ hydraflow-0.17.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
21
+ hydraflow-0.17.0.dist-info/entry_points.txt,sha256=XI0khPbpCIUo9UPqkNEpgh-kqK3Jy8T7L2VCWOdkbSM,48
22
+ hydraflow-0.17.0.dist-info/licenses/LICENSE,sha256=IGdDrBPqz1O0v_UwCW-NJlbX9Hy9b3uJ11t28y2srmY,1062
23
+ hydraflow-0.17.0.dist-info/RECORD,,