kaparoo-python 0.4.0__tar.gz → 0.6.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/PKG-INFO +7 -6
  2. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/README.md +6 -5
  3. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/README.md +82 -5
  4. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/__init__.py +6 -0
  5. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/__init__.py +6 -0
  6. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/composers.py +179 -6
  7. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/templates.py +92 -46
  8. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/README.md +2 -2
  9. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/directory.py +36 -12
  10. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/staged.py +61 -20
  11. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/utils.py +10 -2
  12. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/utils/README.md +3 -2
  13. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/utils/__init__.py +4 -0
  14. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/utils/aggregate.py +62 -0
  15. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/pyproject.toml +1 -1
  16. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/LICENSE +0 -0
  17. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/__init__.py +0 -0
  18. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/base.py +0 -0
  19. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/utils.py +0 -0
  20. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/__init__.py +0 -0
  21. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/exceptions.py +0 -0
  22. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/existence.py +0 -0
  23. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/README.md +0 -0
  24. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/__init__.py +0 -0
  25. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/classes.py +0 -0
  26. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/deprecated.py +0 -0
  27. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/__init__.py +0 -0
  28. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/base.py +0 -0
  29. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/logical.py +0 -0
  30. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/multi_pattern.py +0 -0
  31. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/pattern.py +0 -0
  32. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/types.py +0 -0
  33. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/utils.py +0 -0
  34. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/wrappers.py +0 -0
  35. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/types.py +0 -0
  36. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/py.typed +0 -0
  37. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/utils/optional.py +0 -0
  38. {kaparoo_python-0.4.0 → kaparoo_python-0.6.0}/kaparoo/utils/timer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kaparoo-python
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Personally common and useful Python features
5
5
  Keywords: filesystem,pathlib,paths,utilities
6
6
  Author: Jaewoo Park
@@ -67,16 +67,17 @@ hook for custom filter kinds.
67
67
 
68
68
  `Timer` / `SegmentTimer` context-manager-and-decorator timers (with
69
69
  `lap`-split and `measure`-block timings); `Aggregator` for nested,
70
- pluggable metric aggregation (the batch → epoch → run pattern); plus a
71
- small family of helpers for working with `Optional[T]` values
72
- (`replace_if_none`, `unwrap_or_default`, ...).
70
+ pluggable metric aggregation (the batch → epoch → run pattern;
71
+ experimental); plus a small family of helpers for working with
72
+ `Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
73
73
 
74
74
  ### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
75
75
 
76
76
  Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
77
77
  metadata), composers (`SlicedSequence`, `ConcatSequence`,
78
- `WindowedSequence`), file-backed templates (`FileFolderSequence`,
79
- `SingleFileSequence`), and `generate_batches`.
78
+ `TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
79
+ templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
80
+ and `generate_batches`.
80
81
 
81
82
  ## 🎯 Quick example
82
83
 
@@ -46,16 +46,17 @@ hook for custom filter kinds.
46
46
 
47
47
  `Timer` / `SegmentTimer` context-manager-and-decorator timers (with
48
48
  `lap`-split and `measure`-block timings); `Aggregator` for nested,
49
- pluggable metric aggregation (the batch → epoch → run pattern); plus a
50
- small family of helpers for working with `Optional[T]` values
51
- (`replace_if_none`, `unwrap_or_default`, ...).
49
+ pluggable metric aggregation (the batch → epoch → run pattern;
50
+ experimental); plus a small family of helpers for working with
51
+ `Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
52
52
 
53
53
  ### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
54
54
 
55
55
  Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
56
56
  metadata), composers (`SlicedSequence`, `ConcatSequence`,
57
- `WindowedSequence`), file-backed templates (`FileFolderSequence`,
58
- `SingleFileSequence`), and `generate_batches`.
57
+ `TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
58
+ templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
59
+ and `generate_batches`.
59
60
 
60
61
  ## 🎯 Quick example
61
62
 
@@ -7,9 +7,10 @@ small set of composers, and ready-to-subclass file-backed templates.
7
7
 
8
8
  - [`sequences/base`](./sequences/base.py) — `DataSequence[T, M]` abstract base
9
9
  - [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
10
- `ConcatSequence`, `WindowedSequence`
10
+ `TransformedSequence`, `ConcatSequence`, `WindowedSequence`,
11
+ `ZippedSequence`
11
12
  - [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
12
- `SingleFileSequence`
13
+ `FileListSequence`, `SingleFileSequence`
13
14
  - [`sequences/utils`](./sequences/utils.py) — `generate_batches`
14
15
 
15
16
  All public symbols are re-exported from both `kaparoo.data` and
@@ -83,18 +84,49 @@ combined = ConcatSequence(train_a, train_b, train_c)
83
84
  len(combined) # == len(train_a) + len(train_b) + len(train_c)
84
85
  ```
85
86
 
87
+ ### `TransformedSequence`
88
+
89
+ A lazy view that applies a `transform` callable to each item of
90
+ `source`. The transform is called on demand in `get_item` -- nothing
91
+ is computed at construction. `get_meta` passes through `source.get_meta`
92
+ unchanged by default; override it in a subclass when `M_out` differs
93
+ from `M_in`.
94
+
95
+ ```python
96
+ from kaparoo.data.sequences import TransformedSequence
97
+
98
+ # Item transform only -- metadata type is unchanged.
99
+ normalized = TransformedSequence(image_folder, normalize_fn)
100
+
101
+ # Meta transform via subclassing:
102
+ class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
103
+ def get_meta(self, index: int) -> AugMeta:
104
+ return AugMeta(path=self.source.get_meta(index), applied="normalize")
105
+ ```
106
+
107
+ Chaining two `TransformedSequence` instances applies the transforms in
108
+ order:
109
+
110
+ ```python
111
+ resized = TransformedSequence(raw, resize)
112
+ normalized = TransformedSequence(resized, normalize)
113
+ ```
114
+
115
+ `T_out` and `M_out` default to `T_in` and `M_in` respectively (PEP 696),
116
+ so you only need to specify them when the type actually changes.
117
+
86
118
  ### `WindowedSequence`
87
119
 
88
120
  An abstract sliding-window view: each item is a `tuple[T, ...]` of
89
121
  `size` frames from `source`. Per-frame `M_in` and window-level
90
- `M_out` are independent type parameters, so subclasses decide how
91
- metadata aggregates.
122
+ `M_out` are independent type parameters (`M_out` defaults to `M_in`),
123
+ so subclasses decide how metadata aggregates.
92
124
 
93
125
  ```python
94
126
  from pathlib import Path
95
127
  from kaparoo.data.sequences import WindowedSequence
96
128
 
97
- class FirstFrameMeta(WindowedSequence[bytes, Path, Path]):
129
+ class FirstFrameMeta(WindowedSequence[bytes, Path]):
98
130
  def get_meta(self, index):
99
131
  # window's metadata is its first frame's metadata
100
132
  index = self._normalize_index(index)
@@ -109,6 +141,27 @@ windows.get_meta(0) # frames.get_meta(0)
109
141
  `size`, `step`, `skip` follow the same semantics as
110
142
  [`generate_batches`](#generate_batches).
111
143
 
144
+ ### `ZippedSequence`
145
+
146
+ Element-wise zip of two sequences — item `i` is `(first[i], second[i])`
147
+ and metadata `i` is the `(M1, M2)` tuple. This is the "paired image +
148
+ label" pattern that `ConcatSequence` (end-to-end) cannot express. With
149
+ `strict=True` (the default) the lengths must match or construction raises
150
+ `ValueError`; pass `strict=False` to truncate to the shorter length, like
151
+ the builtin `zip`. For a different combined metadata shape, subclass and
152
+ override `get_meta`.
153
+
154
+ ```python
155
+ from kaparoo.data.sequences import ZippedSequence
156
+
157
+ pairs = ZippedSequence(images, labels)
158
+ pairs[0] # (images[0], labels[0])
159
+ pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
160
+ ```
161
+
162
+ For three or more, nest: `ZippedSequence(a, ZippedSequence(b, c))` yields
163
+ `(a[i], (b[i], c[i]))`.
164
+
112
165
  ## Templates
113
166
 
114
167
  ### `FileFolderSequence`
@@ -158,6 +211,30 @@ class GlobFolder(FileFolderSequence[bytes]):
158
211
  folder = GlobFolder("data", pattern="*.png", recursive=True)
159
212
  ```
160
213
 
214
+ ### `FileListSequence`
215
+
216
+ Same "one file per item" contract as `FileFolderSequence`, but the files
217
+ are given as an explicit list instead of discovered under a `root` — so
218
+ they may live in unrelated directories (or, on Windows, different drives),
219
+ which `FileFolderSequence` cannot represent. There is no `list_files`;
220
+ subclasses implement only `load_file` and `get_meta`. The input order is
221
+ preserved verbatim (duplicates kept) — sort it yourself if needed.
222
+
223
+ ```python
224
+ from pathlib import Path
225
+ from kaparoo.data.sequences import FileListSequence
226
+
227
+ class BytesList(FileListSequence[bytes]):
228
+ def get_meta(self, index):
229
+ return self.get_file(index)
230
+
231
+ def load_file(self, path):
232
+ return path.read_bytes()
233
+
234
+ # Files from anywhere, in the order given:
235
+ data = BytesList(["images/a.png", "/other/disk/b.png"])
236
+ ```
237
+
161
238
  ### `SingleFileSequence`
162
239
 
163
240
  Thin ABC for the "one file, many records" pattern (a video with many
@@ -2,9 +2,12 @@ __all__ = (
2
2
  "ConcatSequence",
3
3
  "DataSequence",
4
4
  "FileFolderSequence",
5
+ "FileListSequence",
5
6
  "SingleFileSequence",
6
7
  "SlicedSequence",
8
+ "TransformedSequence",
7
9
  "WindowedSequence",
10
+ "ZippedSequence",
8
11
  "generate_batches",
9
12
  )
10
13
 
@@ -12,8 +15,11 @@ from kaparoo.data.sequences import (
12
15
  ConcatSequence,
13
16
  DataSequence,
14
17
  FileFolderSequence,
18
+ FileListSequence,
15
19
  SingleFileSequence,
16
20
  SlicedSequence,
21
+ TransformedSequence,
17
22
  WindowedSequence,
23
+ ZippedSequence,
18
24
  generate_batches,
19
25
  )
@@ -4,9 +4,12 @@ __all__ = (
4
4
  "ConcatSequence",
5
5
  "DataSequence",
6
6
  "FileFolderSequence",
7
+ "FileListSequence",
7
8
  "SingleFileSequence",
8
9
  "SlicedSequence",
10
+ "TransformedSequence",
9
11
  "WindowedSequence",
12
+ "ZippedSequence",
10
13
  "generate_batches",
11
14
  )
12
15
 
@@ -14,10 +17,13 @@ from kaparoo.data.sequences.base import DataSequence
14
17
  from kaparoo.data.sequences.composers import (
15
18
  ConcatSequence,
16
19
  SlicedSequence,
20
+ TransformedSequence,
17
21
  WindowedSequence,
22
+ ZippedSequence,
18
23
  )
19
24
  from kaparoo.data.sequences.templates import (
20
25
  FileFolderSequence,
26
+ FileListSequence,
21
27
  SingleFileSequence,
22
28
  )
23
29
  from kaparoo.data.sequences.utils import generate_batches
@@ -1,15 +1,21 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ("ConcatSequence", "SlicedSequence", "WindowedSequence")
3
+ __all__ = (
4
+ "ConcatSequence",
5
+ "SlicedSequence",
6
+ "TransformedSequence",
7
+ "WindowedSequence",
8
+ "ZippedSequence",
9
+ )
4
10
 
5
11
  from abc import abstractmethod
6
12
  from bisect import bisect_right
7
- from typing import TYPE_CHECKING
13
+ from typing import TYPE_CHECKING, cast
8
14
 
9
15
  from kaparoo.data.sequences.base import DataSequence
10
16
 
11
17
  if TYPE_CHECKING:
12
- from collections.abc import Sequence
18
+ from collections.abc import Callable, Sequence
13
19
 
14
20
 
15
21
  class SlicedSequence[T, M](DataSequence[T, M]):
@@ -59,6 +65,61 @@ class SlicedSequence[T, M](DataSequence[T, M]):
59
65
  return self._source.get_meta(self._indices[index])
60
66
 
61
67
 
68
+ class TransformedSequence[T_in, M_in, T_out = T_in, M_out = M_in](
69
+ DataSequence[T_out, M_out]
70
+ ):
71
+ """A view of `source` with `transform` applied lazily to each item.
72
+
73
+ `transform` is called on demand in `get_item`; nothing is loaded or
74
+ converted at construction time. `get_meta` passes through
75
+ `source.get_meta` unchanged by default -- override it in a subclass
76
+ when `M_out` differs from `M_in`.
77
+
78
+ Type Parameters:
79
+ T_in: Item type of `source`.
80
+ M_in: Metadata type of `source`.
81
+ T_out: Item type after the transform. Defaults to `T_in`.
82
+ M_out: Metadata type exposed by this view. Defaults to `M_in`.
83
+ When `M_out != M_in`, override `get_meta` in a subclass;
84
+ the default passthrough is only safe when `M_out == M_in`.
85
+
86
+ Example:
87
+ >>> # Item-only transform; metadata passes through unchanged.
88
+ >>> normalized = TransformedSequence(image_folder, normalize)
89
+
90
+ >>> # Meta transform via subclassing:
91
+ >>> class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
92
+ ... def get_meta(self, index: int) -> AugMeta:
93
+ ... return AugMeta(
94
+ ... path=self.source.get_meta(index),
95
+ ... applied="normalize",
96
+ ... )
97
+ """
98
+
99
+ def __init__(
100
+ self,
101
+ source: DataSequence[T_in, M_in],
102
+ transform: Callable[[T_in], T_out],
103
+ ) -> None:
104
+ self._source = source
105
+ self._transform = transform
106
+
107
+ @property
108
+ def source(self) -> DataSequence[T_in, M_in]:
109
+ """The wrapped sequence."""
110
+ return self._source
111
+
112
+ def __len__(self) -> int:
113
+ return len(self._source)
114
+
115
+ def get_item(self, index: int) -> T_out:
116
+ return self._transform(self._source.get_item(index))
117
+
118
+ def get_meta(self, index: int) -> M_out:
119
+ # Passthrough by default. Override when M_out != M_in.
120
+ return cast("M_out", self._source.get_meta(index))
121
+
122
+
62
123
  class ConcatSequence[T, M](DataSequence[T, M]):
63
124
  """The end-to-end concatenation of zero or more `sources`.
64
125
 
@@ -112,7 +173,7 @@ class ConcatSequence[T, M](DataSequence[T, M]):
112
173
  return source.get_meta(local)
113
174
 
114
175
 
115
- class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
176
+ class WindowedSequence[T, M_in, M_out = M_in](DataSequence[tuple[T, ...], M_out]):
116
177
  """An abstract sliding-window view over `source`.
117
178
 
118
179
  Each item is a tuple of `size` items from `source`, starting at
@@ -130,8 +191,8 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
130
191
  T: Item type of `source` (also the per-frame type within each
131
192
  window).
132
193
  M_in: Metadata type of `source` (per-frame metadata).
133
- M_out: Metadata type of the window. Determined by the
134
- subclass's `get_meta` return.
194
+ M_out: Metadata type of the window. Defaults to `M_in`.
195
+ Determined by the subclass's `get_meta` return.
135
196
 
136
197
  Args:
137
198
  source: The sequence to window over.
@@ -219,3 +280,115 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
219
280
  @abstractmethod
220
281
  def get_meta(self, index: int) -> M_out:
221
282
  raise NotImplementedError
283
+
284
+
285
+ class ZippedSequence[T1, T2, M1 = None, M2 = None](
286
+ DataSequence[tuple[T1, T2], tuple[M1, M2]]
287
+ ):
288
+ """Element-wise zip of two sequences.
289
+
290
+ Item `i` is `(first[i], second[i])` and metadata `i` is
291
+ `(first.get_meta(i), second.get_meta(i))` -- the "paired image + label"
292
+ pattern that `ConcatSequence` (end-to-end) cannot express.
293
+
294
+ With `strict=True` (the default) the two sequences must have the same
295
+ length; a mismatch raises `ValueError` at construction. With
296
+ `strict=False` the view is truncated to the shorter length, like the
297
+ builtin `zip`. For a different combined-metadata shape, subclass and
298
+ override `get_meta`.
299
+
300
+ Type Parameters:
301
+ T1: Item type of the first source.
302
+ T2: Item type of the second source.
303
+ M1: Metadata type of the first source. Defaults to `None`.
304
+ M2: Metadata type of the second source. Defaults to `None`.
305
+
306
+ Args:
307
+ first: The first sequence.
308
+ second: The second sequence.
309
+ strict: When True (default), require equal lengths and raise on a
310
+ mismatch. When False, truncate to the shorter length.
311
+
312
+ Raises:
313
+ ValueError: If `strict` is True and the sequences differ in length.
314
+
315
+ Example:
316
+ >>> pairs = ZippedSequence(images, labels)
317
+ >>> pairs[0] # (images[0], labels[0])
318
+ >>> pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
319
+ """
320
+
321
+ def __init__(
322
+ self,
323
+ first: DataSequence[T1, M1],
324
+ second: DataSequence[T2, M2],
325
+ *,
326
+ strict: bool = True,
327
+ ) -> None:
328
+ if strict and len(first) != len(second):
329
+ msg = f"sequences differ in length: {len(first)} != {len(second)}"
330
+ raise ValueError(msg)
331
+ self._first = first
332
+ self._second = second
333
+ self._length = len(first) if strict else min(len(first), len(second))
334
+
335
+ @property
336
+ def first(self) -> DataSequence[T1, M1]:
337
+ """The first wrapped sequence."""
338
+ return self._first
339
+
340
+ @property
341
+ def second(self) -> DataSequence[T2, M2]:
342
+ """The second wrapped sequence."""
343
+ return self._second
344
+
345
+ def __len__(self) -> int:
346
+ return self._length
347
+
348
+ def _normalize_index(self, index: int) -> int:
349
+ """Normalize a possibly-negative index and validate range.
350
+
351
+ Indices resolve against the zipped length (the shorter source when
352
+ `strict=False`), so they address the same position in both sources.
353
+
354
+ Raises:
355
+ IndexError: If `index` is outside `[-len(self), len(self))`.
356
+ """
357
+ n = self._length
358
+ original = index
359
+ if index < 0:
360
+ index += n
361
+ if not 0 <= index < n:
362
+ msg = f"index {original} out of range for length {n}"
363
+ raise IndexError(msg)
364
+ return index
365
+
366
+ def get_item(self, index: int) -> tuple[T1, T2]:
367
+ index = self._normalize_index(index)
368
+ return self._first.get_item(index), self._second.get_item(index)
369
+
370
+ def get_items(self, indices: Sequence[int]) -> Sequence[tuple[T1, T2]]:
371
+ # Normalize, then bulk-delegate so each source's `get_items`
372
+ # optimization is used.
373
+ normalized = [self._normalize_index(i) for i in indices]
374
+ return list(
375
+ zip(
376
+ self._first.get_items(normalized),
377
+ self._second.get_items(normalized),
378
+ strict=True,
379
+ )
380
+ )
381
+
382
+ def get_meta(self, index: int) -> tuple[M1, M2]:
383
+ index = self._normalize_index(index)
384
+ return self._first.get_meta(index), self._second.get_meta(index)
385
+
386
+ def get_metas(self, indices: Sequence[int]) -> Sequence[tuple[M1, M2]]:
387
+ normalized = [self._normalize_index(i) for i in indices]
388
+ return list(
389
+ zip(
390
+ self._first.get_metas(normalized),
391
+ self._second.get_metas(normalized),
392
+ strict=True,
393
+ )
394
+ )
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ("FileFolderSequence", "SingleFileSequence")
3
+ __all__ = ("FileFolderSequence", "FileListSequence", "SingleFileSequence")
4
4
 
5
5
  from abc import abstractmethod
6
6
  from pathlib import Path
@@ -11,14 +11,95 @@ from kaparoo.filesystem.existence import ensure_dir_exists, ensure_file_exists
11
11
  from kaparoo.filesystem.utils import stringify_paths, wrap_path
12
12
 
13
13
  if TYPE_CHECKING:
14
- from kaparoo.filesystem.types import StrPath
14
+ from kaparoo.filesystem.types import StrPath, StrPaths
15
15
 
16
16
 
17
- class FileFolderSequence[T, M = Path](DataSequence[T, M]):
18
- """A folder-rooted `DataSequence` whose items live in individual files.
17
+ class FileListSequence[T, M = Path](DataSequence[T, M]):
18
+ """A `DataSequence` over an explicit, ordered list of files.
19
19
 
20
- The base class handles file discovery, indexing, and root-relative
21
- path bookkeeping. Subclasses are responsible for three things:
20
+ Items live one-per-file; subclasses implement `load_file` and `get_meta`.
21
+ The files are given directly rather than discovered under a `root`, so
22
+ they may live in unrelated directories -- or, on Windows, on different
23
+ drives. (`FileFolderSequence` is the special case where the list is
24
+ discovered under a single root and stored relative to it.)
25
+
26
+ The given order is preserved verbatim and duplicates are kept; sort the
27
+ input yourself (`sorted(files, key=...)`) if a particular order is
28
+ needed. Paths are not checked for existence at construction; `load_file`
29
+ is called lazily on each `get_item`.
30
+
31
+ The base exposes:
32
+
33
+ - `files: tuple[Path, ...]` — full paths as an immutable snapshot.
34
+ - `get_file(index) -> Path` — full path of the i-th file.
35
+
36
+ Type Parameters:
37
+ T: Item type returned by `get_item`.
38
+ M: Per-item metadata type. Defaults to `Path`; override when the
39
+ metadata is something else (label, line number, ...).
40
+
41
+ Args:
42
+ files: The file paths to expose, in order.
43
+
44
+ Example:
45
+ >>> from pathlib import Path
46
+ >>> class BytesList(FileListSequence[bytes]):
47
+ ... def get_meta(self, index: int) -> Path:
48
+ ... return self.get_file(index)
49
+ ...
50
+ ... def load_file(self, path: Path) -> bytes:
51
+ ... return path.read_bytes()
52
+ >>>
53
+ >>> data = BytesList(["images/a.png", "/other/b.png"])
54
+ """
55
+
56
+ def __init__(self, files: StrPaths) -> None:
57
+ self._files = list(stringify_paths(files))
58
+
59
+ def __len__(self) -> int:
60
+ return len(self._files)
61
+
62
+ @property
63
+ def files(self) -> tuple[Path, ...]:
64
+ """Immutable snapshot of the full file paths, in order.
65
+
66
+ Returns a fresh `tuple[Path, ...]` on each access.
67
+ """
68
+ return tuple(self.get_file(i) for i in range(len(self)))
69
+
70
+ def get_file(self, index: int) -> Path:
71
+ """Full Path of the file at `index`."""
72
+ return Path(self._files[index])
73
+
74
+ def get_item(self, index: int) -> T:
75
+ return self.load_file(self.get_file(index))
76
+
77
+ @abstractmethod
78
+ def get_meta(self, index: int) -> M:
79
+ raise NotImplementedError
80
+
81
+ @abstractmethod
82
+ def load_file(self, path: Path) -> T:
83
+ """Decode a single file into an item of type `T`.
84
+
85
+ Called lazily on each `get_item` -- not at construction time.
86
+ Subclasses may freely use external libraries (PIL, librosa,
87
+ cv2, ...) to decode.
88
+ """
89
+ raise NotImplementedError
90
+
91
+
92
+ class FileFolderSequence[T, M = Path](FileListSequence[T, M]):
93
+ """A `FileListSequence` whose file list is discovered under a root.
94
+
95
+ The special case of `FileListSequence` where every file lives under one
96
+ base directory. The list is produced by `list_files(root)`, validated to
97
+ be under `root`, and stored in root-relative form so memory stays low for
98
+ large datasets and the paths survive a `root` relocation; `get_file`
99
+ transparently re-prepends `root`. `load_file`, `get_item`, `files`, and
100
+ `__len__` are inherited unchanged.
101
+
102
+ Subclasses are responsible for three things:
22
103
 
23
104
  - **`list_files(self, root)`** (abstract): return the full `Path`
24
105
  of every file to expose, in the desired order. Called once from
@@ -33,16 +114,9 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
33
114
  to `Path` and `get_meta(i)` can be the one-liner
34
115
  `return self.get_file(i)`.
35
116
 
36
- The base exposes:
117
+ The base adds, on top of `FileListSequence`:
37
118
 
38
119
  - `root: Path` — the base directory.
39
- - `files: tuple[Path, ...]` — full paths as an immutable snapshot.
40
- - `get_file(index) -> Path` — full path of the i-th file.
41
-
42
- Paths are kept internally in their root-relative form so that
43
- memory stays low for large datasets and the sequence survives
44
- `root` relocations; the conversion is transparent to subclasses
45
- and external callers.
46
120
 
47
121
  Parameterized subclasses:
48
122
  When a subclass needs instance-level options (e.g. `pattern`,
@@ -94,48 +168,20 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
94
168
 
95
169
  def __init__(self, root: StrPath) -> None:
96
170
  self._root = ensure_dir_exists(root)
97
- self._files = list(
98
- stringify_paths(self.list_files(self._root), after=self._root)
99
- )
100
-
101
- def __len__(self) -> int:
102
- return len(self._files)
171
+ # `after=root` makes each path root-relative and raises ValueError if
172
+ # any file is not under `root`. The base then stores the relative
173
+ # form; `get_file` re-prepends `root`.
174
+ super().__init__(stringify_paths(self.list_files(self._root), after=self._root))
103
175
 
104
176
  @property
105
177
  def root(self) -> Path:
106
178
  """The base directory the sequence was constructed from."""
107
179
  return self._root
108
180
 
109
- @property
110
- def files(self) -> tuple[Path, ...]:
111
- """Immutable snapshot of the full file paths this sequence exposes.
112
-
113
- Returns a fresh `tuple[Path, ...]` on each access, in the order
114
- established by `list_files`.
115
- """
116
- return tuple(self.get_file(i) for i in range(len(self)))
117
-
118
181
  def get_file(self, index: int) -> Path:
119
182
  """Full Path of the file at `index`."""
120
183
  return wrap_path(self._files[index], prepend=self._root)
121
184
 
122
- def get_item(self, index: int) -> T:
123
- return self.load_file(self.get_file(index))
124
-
125
- @abstractmethod
126
- def get_meta(self, index: int) -> M:
127
- raise NotImplementedError
128
-
129
- @abstractmethod
130
- def load_file(self, path: Path) -> T:
131
- """Decode a single file into an item of type `T`.
132
-
133
- Called lazily on each `get_item` -- not at construction time.
134
- Subclasses may freely use external libraries (PIL, librosa,
135
- cv2, ...) to decode.
136
- """
137
- raise NotImplementedError
138
-
139
185
  @abstractmethod
140
186
  def list_files(self, root: Path) -> list[Path]:
141
187
  """Return the full Path of every file to expose, in order.
@@ -69,8 +69,8 @@ cache_dir = make_dir("var/cache", exist_ok=True)
69
69
 
70
70
  # Start from a clean slate: wipe an existing directory's contents and
71
71
  # recreate it empty. Destructive, and only ever wipes a *directory* (a
72
- # non-directory at the path still raises). `clean=True` makes `exist_ok`
73
- # moot, since the directory is removed and remade.
72
+ # non-directory -- or a symlink -- at the path still raises). `clean=True`
73
+ # makes `exist_ok` moot, since the directory is removed and remade.
74
74
  run_dir = make_dir("out/run_42", clean=True)
75
75
 
76
76
  # Bulk creation with a shared root
@@ -38,6 +38,20 @@ if TYPE_CHECKING:
38
38
  # ========================== #
39
39
 
40
40
 
41
+ def _ensure_directory_target(path: Path, *, clean: bool) -> None:
42
+ """Reject a path that cannot serve as a directory target.
43
+
44
+ Raises `NotADirectoryError` when `path` exists but is not a directory,
45
+ or when `clean` is requested on a symlink -- cleaning must operate on a
46
+ real directory, never through a link (which would otherwise reach the
47
+ link's target). A symlink to a directory is accepted only when `clean`
48
+ is False.
49
+ """
50
+ if (path.exists() and not path.is_dir()) or (clean and path.is_symlink()):
51
+ msg = f"not a usable directory target: {path}"
52
+ raise NotADirectoryError(msg)
53
+
54
+
41
55
  @overload
42
56
  def make_dir(
43
57
  path: StrPath,
@@ -88,9 +102,9 @@ def make_dir(
88
102
  Defaults to False.
89
103
  clean: Whether to recreate the directory empty when it already exists,
90
104
  removing its contents first. Only an existing *directory* is wiped;
91
- a non-directory still raises. Because the directory is removed and
92
- remade, `clean=True` makes `exist_ok` moot. **Destructive.**
93
- Defaults to False.
105
+ a non-directory -- or a symlink -- still raises. Because the
106
+ directory is removed and remade, `clean=True` makes `exist_ok`
107
+ moot. **Destructive.** Defaults to False.
94
108
  stringify: Whether to return the path as a string. Defaults to False.
95
109
 
96
110
  Returns:
@@ -100,15 +114,14 @@ def make_dir(
100
114
  Raises:
101
115
  ValueError: If `mode` is outside the range 0o1-0o7777
102
116
  (not checked on Windows, where the mode is ignored).
103
- NotADirectoryError: If the path exists but is not a directory.
117
+ NotADirectoryError: If the path exists but is not a directory, or
118
+ `clean` is True and the path is a symlink.
104
119
  OSError: If `exist_ok` is False, `clean` is False, and the path
105
120
  already exists.
106
121
  """
107
122
  _validate_mode(mode)
108
123
  path = Path(path)
109
- if path.exists() and not path.is_dir():
110
- msg = f"not a directory: {path}"
111
- raise NotADirectoryError(msg)
124
+ _ensure_directory_target(path, clean=clean)
112
125
  if clean and path.is_dir():
113
126
  shutil.rmtree(path)
114
127
  path.mkdir(mode=mode, parents=True, exist_ok=exist_ok)
@@ -170,9 +183,9 @@ def make_dirs(
170
183
  Defaults to False.
171
184
  clean: Whether to recreate each directory empty when it already exists,
172
185
  removing its contents first. Only an existing *directory* is wiped;
173
- a non-directory still raises. Because the directory is removed and
174
- remade, `clean=True` makes `exist_ok` moot. **Destructive.**
175
- Defaults to False.
186
+ a non-directory -- or a symlink -- still raises. Because the
187
+ directory is removed and remade, `clean=True` makes `exist_ok`
188
+ moot. **Destructive.** Defaults to False.
176
189
  stringify: Whether to return the paths as strings. Defaults to False.
177
190
 
178
191
  Returns:
@@ -183,15 +196,26 @@ def make_dirs(
183
196
  ValueError: If `mode` is outside the range 0o1-0o7777
184
197
  (not checked on Windows, where the mode is ignored).
185
198
  DirectoryNotFoundError: If `root` is provided and does not exist.
186
- NotADirectoryError: If `root` is provided and is not a directory.
199
+ NotADirectoryError: If `root` is provided and is not a directory, if
200
+ any path exists but is not a directory, or `clean` is True and
201
+ any path is a symlink.
187
202
  ValueError: If `root` is provided and any of the paths are absolute.
188
203
  OSError: If `exist_ok` is False, `clean` is False, and any of the
189
204
  paths already exist.
190
- OSError: If any of the paths are not directories.
205
+
206
+ Note:
207
+ Every path is validated (the non-directory / symlink checks above)
208
+ *before* any directory is wiped or created, so a deterministically
209
+ bad entry -- e.g. a file in the list -- fails without partially
210
+ cleaning earlier entries. Creation/cleanup is otherwise per-path and
211
+ not transactional, so a runtime failure (a race, a permission error)
212
+ partway through can still leave earlier entries created or cleaned.
191
213
  """
192
214
  _validate_mode(mode)
193
215
  paths = _join_root_if_provided(paths, root)
194
216
  directories = [Path(p) for p in paths]
217
+ for directory in directories:
218
+ _ensure_directory_target(directory, clean=clean)
195
219
  for directory in directories:
196
220
  if clean and directory.is_dir():
197
221
  shutil.rmtree(directory)
@@ -58,6 +58,23 @@ def _default_dir_mode() -> int:
58
58
  return _umask_default(0o777)
59
59
 
60
60
 
61
+ def _fsync_parent(path: Path) -> None:
62
+ """Best-effort fsync of `path`'s parent directory entry.
63
+
64
+ Makes a just-completed rename/link into `path` durable across a crash on
65
+ POSIX (the file's own data is fsynced separately). A no-op where a
66
+ directory cannot be opened for fsync, e.g. Windows.
67
+ """
68
+ try:
69
+ fd = os.open(path.parent, os.O_RDONLY)
70
+ except OSError:
71
+ return
72
+ try:
73
+ os.fsync(fd)
74
+ finally:
75
+ os.close(fd)
76
+
77
+
61
78
  class StagedFile[AnyStrT: (str, bytes)]:
62
79
  """Write a file safely: stage to a temp file, then commit by atomic move.
63
80
 
@@ -87,10 +104,13 @@ class StagedFile[AnyStrT: (str, bytes)]:
87
104
  ```
88
105
 
89
106
  With `overwrite=False` (the default) an existing destination is a
90
- fail-fast `FileExistsError`, and the commit creates the file atomically --
91
- it never clobbers a file that appeared meanwhile. With `overwrite=True`
92
- the destination is atomically replaced, inheriting its previous
93
- permissions.
107
+ fail-fast `FileExistsError`, and the commit creates the file atomically
108
+ via a hardlink -- it never clobbers a file that appeared meanwhile. On a
109
+ filesystem without hardlink support (FAT/exFAT, some network mounts) the
110
+ commit falls back to a best-effort existence check plus replace, leaving
111
+ a small window where a file appearing concurrently could be clobbered.
112
+ With `overwrite=True` the destination is atomically replaced, inheriting
113
+ its previous permissions.
94
114
 
95
115
  The committed file gets the usual umask-based permissions (not the
96
116
  restrictive mode of the internal temp file). The destination's parent
@@ -254,15 +274,26 @@ class StagedFile[AnyStrT: (str, bytes)]:
254
274
  if self._overwrite:
255
275
  self._temp_path.replace(self._path)
256
276
  else:
277
+ # Atomic exclusive create via hardlink where supported. A
278
+ # filesystem without hardlinks (FAT/exFAT, some network mounts)
279
+ # raises a non-`FileExistsError` `OSError`; fall back to a
280
+ # best-effort existence check plus `replace` (which leaves a
281
+ # small TOCTOU window where a file appearing meanwhile could be
282
+ # clobbered -- unavoidable without an atomic no-clobber move).
257
283
  try:
258
284
  self._path.hardlink_to(self._temp_path)
259
- except FileExistsError:
260
- msg = (
261
- f"file already exists, pass overwrite=True to replace: {self._path}"
262
- )
263
- raise FileExistsError(msg) from None
264
- finally:
265
- self._temp_path.unlink(missing_ok=True)
285
+ except OSError as exc:
286
+ if isinstance(exc, FileExistsError) or self._path.exists():
287
+ self._temp_path.unlink(missing_ok=True)
288
+ msg = (
289
+ "file already exists, pass overwrite=True to replace: "
290
+ f"{self._path}"
291
+ )
292
+ raise FileExistsError(msg) from None
293
+ self._temp_path.replace(self._path)
294
+ else:
295
+ self._temp_path.unlink()
296
+ _fsync_parent(self._path)
266
297
  self._committed = True
267
298
  self._finalizer.detach()
268
299
  return self._path
@@ -318,9 +349,9 @@ class StagedDirectory:
318
349
  staged directory is moved into place with a single rename, and an existing
319
350
  destination is a fail-fast `FileExistsError`. Replacing an existing one
320
351
  (`overwrite=True`) is *not* fully atomic -- the old directory is swapped
321
- aside and then removed, leaving a brief window where the destination is
322
- absent and, on a rare failure mid-swap, the previous contents in a sibling
323
- ``<name>.old`` directory for recovery.
352
+ aside, the staged one moved in, then the old removed. A failed move
353
+ restores the original; only a crash *between* the two renames leaves the
354
+ previous contents in a sibling ``<name>.old`` directory for recovery.
324
355
 
325
356
  The committed directory gets the usual umask-based permissions. Pass
326
357
  `make_parents=True` to create the destination's parent if it is missing.
@@ -395,6 +426,8 @@ class StagedDirectory:
395
426
  appeared after this builder opened.
396
427
  NotADirectoryError: If `overwrite` is True and the destination
397
428
  exists but is not a directory.
429
+ OSError: If replacing an existing directory and moving the staged
430
+ one into place fails; the original is restored first.
398
431
  """
399
432
  if self._committed:
400
433
  return self._path
@@ -420,16 +453,24 @@ class StagedDirectory:
420
453
  mode = stat.S_IMODE(self._path.stat().st_mode)
421
454
  self._workdir.chmod(mode)
422
455
  if exists:
423
- # Replacing an existing directory. No portable atomic dir replace:
424
- # swap the old one aside, move the staged one in, then remove the
425
- # old. A failure between the renames leaves the previous contents
426
- # in `<name>.old`.
456
+ # Replacing an existing directory. There is no portable atomic
457
+ # directory replace, so swap the old one aside, move the staged one
458
+ # in, then remove the old. If the second move fails, restore the
459
+ # original; removing the backup is best-effort (the destination is
460
+ # already correct). A crash *between* the two moves is the residual
461
+ # non-atomic window -- the previous contents remain in a sibling
462
+ # `<name>.old` directory for manual recovery.
427
463
  backup = self._path.with_name(f"{self._workdir.name}.old")
428
464
  self._path.rename(backup)
429
- self._workdir.rename(self._path)
430
- shutil.rmtree(backup)
465
+ try:
466
+ self._workdir.rename(self._path)
467
+ except OSError:
468
+ backup.rename(self._path)
469
+ raise
470
+ shutil.rmtree(backup, ignore_errors=True)
431
471
  else:
432
472
  self._workdir.rename(self._path)
473
+ _fsync_parent(self._path)
433
474
  self._committed = True
434
475
  self._finalizer.detach()
435
476
  return self._path
@@ -266,6 +266,10 @@ def reserve_path(
266
266
  an exclusive file create, `open(path, "x")` raises the same
267
267
  `FileExistsError` directly.
268
268
 
269
+ A symlink counts as occupying the path -- including a *broken* one,
270
+ which `Path.exists` alone reports as absent yet still takes the name
271
+ (so `open(path, "x")` would fail). Such a path is treated as existing.
272
+
269
273
  Args:
270
274
  path: The path that should not yet exist.
271
275
  exist_ok: Whether to allow an already-existing path. Defaults to False.
@@ -277,9 +281,13 @@ def reserve_path(
277
281
  The path as a Path object or a string, depending on `stringify`.
278
282
 
279
283
  Raises:
280
- FileExistsError: If the path exists and `exist_ok` is False.
284
+ FileExistsError: If the path exists (or is a symlink) and `exist_ok`
285
+ is False.
286
+ OSError: If `make_parents` is True and the parent cannot be created
287
+ (e.g. an ancestor along the path is a file).
281
288
  """
282
- if (path := Path(path)).exists() and not exist_ok:
289
+ path = Path(path)
290
+ if (path.exists() or path.is_symlink()) and not exist_ok:
283
291
  msg = f"path already exists: {path}"
284
292
  raise FileExistsError(msg)
285
293
  if make_parents:
@@ -164,6 +164,7 @@ print(run.compute())
164
164
  | Reduction | Result | Empty |
165
165
  | --- | --- | --- |
166
166
  | `Mean()` | weighted arithmetic mean | `nan` |
167
+ | `Var()` / `Std()` | weighted population variance / std (Welford) | `nan` |
167
168
  | `Sum()` | sum of values (weight ignored) | `0.0` |
168
169
  | `Min()` / `Max()` | running min / max (weight ignored) | `nan` |
169
170
  | `Last()` | most recent value | `nan` |
@@ -177,8 +178,8 @@ import operator
177
178
  Aggregator(Fold(operator.mul, 1.0)) # running product
178
179
  ```
179
180
 
180
- For a reduction with richer state (weighted variance, RMS, ...), subclass
181
- `Reduction` (or `UnweightedReduction` when weight is irrelevant) and
181
+ For a reduction with richer state (RMS, a weighted geometric mean, ...),
182
+ subclass `Reduction` (or `UnweightedReduction` when weight is irrelevant) and
182
183
  implement `identity` / `step` (or `accumulate`) / `merge` / `result`. The
183
184
  `merge` method *is* the nesting behavior, so custom reductions nest
184
185
  exactly as the built-ins.
@@ -8,9 +8,11 @@ __all__ = (
8
8
  "Reduction",
9
9
  "SegmentRecord",
10
10
  "SegmentTimer",
11
+ "Std",
11
12
  "Sum",
12
13
  "Timer",
13
14
  "UnweightedReduction",
15
+ "Var",
14
16
  "factory_if_none",
15
17
  "replace_if_none",
16
18
  "unwrap_or_default",
@@ -27,8 +29,10 @@ from kaparoo.utils.aggregate import (
27
29
  Mean,
28
30
  Min,
29
31
  Reduction,
32
+ Std,
30
33
  Sum,
31
34
  UnweightedReduction,
35
+ Var,
32
36
  )
33
37
  from kaparoo.utils.optional import (
34
38
  factory_if_none,
@@ -16,10 +16,13 @@ __all__ = (
16
16
  "Mean",
17
17
  "Min",
18
18
  "Reduction",
19
+ "Std",
19
20
  "Sum",
20
21
  "UnweightedReduction",
22
+ "Var",
21
23
  )
22
24
 
25
+ import math
23
26
  from abc import ABC, abstractmethod
24
27
  from dataclasses import dataclass
25
28
  from typing import TYPE_CHECKING
@@ -108,6 +111,65 @@ class Mean(Reduction[tuple[float, float]]):
108
111
  return state[0] / state[1] if state[1] else float("nan")
109
112
 
110
113
 
114
+ @dataclass(frozen=True)
115
+ class Var(Reduction[tuple[float, float, float]]):
116
+ """Weighted population variance; state is `(weight, mean, M2)`.
117
+
118
+ Accumulated online (Welford) and merged exactly (Chan's parallel
119
+ algorithm), so it nests across loop levels like the other reductions.
120
+ Uses the population convention -- M2 over the total weight, as in
121
+ numpy's default `ddof=0` -- which stays well-defined under weighting.
122
+ Empty -> `nan`.
123
+ """
124
+
125
+ def identity(self) -> tuple[float, float, float]:
126
+ return (0.0, 0.0, 0.0)
127
+
128
+ def step(
129
+ self, state: tuple[float, float, float], value: float, weight: float
130
+ ) -> tuple[float, float, float]:
131
+ total, mean, m2 = state
132
+ total += weight
133
+ delta = value - mean
134
+ mean += (weight / total) * delta
135
+ m2 += weight * delta * (value - mean)
136
+ return (total, mean, m2)
137
+
138
+ def merge(
139
+ self,
140
+ a: tuple[float, float, float],
141
+ b: tuple[float, float, float],
142
+ ) -> tuple[float, float, float]:
143
+ total_a, mean_a, m2_a = a
144
+ total_b, mean_b, m2_b = b
145
+ total = total_a + total_b
146
+ if total == 0:
147
+ return (0.0, 0.0, 0.0)
148
+ delta = mean_b - mean_a
149
+ mean = mean_a + delta * total_b / total
150
+ m2 = m2_a + m2_b + delta * delta * total_a * total_b / total
151
+ return (total, mean, m2)
152
+
153
+ def result(self, state: tuple[float, float, float]) -> float:
154
+ total, _mean, m2 = state
155
+ return m2 / total if total else float("nan")
156
+
157
+
158
+ @dataclass(frozen=True)
159
+ class Std(Var):
160
+ """Weighted population standard deviation: the square root of `Var`.
161
+
162
+ Shares `Var`'s online, mergeable moments; only the final projection
163
+ differs. Empty -> `nan`.
164
+ """
165
+
166
+ def result(self, state: tuple[float, float, float]) -> float:
167
+ variance = super().result(state)
168
+ if math.isnan(variance): # empty state
169
+ return variance
170
+ return max(variance, 0.0) ** 0.5
171
+
172
+
111
173
  @dataclass(frozen=True)
112
174
  class Sum(UnweightedReduction[float]):
113
175
  """Running sum of values (weight ignored). Empty -> `0.0`."""
@@ -12,7 +12,7 @@ build-backend = "uv_build"
12
12
 
13
13
  [project]
14
14
  name = "kaparoo-python"
15
- version = "0.4.0"
15
+ version = "0.6.0"
16
16
  description = "Personally common and useful Python features"
17
17
  readme = "README.md"
18
18
  requires-python = ">=3.14"
File without changes