kaparoo-python 0.5.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/PKG-INFO +8 -7
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/README.md +7 -6
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/README.md +57 -4
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/__init__.py +4 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/sequences/__init__.py +4 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/sequences/composers.py +179 -6
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/sequences/templates.py +90 -120
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/README.md +22 -1
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/__init__.py +2 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/utils.py +53 -1
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/utils/README.md +14 -14
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/utils/__init__.py +3 -3
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/utils/timer.py +32 -32
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/pyproject.toml +1 -1
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/LICENSE +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/sequences/base.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/data/sequences/utils.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/directory.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/exceptions.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/existence.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/README.md +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/classes.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/deprecated.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/base.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/logical.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/multi_pattern.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/pattern.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/types.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/utils.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/wrappers.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/staged.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/types.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/py.typed +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/utils/aggregate.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/utils/optional.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaparoo-python
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Personally common and useful Python features
|
|
5
5
|
Keywords: filesystem,pathlib,paths,utilities
|
|
6
6
|
Author: Jaewoo Park
|
|
@@ -65,18 +65,19 @@ hook for custom filter kinds.
|
|
|
65
65
|
|
|
66
66
|
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
67
67
|
|
|
68
|
-
`Timer` / `
|
|
68
|
+
`Timer` / `SpanTimer` context-manager-and-decorator timers (with
|
|
69
69
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
70
|
-
pluggable metric aggregation (the batch → epoch → run pattern
|
|
71
|
-
small family of helpers for working with
|
|
72
|
-
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
70
|
+
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
71
|
+
experimental); plus a small family of helpers for working with
|
|
72
|
+
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
73
73
|
|
|
74
74
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
75
75
|
|
|
76
76
|
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
77
77
|
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
78
|
-
`WindowedSequence`), file-backed
|
|
79
|
-
`SingleFileSequence`),
|
|
78
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
79
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
80
|
+
and `generate_batches`.
|
|
80
81
|
|
|
81
82
|
## 🎯 Quick example
|
|
82
83
|
|
|
@@ -44,18 +44,19 @@ hook for custom filter kinds.
|
|
|
44
44
|
|
|
45
45
|
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
46
46
|
|
|
47
|
-
`Timer` / `
|
|
47
|
+
`Timer` / `SpanTimer` context-manager-and-decorator timers (with
|
|
48
48
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
49
|
-
pluggable metric aggregation (the batch → epoch → run pattern
|
|
50
|
-
small family of helpers for working with
|
|
51
|
-
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
49
|
+
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
50
|
+
experimental); plus a small family of helpers for working with
|
|
51
|
+
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
52
52
|
|
|
53
53
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
54
54
|
|
|
55
55
|
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
56
56
|
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
57
|
-
`WindowedSequence`), file-backed
|
|
58
|
-
`SingleFileSequence`),
|
|
57
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
58
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
59
|
+
and `generate_batches`.
|
|
59
60
|
|
|
60
61
|
## 🎯 Quick example
|
|
61
62
|
|
|
@@ -7,7 +7,8 @@ small set of composers, and ready-to-subclass file-backed templates.
|
|
|
7
7
|
|
|
8
8
|
- [`sequences/base`](./sequences/base.py) — `DataSequence[T, M]` abstract base
|
|
9
9
|
- [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
|
|
10
|
-
`ConcatSequence`, `WindowedSequence`
|
|
10
|
+
`TransformedSequence`, `ConcatSequence`, `WindowedSequence`,
|
|
11
|
+
`ZippedSequence`
|
|
11
12
|
- [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
|
|
12
13
|
`FileListSequence`, `SingleFileSequence`
|
|
13
14
|
- [`sequences/utils`](./sequences/utils.py) — `generate_batches`
|
|
@@ -83,18 +84,49 @@ combined = ConcatSequence(train_a, train_b, train_c)
|
|
|
83
84
|
len(combined) # == len(train_a) + len(train_b) + len(train_c)
|
|
84
85
|
```
|
|
85
86
|
|
|
87
|
+
### `TransformedSequence`
|
|
88
|
+
|
|
89
|
+
A lazy view that applies a `transform` callable to each item of
|
|
90
|
+
`source`. The transform is called on demand in `get_item` -- nothing
|
|
91
|
+
is computed at construction. `get_meta` passes through `source.get_meta`
|
|
92
|
+
unchanged by default; override it in a subclass when `M_out` differs
|
|
93
|
+
from `M_in`.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from kaparoo.data.sequences import TransformedSequence
|
|
97
|
+
|
|
98
|
+
# Item transform only -- metadata type is unchanged.
|
|
99
|
+
normalized = TransformedSequence(image_folder, normalize_fn)
|
|
100
|
+
|
|
101
|
+
# Meta transform via subclassing:
|
|
102
|
+
class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
|
|
103
|
+
def get_meta(self, index: int) -> AugMeta:
|
|
104
|
+
return AugMeta(path=self.source.get_meta(index), applied="normalize")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Chaining two `TransformedSequence` instances applies the transforms in
|
|
108
|
+
order:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
resized = TransformedSequence(raw, resize)
|
|
112
|
+
normalized = TransformedSequence(resized, normalize)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
`T_out` and `M_out` default to `T_in` and `M_in` respectively (PEP 696),
|
|
116
|
+
so you only need to specify them when the type actually changes.
|
|
117
|
+
|
|
86
118
|
### `WindowedSequence`
|
|
87
119
|
|
|
88
120
|
An abstract sliding-window view: each item is a `tuple[T, ...]` of
|
|
89
121
|
`size` frames from `source`. Per-frame `M_in` and window-level
|
|
90
|
-
`M_out` are independent type parameters, so subclasses decide how
|
|
91
|
-
metadata aggregates.
|
|
122
|
+
`M_out` are independent type parameters (`M_out` defaults to `M_in`),
|
|
123
|
+
so subclasses decide how metadata aggregates.
|
|
92
124
|
|
|
93
125
|
```python
|
|
94
126
|
from pathlib import Path
|
|
95
127
|
from kaparoo.data.sequences import WindowedSequence
|
|
96
128
|
|
|
97
|
-
class FirstFrameMeta(WindowedSequence[bytes, Path, Path]):
|
|
129
|
+
class FirstFrameMeta(WindowedSequence[bytes, Path]):
|
|
98
130
|
def get_meta(self, index):
|
|
99
131
|
# window's metadata is its first frame's metadata
|
|
100
132
|
index = self._normalize_index(index)
|
|
@@ -109,6 +141,27 @@ windows.get_meta(0) # frames.get_meta(0)
|
|
|
109
141
|
`size`, `step`, `skip` follow the same semantics as
|
|
110
142
|
[`generate_batches`](#generate_batches).
|
|
111
143
|
|
|
144
|
+
### `ZippedSequence`
|
|
145
|
+
|
|
146
|
+
Element-wise zip of two sequences — item `i` is `(first[i], second[i])`
|
|
147
|
+
and metadata `i` is the `(M1, M2)` tuple. This is the "paired image +
|
|
148
|
+
label" pattern that `ConcatSequence` (end-to-end) cannot express. With
|
|
149
|
+
`strict=True` (the default) the lengths must match or construction raises
|
|
150
|
+
`ValueError`; pass `strict=False` to truncate to the shorter length, like
|
|
151
|
+
the builtin `zip`. For a different combined metadata shape, subclass and
|
|
152
|
+
override `get_meta`.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from kaparoo.data.sequences import ZippedSequence
|
|
156
|
+
|
|
157
|
+
pairs = ZippedSequence(images, labels)
|
|
158
|
+
pairs[0] # (images[0], labels[0])
|
|
159
|
+
pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
For three or more, nest: `ZippedSequence(a, ZippedSequence(b, c))` yields
|
|
163
|
+
`(a[i], (b[i], c[i]))`.
|
|
164
|
+
|
|
112
165
|
## Templates
|
|
113
166
|
|
|
114
167
|
### `FileFolderSequence`
|
|
@@ -5,7 +5,9 @@ __all__ = (
|
|
|
5
5
|
"FileListSequence",
|
|
6
6
|
"SingleFileSequence",
|
|
7
7
|
"SlicedSequence",
|
|
8
|
+
"TransformedSequence",
|
|
8
9
|
"WindowedSequence",
|
|
10
|
+
"ZippedSequence",
|
|
9
11
|
"generate_batches",
|
|
10
12
|
)
|
|
11
13
|
|
|
@@ -16,6 +18,8 @@ from kaparoo.data.sequences import (
|
|
|
16
18
|
FileListSequence,
|
|
17
19
|
SingleFileSequence,
|
|
18
20
|
SlicedSequence,
|
|
21
|
+
TransformedSequence,
|
|
19
22
|
WindowedSequence,
|
|
23
|
+
ZippedSequence,
|
|
20
24
|
generate_batches,
|
|
21
25
|
)
|
|
@@ -7,7 +7,9 @@ __all__ = (
|
|
|
7
7
|
"FileListSequence",
|
|
8
8
|
"SingleFileSequence",
|
|
9
9
|
"SlicedSequence",
|
|
10
|
+
"TransformedSequence",
|
|
10
11
|
"WindowedSequence",
|
|
12
|
+
"ZippedSequence",
|
|
11
13
|
"generate_batches",
|
|
12
14
|
)
|
|
13
15
|
|
|
@@ -15,7 +17,9 @@ from kaparoo.data.sequences.base import DataSequence
|
|
|
15
17
|
from kaparoo.data.sequences.composers import (
|
|
16
18
|
ConcatSequence,
|
|
17
19
|
SlicedSequence,
|
|
20
|
+
TransformedSequence,
|
|
18
21
|
WindowedSequence,
|
|
22
|
+
ZippedSequence,
|
|
19
23
|
)
|
|
20
24
|
from kaparoo.data.sequences.templates import (
|
|
21
25
|
FileFolderSequence,
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = ("ConcatSequence", "SlicedSequence", "WindowedSequence")
|
|
3
|
+
__all__ = (
|
|
4
|
+
"ConcatSequence",
|
|
5
|
+
"SlicedSequence",
|
|
6
|
+
"TransformedSequence",
|
|
7
|
+
"WindowedSequence",
|
|
8
|
+
"ZippedSequence",
|
|
9
|
+
)
|
|
4
10
|
|
|
5
11
|
from abc import abstractmethod
|
|
6
12
|
from bisect import bisect_right
|
|
7
|
-
from typing import TYPE_CHECKING
|
|
13
|
+
from typing import TYPE_CHECKING, cast
|
|
8
14
|
|
|
9
15
|
from kaparoo.data.sequences.base import DataSequence
|
|
10
16
|
|
|
11
17
|
if TYPE_CHECKING:
|
|
12
|
-
from collections.abc import Sequence
|
|
18
|
+
from collections.abc import Callable, Sequence
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
class SlicedSequence[T, M](DataSequence[T, M]):
|
|
@@ -59,6 +65,61 @@ class SlicedSequence[T, M](DataSequence[T, M]):
|
|
|
59
65
|
return self._source.get_meta(self._indices[index])
|
|
60
66
|
|
|
61
67
|
|
|
68
|
+
class TransformedSequence[T_in, M_in, T_out = T_in, M_out = M_in](
|
|
69
|
+
DataSequence[T_out, M_out]
|
|
70
|
+
):
|
|
71
|
+
"""A view of `source` with `transform` applied lazily to each item.
|
|
72
|
+
|
|
73
|
+
`transform` is called on demand in `get_item`; nothing is loaded or
|
|
74
|
+
converted at construction time. `get_meta` passes through
|
|
75
|
+
`source.get_meta` unchanged by default -- override it in a subclass
|
|
76
|
+
when `M_out` differs from `M_in`.
|
|
77
|
+
|
|
78
|
+
Type Parameters:
|
|
79
|
+
T_in: Item type of `source`.
|
|
80
|
+
M_in: Metadata type of `source`.
|
|
81
|
+
T_out: Item type after the transform. Defaults to `T_in`.
|
|
82
|
+
M_out: Metadata type exposed by this view. Defaults to `M_in`.
|
|
83
|
+
When `M_out != M_in`, override `get_meta` in a subclass;
|
|
84
|
+
the default passthrough is only safe when `M_out == M_in`.
|
|
85
|
+
|
|
86
|
+
Example:
|
|
87
|
+
>>> # Item-only transform; metadata passes through unchanged.
|
|
88
|
+
>>> normalized = TransformedSequence(image_folder, normalize)
|
|
89
|
+
|
|
90
|
+
>>> # Meta transform via subclassing:
|
|
91
|
+
>>> class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
|
|
92
|
+
... def get_meta(self, index: int) -> AugMeta:
|
|
93
|
+
... return AugMeta(
|
|
94
|
+
... path=self.source.get_meta(index),
|
|
95
|
+
... applied="normalize",
|
|
96
|
+
... )
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
def __init__(
|
|
100
|
+
self,
|
|
101
|
+
source: DataSequence[T_in, M_in],
|
|
102
|
+
transform: Callable[[T_in], T_out],
|
|
103
|
+
) -> None:
|
|
104
|
+
self._source = source
|
|
105
|
+
self._transform = transform
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def source(self) -> DataSequence[T_in, M_in]:
|
|
109
|
+
"""The wrapped sequence."""
|
|
110
|
+
return self._source
|
|
111
|
+
|
|
112
|
+
def __len__(self) -> int:
|
|
113
|
+
return len(self._source)
|
|
114
|
+
|
|
115
|
+
def get_item(self, index: int) -> T_out:
|
|
116
|
+
return self._transform(self._source.get_item(index))
|
|
117
|
+
|
|
118
|
+
def get_meta(self, index: int) -> M_out:
|
|
119
|
+
# Passthrough by default. Override when M_out != M_in.
|
|
120
|
+
return cast("M_out", self._source.get_meta(index))
|
|
121
|
+
|
|
122
|
+
|
|
62
123
|
class ConcatSequence[T, M](DataSequence[T, M]):
|
|
63
124
|
"""The end-to-end concatenation of zero or more `sources`.
|
|
64
125
|
|
|
@@ -112,7 +173,7 @@ class ConcatSequence[T, M](DataSequence[T, M]):
|
|
|
112
173
|
return source.get_meta(local)
|
|
113
174
|
|
|
114
175
|
|
|
115
|
-
class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
176
|
+
class WindowedSequence[T, M_in, M_out = M_in](DataSequence[tuple[T, ...], M_out]):
|
|
116
177
|
"""An abstract sliding-window view over `source`.
|
|
117
178
|
|
|
118
179
|
Each item is a tuple of `size` items from `source`, starting at
|
|
@@ -130,8 +191,8 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
130
191
|
T: Item type of `source` (also the per-frame type within each
|
|
131
192
|
window).
|
|
132
193
|
M_in: Metadata type of `source` (per-frame metadata).
|
|
133
|
-
M_out: Metadata type of the window. Determined by the
|
|
134
|
-
subclass's `get_meta` return.
|
|
194
|
+
M_out: Metadata type of the window. Defaults to `M_in`.
|
|
195
|
+
Determined by the subclass's `get_meta` return.
|
|
135
196
|
|
|
136
197
|
Args:
|
|
137
198
|
source: The sequence to window over.
|
|
@@ -219,3 +280,115 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
219
280
|
@abstractmethod
|
|
220
281
|
def get_meta(self, index: int) -> M_out:
|
|
221
282
|
raise NotImplementedError
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class ZippedSequence[T1, T2, M1 = None, M2 = None](
|
|
286
|
+
DataSequence[tuple[T1, T2], tuple[M1, M2]]
|
|
287
|
+
):
|
|
288
|
+
"""Element-wise zip of two sequences.
|
|
289
|
+
|
|
290
|
+
Item `i` is `(first[i], second[i])` and metadata `i` is
|
|
291
|
+
`(first.get_meta(i), second.get_meta(i))` -- the "paired image + label"
|
|
292
|
+
pattern that `ConcatSequence` (end-to-end) cannot express.
|
|
293
|
+
|
|
294
|
+
With `strict=True` (the default) the two sequences must have the same
|
|
295
|
+
length; a mismatch raises `ValueError` at construction. With
|
|
296
|
+
`strict=False` the view is truncated to the shorter length, like the
|
|
297
|
+
builtin `zip`. For a different combined-metadata shape, subclass and
|
|
298
|
+
override `get_meta`.
|
|
299
|
+
|
|
300
|
+
Type Parameters:
|
|
301
|
+
T1: Item type of the first source.
|
|
302
|
+
T2: Item type of the second source.
|
|
303
|
+
M1: Metadata type of the first source. Defaults to `None`.
|
|
304
|
+
M2: Metadata type of the second source. Defaults to `None`.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
first: The first sequence.
|
|
308
|
+
second: The second sequence.
|
|
309
|
+
strict: When True (default), require equal lengths and raise on a
|
|
310
|
+
mismatch. When False, truncate to the shorter length.
|
|
311
|
+
|
|
312
|
+
Raises:
|
|
313
|
+
ValueError: If `strict` is True and the sequences differ in length.
|
|
314
|
+
|
|
315
|
+
Example:
|
|
316
|
+
>>> pairs = ZippedSequence(images, labels)
|
|
317
|
+
>>> pairs[0] # (images[0], labels[0])
|
|
318
|
+
>>> pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
def __init__(
|
|
322
|
+
self,
|
|
323
|
+
first: DataSequence[T1, M1],
|
|
324
|
+
second: DataSequence[T2, M2],
|
|
325
|
+
*,
|
|
326
|
+
strict: bool = True,
|
|
327
|
+
) -> None:
|
|
328
|
+
if strict and len(first) != len(second):
|
|
329
|
+
msg = f"sequences differ in length: {len(first)} != {len(second)}"
|
|
330
|
+
raise ValueError(msg)
|
|
331
|
+
self._first = first
|
|
332
|
+
self._second = second
|
|
333
|
+
self._length = len(first) if strict else min(len(first), len(second))
|
|
334
|
+
|
|
335
|
+
@property
|
|
336
|
+
def first(self) -> DataSequence[T1, M1]:
|
|
337
|
+
"""The first wrapped sequence."""
|
|
338
|
+
return self._first
|
|
339
|
+
|
|
340
|
+
@property
|
|
341
|
+
def second(self) -> DataSequence[T2, M2]:
|
|
342
|
+
"""The second wrapped sequence."""
|
|
343
|
+
return self._second
|
|
344
|
+
|
|
345
|
+
def __len__(self) -> int:
|
|
346
|
+
return self._length
|
|
347
|
+
|
|
348
|
+
def _normalize_index(self, index: int) -> int:
|
|
349
|
+
"""Normalize a possibly-negative index and validate range.
|
|
350
|
+
|
|
351
|
+
Indices resolve against the zipped length (the shorter source when
|
|
352
|
+
`strict=False`), so they address the same position in both sources.
|
|
353
|
+
|
|
354
|
+
Raises:
|
|
355
|
+
IndexError: If `index` is outside `[-len(self), len(self))`.
|
|
356
|
+
"""
|
|
357
|
+
n = self._length
|
|
358
|
+
original = index
|
|
359
|
+
if index < 0:
|
|
360
|
+
index += n
|
|
361
|
+
if not 0 <= index < n:
|
|
362
|
+
msg = f"index {original} out of range for length {n}"
|
|
363
|
+
raise IndexError(msg)
|
|
364
|
+
return index
|
|
365
|
+
|
|
366
|
+
def get_item(self, index: int) -> tuple[T1, T2]:
|
|
367
|
+
index = self._normalize_index(index)
|
|
368
|
+
return self._first.get_item(index), self._second.get_item(index)
|
|
369
|
+
|
|
370
|
+
def get_items(self, indices: Sequence[int]) -> Sequence[tuple[T1, T2]]:
|
|
371
|
+
# Normalize, then bulk-delegate so each source's `get_items`
|
|
372
|
+
# optimization is used.
|
|
373
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
374
|
+
return list(
|
|
375
|
+
zip(
|
|
376
|
+
self._first.get_items(normalized),
|
|
377
|
+
self._second.get_items(normalized),
|
|
378
|
+
strict=True,
|
|
379
|
+
)
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
def get_meta(self, index: int) -> tuple[M1, M2]:
|
|
383
|
+
index = self._normalize_index(index)
|
|
384
|
+
return self._first.get_meta(index), self._second.get_meta(index)
|
|
385
|
+
|
|
386
|
+
def get_metas(self, indices: Sequence[int]) -> Sequence[tuple[M1, M2]]:
|
|
387
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
388
|
+
return list(
|
|
389
|
+
zip(
|
|
390
|
+
self._first.get_metas(normalized),
|
|
391
|
+
self._second.get_metas(normalized),
|
|
392
|
+
strict=True,
|
|
393
|
+
)
|
|
394
|
+
)
|
|
@@ -14,11 +14,92 @@ if TYPE_CHECKING:
|
|
|
14
14
|
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
-
"""A
|
|
17
|
+
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
+
"""A `DataSequence` over an explicit, ordered list of files.
|
|
19
|
+
|
|
20
|
+
Items live one-per-file; subclasses implement `load_file` and `get_meta`.
|
|
21
|
+
The files are given directly rather than discovered under a `root`, so
|
|
22
|
+
they may live in unrelated directories -- or, on Windows, on different
|
|
23
|
+
drives. (`FileFolderSequence` is the special case where the list is
|
|
24
|
+
discovered under a single root and stored relative to it.)
|
|
25
|
+
|
|
26
|
+
The given order is preserved verbatim and duplicates are kept; sort the
|
|
27
|
+
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
28
|
+
needed. Paths are not checked for existence at construction; `load_file`
|
|
29
|
+
is called lazily on each `get_item`.
|
|
30
|
+
|
|
31
|
+
The base exposes:
|
|
32
|
+
|
|
33
|
+
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
34
|
+
- `get_file(index) -> Path` — full path of the i-th file.
|
|
19
35
|
|
|
20
|
-
|
|
21
|
-
|
|
36
|
+
Type Parameters:
|
|
37
|
+
T: Item type returned by `get_item`.
|
|
38
|
+
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
39
|
+
metadata is something else (label, line number, ...).
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
files: The file paths to expose, in order.
|
|
43
|
+
|
|
44
|
+
Example:
|
|
45
|
+
>>> from pathlib import Path
|
|
46
|
+
>>> class BytesList(FileListSequence[bytes]):
|
|
47
|
+
... def get_meta(self, index: int) -> Path:
|
|
48
|
+
... return self.get_file(index)
|
|
49
|
+
...
|
|
50
|
+
... def load_file(self, path: Path) -> bytes:
|
|
51
|
+
... return path.read_bytes()
|
|
52
|
+
>>>
|
|
53
|
+
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, files: StrPaths) -> None:
|
|
57
|
+
self._files = list(stringify_paths(files))
|
|
58
|
+
|
|
59
|
+
def __len__(self) -> int:
|
|
60
|
+
return len(self._files)
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def files(self) -> tuple[Path, ...]:
|
|
64
|
+
"""Immutable snapshot of the full file paths, in order.
|
|
65
|
+
|
|
66
|
+
Returns a fresh `tuple[Path, ...]` on each access.
|
|
67
|
+
"""
|
|
68
|
+
return tuple(self.get_file(i) for i in range(len(self)))
|
|
69
|
+
|
|
70
|
+
def get_file(self, index: int) -> Path:
|
|
71
|
+
"""Full Path of the file at `index`."""
|
|
72
|
+
return Path(self._files[index])
|
|
73
|
+
|
|
74
|
+
def get_item(self, index: int) -> T:
|
|
75
|
+
return self.load_file(self.get_file(index))
|
|
76
|
+
|
|
77
|
+
@abstractmethod
|
|
78
|
+
def get_meta(self, index: int) -> M:
|
|
79
|
+
raise NotImplementedError
|
|
80
|
+
|
|
81
|
+
@abstractmethod
|
|
82
|
+
def load_file(self, path: Path) -> T:
|
|
83
|
+
"""Decode a single file into an item of type `T`.
|
|
84
|
+
|
|
85
|
+
Called lazily on each `get_item` -- not at construction time.
|
|
86
|
+
Subclasses may freely use external libraries (PIL, librosa,
|
|
87
|
+
cv2, ...) to decode.
|
|
88
|
+
"""
|
|
89
|
+
raise NotImplementedError
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class FileFolderSequence[T, M = Path](FileListSequence[T, M]):
|
|
93
|
+
"""A `FileListSequence` whose file list is discovered under a root.
|
|
94
|
+
|
|
95
|
+
The special case of `FileListSequence` where every file lives under one
|
|
96
|
+
base directory. The list is produced by `list_files(root)`, validated to
|
|
97
|
+
be under `root`, and stored in root-relative form so memory stays low for
|
|
98
|
+
large datasets and the paths survive a `root` relocation; `get_file`
|
|
99
|
+
transparently re-prepends `root`. `load_file`, `get_item`, `files`, and
|
|
100
|
+
`__len__` are inherited unchanged.
|
|
101
|
+
|
|
102
|
+
Subclasses are responsible for three things:
|
|
22
103
|
|
|
23
104
|
- **`list_files(self, root)`** (abstract): return the full `Path`
|
|
24
105
|
of every file to expose, in the desired order. Called once from
|
|
@@ -33,16 +114,9 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
33
114
|
to `Path` and `get_meta(i)` can be the one-liner
|
|
34
115
|
`return self.get_file(i)`.
|
|
35
116
|
|
|
36
|
-
The base exposes:
|
|
117
|
+
The base adds, on top of `FileListSequence`:
|
|
37
118
|
|
|
38
119
|
- `root: Path` — the base directory.
|
|
39
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
40
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
41
|
-
|
|
42
|
-
Paths are kept internally in their root-relative form so that
|
|
43
|
-
memory stays low for large datasets and the sequence survives
|
|
44
|
-
`root` relocations; the conversion is transparent to subclasses
|
|
45
|
-
and external callers.
|
|
46
120
|
|
|
47
121
|
Parameterized subclasses:
|
|
48
122
|
When a subclass needs instance-level options (e.g. `pattern`,
|
|
@@ -94,48 +168,20 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
94
168
|
|
|
95
169
|
def __init__(self, root: StrPath) -> None:
|
|
96
170
|
self._root = ensure_dir_exists(root)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def __len__(self) -> int:
|
|
102
|
-
return len(self._files)
|
|
171
|
+
# `after=root` makes each path root-relative and raises ValueError if
|
|
172
|
+
# any file is not under `root`. The base then stores the relative
|
|
173
|
+
# form; `get_file` re-prepends `root`.
|
|
174
|
+
super().__init__(stringify_paths(self.list_files(self._root), after=self._root))
|
|
103
175
|
|
|
104
176
|
@property
|
|
105
177
|
def root(self) -> Path:
|
|
106
178
|
"""The base directory the sequence was constructed from."""
|
|
107
179
|
return self._root
|
|
108
180
|
|
|
109
|
-
@property
|
|
110
|
-
def files(self) -> tuple[Path, ...]:
|
|
111
|
-
"""Immutable snapshot of the full file paths this sequence exposes.
|
|
112
|
-
|
|
113
|
-
Returns a fresh `tuple[Path, ...]` on each access, in the order
|
|
114
|
-
established by `list_files`.
|
|
115
|
-
"""
|
|
116
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
117
|
-
|
|
118
181
|
def get_file(self, index: int) -> Path:
|
|
119
182
|
"""Full Path of the file at `index`."""
|
|
120
183
|
return wrap_path(self._files[index], prepend=self._root)
|
|
121
184
|
|
|
122
|
-
def get_item(self, index: int) -> T:
|
|
123
|
-
return self.load_file(self.get_file(index))
|
|
124
|
-
|
|
125
|
-
@abstractmethod
|
|
126
|
-
def get_meta(self, index: int) -> M:
|
|
127
|
-
raise NotImplementedError
|
|
128
|
-
|
|
129
|
-
@abstractmethod
|
|
130
|
-
def load_file(self, path: Path) -> T:
|
|
131
|
-
"""Decode a single file into an item of type `T`.
|
|
132
|
-
|
|
133
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
134
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
135
|
-
cv2, ...) to decode.
|
|
136
|
-
"""
|
|
137
|
-
raise NotImplementedError
|
|
138
|
-
|
|
139
185
|
@abstractmethod
|
|
140
186
|
def list_files(self, root: Path) -> list[Path]:
|
|
141
187
|
"""Return the full Path of every file to expose, in order.
|
|
@@ -149,82 +195,6 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
149
195
|
raise NotImplementedError
|
|
150
196
|
|
|
151
197
|
|
|
152
|
-
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
153
|
-
"""A `DataSequence` over an explicit, ordered list of files.
|
|
154
|
-
|
|
155
|
-
Like `FileFolderSequence`, items live one-per-file and subclasses
|
|
156
|
-
implement `load_file` and `get_meta`. Unlike it, the files are given
|
|
157
|
-
directly rather than discovered under a `root`, so they may live in
|
|
158
|
-
unrelated directories -- or, on Windows, on different drives -- which
|
|
159
|
-
`FileFolderSequence` cannot represent (it stores paths relative to one
|
|
160
|
-
root). There is no `list_files`: the input list *is* the listing.
|
|
161
|
-
|
|
162
|
-
The given order is preserved verbatim and duplicates are kept; sort the
|
|
163
|
-
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
164
|
-
needed. Paths are not checked for existence at construction; `load_file`
|
|
165
|
-
is called lazily on each `get_item`.
|
|
166
|
-
|
|
167
|
-
The base exposes:
|
|
168
|
-
|
|
169
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
170
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
171
|
-
|
|
172
|
-
Type Parameters:
|
|
173
|
-
T: Item type returned by `get_item`.
|
|
174
|
-
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
175
|
-
metadata is something else (label, line number, ...).
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
files: The file paths to expose, in order.
|
|
179
|
-
|
|
180
|
-
Example:
|
|
181
|
-
>>> from pathlib import Path
|
|
182
|
-
>>> class BytesList(FileListSequence[bytes]):
|
|
183
|
-
... def get_meta(self, index: int) -> Path:
|
|
184
|
-
... return self.get_file(index)
|
|
185
|
-
...
|
|
186
|
-
... def load_file(self, path: Path) -> bytes:
|
|
187
|
-
... return path.read_bytes()
|
|
188
|
-
>>>
|
|
189
|
-
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
190
|
-
"""
|
|
191
|
-
|
|
192
|
-
def __init__(self, files: StrPaths) -> None:
|
|
193
|
-
self._files = list(stringify_paths(files))
|
|
194
|
-
|
|
195
|
-
def __len__(self) -> int:
|
|
196
|
-
return len(self._files)
|
|
197
|
-
|
|
198
|
-
@property
|
|
199
|
-
def files(self) -> tuple[Path, ...]:
|
|
200
|
-
"""Immutable snapshot of the full file paths, in the given order.
|
|
201
|
-
|
|
202
|
-
Returns a fresh `tuple[Path, ...]` on each access.
|
|
203
|
-
"""
|
|
204
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
205
|
-
|
|
206
|
-
def get_file(self, index: int) -> Path:
|
|
207
|
-
"""Full Path of the file at `index`."""
|
|
208
|
-
return Path(self._files[index])
|
|
209
|
-
|
|
210
|
-
def get_item(self, index: int) -> T:
|
|
211
|
-
return self.load_file(self.get_file(index))
|
|
212
|
-
|
|
213
|
-
@abstractmethod
|
|
214
|
-
def get_meta(self, index: int) -> M:
|
|
215
|
-
raise NotImplementedError
|
|
216
|
-
|
|
217
|
-
@abstractmethod
|
|
218
|
-
def load_file(self, path: Path) -> T:
|
|
219
|
-
"""Decode a single file into an item of type `T`.
|
|
220
|
-
|
|
221
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
222
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
223
|
-
cv2, ...) to decode.
|
|
224
|
-
"""
|
|
225
|
-
raise NotImplementedError
|
|
226
|
-
|
|
227
|
-
|
|
228
198
|
class SingleFileSequence[T, M = None](DataSequence[T, M]):
|
|
229
199
|
"""A `DataSequence` backed by a single file that holds multiple records.
|
|
230
200
|
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
`dir_not_empty(s)` with validation, plus `_unsafe` variants that skip
|
|
11
11
|
pre-checks
|
|
12
12
|
- [`utils`](./utils.py) — `stringify_path(s)`, `wrap_path(s)`,
|
|
13
|
-
`reserve_path(s)`
|
|
13
|
+
`reserve_path(s)`, `ensure_file_extension`
|
|
14
14
|
- [`staged`](./staged.py) — `StagedFile` / `StagedDirectory`, safe
|
|
15
15
|
(atomic) writers usable as a context manager or explicitly
|
|
16
16
|
- [`exceptions`](./exceptions.py) — `DirectoryNotFoundError`, `NotAFileError`
|
|
@@ -116,6 +116,27 @@ stringify_paths(["data/a.txt", "data/b.txt"], after="data") # ["a.txt", "b.txt"
|
|
|
116
116
|
wrap_path("logs", prepend="var", append="server.log") # var/logs/server.log
|
|
117
117
|
```
|
|
118
118
|
|
|
119
|
+
`ensure_file_extension` is a pure (no filesystem) extension check: it
|
|
120
|
+
requires a `.<ext>` final suffix, raising `ValueError` otherwise. `ext` may
|
|
121
|
+
be a single extension or an iterable of acceptable ones; the leading dot is
|
|
122
|
+
optional and the match is case-insensitive. `add=True` mirrors `make` on
|
|
123
|
+
`ensure_dir_exists` — it appends the (first) extension when the path has no
|
|
124
|
+
suffix (`np.save`-style) instead of raising; a *wrong* suffix still raises.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from kaparoo.filesystem import ensure_file_extension
|
|
128
|
+
|
|
129
|
+
ensure_file_extension("data.bin", "bin") # Path("data.bin")
|
|
130
|
+
ensure_file_extension("data.txt", "bin") # ValueError
|
|
131
|
+
ensure_file_extension("out/00000_phase", "bin") # ValueError (no suffix)
|
|
132
|
+
|
|
133
|
+
# Any of several accepted extensions:
|
|
134
|
+
ensure_file_extension("img.jpeg", ("jpg", "jpeg", "png")) # Path("img.jpeg")
|
|
135
|
+
|
|
136
|
+
ensure_file_extension("out/00000_phase", "bin", add=True) # Path("out/00000_phase.bin")
|
|
137
|
+
ensure_file_extension("out/data.txt", "bin", add=True) # ValueError (wrong suffix)
|
|
138
|
+
```
|
|
139
|
+
|
|
119
140
|
## Reserving a destination
|
|
120
141
|
|
|
121
142
|
`reserve_path` guards a path that should *not* yet exist, so you don't
|
|
@@ -16,6 +16,7 @@ __all__ = (
|
|
|
16
16
|
"ensure_dir_exists",
|
|
17
17
|
"ensure_dirs_exist",
|
|
18
18
|
"ensure_file_exists",
|
|
19
|
+
"ensure_file_extension",
|
|
19
20
|
"ensure_files_exist",
|
|
20
21
|
"ensure_path_exists",
|
|
21
22
|
"ensure_paths_exist",
|
|
@@ -79,6 +80,7 @@ from kaparoo.filesystem.search import (
|
|
|
79
80
|
)
|
|
80
81
|
from kaparoo.filesystem.staged import StagedDirectory, StagedFile
|
|
81
82
|
from kaparoo.filesystem.utils import (
|
|
83
|
+
ensure_file_extension,
|
|
82
84
|
reserve_path,
|
|
83
85
|
reserve_paths,
|
|
84
86
|
stringify_path,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
__all__ = (
|
|
4
|
+
"ensure_file_extension",
|
|
4
5
|
"reserve_path",
|
|
5
6
|
"reserve_paths",
|
|
6
7
|
"stringify_path",
|
|
@@ -15,7 +16,7 @@ from pathlib import Path
|
|
|
15
16
|
from typing import TYPE_CHECKING, overload
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
|
-
from collections.abc import Sequence
|
|
19
|
+
from collections.abc import Iterable, Sequence
|
|
19
20
|
from typing import Literal
|
|
20
21
|
|
|
21
22
|
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
@@ -357,3 +358,54 @@ def reserve_paths(
|
|
|
357
358
|
reserve_path(p, exist_ok=exist_ok, make_parents=make_parents) for p in paths
|
|
358
359
|
]
|
|
359
360
|
return stringify_paths(paths) if stringify else paths
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def ensure_file_extension(
    path: StrPath, ext: str | Iterable[str], *, add: bool = False
) -> Path:
    """Return `path` as a `Path`, requiring a case-insensitive `.<ext>` suffix.

    A pure path check that never touches the filesystem. `ext` is a single
    extension or an iterable of acceptable ones (e.g. `("jpg", "jpeg")`); the
    leading dot on each is optional, so `"bin"` and `".bin"` behave the same.
    Only the final suffix is considered: `archive.tar.gz` matches `ext="gz"`,
    not `ext="tar.gz"`.

    `add` mirrors `make` on `ensure_dir_exists`: when False (the default) a
    path with no suffix raises like any other mismatch; when True, the missing
    suffix is appended -- the *first* of `ext` when several are given, so pass
    an ordered list/tuple if that matters. A *wrong* suffix always raises,
    regardless of `add`.

    Args:
        path: The path to check.
        ext: The required extension, or an iterable of acceptable ones, each
            with or without a leading dot. Every entry must name a non-empty
            extension: `""` and `"."` are rejected.
        add: Whether to append the (first) extension when `path` has no
            suffix, instead of raising. Defaults to False.

    Returns:
        The path as a Path object, guaranteed to end in an accepted `.<ext>`.

    Raises:
        ValueError: If `ext` is empty or contains a blank extension (`""` or
            `"."`), or `path`'s final suffix is none of the accepted
            extensions -- except the no-suffix case resolved by `add=True`.
    """
    raw_exts = [ext] if isinstance(ext, str) else list(ext)
    if not raw_exts:
        msg = "ext must name at least one extension"
        raise ValueError(msg)

    # Normalise "bin" / ".bin" to the bare extension name.
    exts = [e.removeprefix(".") for e in raw_exts]

    # A blank entry ("" or ".") would become the bare suffix ".", which can
    # never be a meaningful requirement and, with `add=True`, would be handed
    # to `Path.with_suffix(".")`. Reject it here with a message that points at
    # the offending argument.
    if not all(exts):
        msg = "ext must not contain an empty extension"
        raise ValueError(msg)

    path = Path(path)
    if add and not path.suffix:
        # No suffix at all: append the first (preferred) extension as given.
        return path.with_suffix(f".{exts[0]}")

    accepted = {f".{e.lower()}" for e in exts}
    if path.suffix.lower() not in accepted:
        wanted = " / ".join(f".{e}" for e in exts)
        msg = f"{path.name} must have a {wanted} extension (got {path.suffix!r})"
        raise ValueError(msg)

    return path
|
|
@@ -4,7 +4,7 @@ Small, focused helpers — not enough material for their own packages.
|
|
|
4
4
|
|
|
5
5
|
## Modules
|
|
6
6
|
|
|
7
|
-
- [`timer`](./timer.py) — `Timer`, `
|
|
7
|
+
- [`timer`](./timer.py) — `Timer`, `SpanTimer`, `SpanRecord`
|
|
8
8
|
- [`aggregate`](./aggregate.py) — `Aggregator` + the `Reduction` family
|
|
9
9
|
(`Mean`, `Sum`, `Min`, `Max`, `Last`, `Fold`)
|
|
10
10
|
- [`optional`](./optional.py) — helpers for `T | None` values
|
|
@@ -47,29 +47,29 @@ with Timer("ms") as t:
|
|
|
47
47
|
do_work()
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
-
##
|
|
50
|
+
## SpanTimer
|
|
51
51
|
|
|
52
|
-
`
|
|
53
|
-
is a `
|
|
52
|
+
`SpanTimer` extends `Timer` with named time *spans*. Each span
|
|
53
|
+
is a `SpanRecord` (a `TypedDict` with `label`, `duration`,
|
|
54
54
|
`total_time`) and is produced in one of two ways:
|
|
55
55
|
|
|
56
56
|
- **`lap(label)` — split.** Each lap's `duration` is the time since the
|
|
57
57
|
previous lap (or the start), so every instant belongs to exactly one
|
|
58
|
-
|
|
58
|
+
span.
|
|
59
59
|
- **`measure(label)` — stopwatch.** Times only the wrapped block; time
|
|
60
|
-
spent outside any `measure` block is attributed to no
|
|
60
|
+
spent outside any `measure` block is attributed to no span.
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from kaparoo.utils.timer import
|
|
63
|
+
from kaparoo.utils.timer import SpanTimer
|
|
64
64
|
|
|
65
|
-
with
|
|
65
|
+
with SpanTimer("ms", ndigits=1) as st:
|
|
66
66
|
step_a()
|
|
67
67
|
st.lap("A") # split: time since start
|
|
68
68
|
idle() # NOT counted by the next measure
|
|
69
69
|
with st.measure("B"): # stopwatch: only this block
|
|
70
70
|
step_b()
|
|
71
71
|
|
|
72
|
-
# Per-
|
|
72
|
+
# Per-span details:
|
|
73
73
|
for record in st.records:
|
|
74
74
|
print(record["label"], record["duration"])
|
|
75
75
|
|
|
@@ -80,7 +80,7 @@ print(st.elapsed) # total wall time of the `with` block
|
|
|
80
80
|
|
|
81
81
|
### `lap` vs `measure`
|
|
82
82
|
|
|
83
|
-
`lap` splits the timeline into contiguous
|
|
83
|
+
`lap` splits the timeline into contiguous spans — the gap before a
|
|
84
84
|
lap is folded into that lap. `measure` brackets a region and ignores
|
|
85
85
|
everything outside it, so untimed work between blocks is excluded from
|
|
86
86
|
`summary`. Pick `lap` for back-to-back phases, `measure` for discrete
|
|
@@ -88,16 +88,16 @@ operations interleaved with untimed work. Pauses inside either are
|
|
|
88
88
|
excluded; a `measure` block that raises records nothing.
|
|
89
89
|
|
|
90
90
|
`measure` doubles as a decorator (every decorated call records one
|
|
91
|
-
|
|
91
|
+
span, as long as the timer is running when it is called):
|
|
92
92
|
|
|
93
93
|
```python
|
|
94
|
-
st =
|
|
94
|
+
st = SpanTimer("ms")
|
|
95
95
|
|
|
96
96
|
@st.measure("load")
|
|
97
97
|
def load() -> None: ...
|
|
98
98
|
|
|
99
99
|
with st:
|
|
100
|
-
load() # records a "load"
|
|
100
|
+
load() # records a "load" span each call
|
|
101
101
|
```
|
|
102
102
|
|
|
103
103
|
### Same-label policies
|
|
@@ -111,7 +111,7 @@ with st:
|
|
|
111
111
|
The policy applies to both `lap` and `measure`:
|
|
112
112
|
|
|
113
113
|
```python
|
|
114
|
-
with
|
|
114
|
+
with SpanTimer(on_same_label="separate") as st:
|
|
115
115
|
st.lap("A")
|
|
116
116
|
st.lap("A") # recorded as "A (2)"
|
|
117
117
|
st.lap("A") # recorded as "A (3)"
|
|
@@ -6,8 +6,8 @@ __all__ = (
|
|
|
6
6
|
"Mean",
|
|
7
7
|
"Min",
|
|
8
8
|
"Reduction",
|
|
9
|
-
"
|
|
10
|
-
"
|
|
9
|
+
"SpanRecord",
|
|
10
|
+
"SpanTimer",
|
|
11
11
|
"Std",
|
|
12
12
|
"Sum",
|
|
13
13
|
"Timer",
|
|
@@ -42,4 +42,4 @@ from kaparoo.utils.optional import (
|
|
|
42
42
|
unwrap_or_factories,
|
|
43
43
|
unwrap_or_factory,
|
|
44
44
|
)
|
|
45
|
-
from kaparoo.utils.timer import
|
|
45
|
+
from kaparoo.utils.timer import SpanRecord, SpanTimer, Timer
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = ("
|
|
3
|
+
__all__ = ("SpanRecord", "SpanTimer", "Timer")
|
|
4
4
|
|
|
5
5
|
import time
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
@@ -22,20 +22,20 @@ _SCALES: dict[str, float] = {"s": 1e-9, "ms": 1e-6, "us": 1e-3, "ns": 1.0}
|
|
|
22
22
|
_LABEL_POLICIES: frozenset[str] = frozenset({"merge", "separate", "reject"})
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class
|
|
26
|
-
"""A single timing record produced by `
|
|
25
|
+
class SpanRecord(TypedDict):
|
|
26
|
+
"""A single timing record produced by `SpanTimer`.
|
|
27
27
|
|
|
28
|
-
A
|
|
28
|
+
A span is produced either by `lap` (the span since the previous
|
|
29
29
|
lap) or by `measure` (the span of a wrapped block).
|
|
30
30
|
|
|
31
31
|
Attributes:
|
|
32
|
-
label: The
|
|
32
|
+
label: The span's name. May carry a " (N)" suffix when produced
|
|
33
33
|
under `on_same_label="separate"`.
|
|
34
|
-
duration: Length of this
|
|
34
|
+
duration: Length of this span, in the timer's `unit` and rounded
|
|
35
35
|
by `ndigits` if given. For `lap`, the time since the previous
|
|
36
36
|
lap (or the timer start); for `measure`, the wrapped block's
|
|
37
37
|
duration.
|
|
38
|
-
total_time: Time elapsed from the timer start to this
|
|
38
|
+
total_time: Time elapsed from the timer start to this span's end,
|
|
39
39
|
in the timer's `unit` and rounded by `ndigits` if given.
|
|
40
40
|
"""
|
|
41
41
|
|
|
@@ -45,14 +45,14 @@ class SegmentRecord(TypedDict):
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class BaseTimer(ContextDecorator, ABC):
|
|
48
|
-
"""Abstract base for `Timer` and `
|
|
48
|
+
"""Abstract base for `Timer` and `SpanTimer`.
|
|
49
49
|
|
|
50
50
|
Provides the shared timing machinery: unit/precision formatting,
|
|
51
51
|
`pause`/`resume`/`suspend`, and a context-manager protocol that
|
|
52
52
|
auto-resumes a paused timer on exit. Subclasses implement `_finalize`
|
|
53
53
|
to record their final result, and may override the `_reset` hook to
|
|
54
54
|
clear per-`with`-block state. Not part of the public API -- prefer
|
|
55
|
-
`Timer` or `
|
|
55
|
+
`Timer` or `SpanTimer`.
|
|
56
56
|
|
|
57
57
|
An instance is reusable but **not reentrant**: a single instance must
|
|
58
58
|
not be nested within itself -- including as a decorator on a recursive
|
|
@@ -249,30 +249,30 @@ class Timer(BaseTimer):
|
|
|
249
249
|
self.elapsed = self._format_time(elapsed_ns)
|
|
250
250
|
|
|
251
251
|
|
|
252
|
-
class
|
|
253
|
-
"""A timer recording named time
|
|
252
|
+
class SpanTimer(BaseTimer):
|
|
253
|
+
"""A timer recording named time spans within one `with` block.
|
|
254
254
|
|
|
255
|
-
Usable as a context manager or as a decorator.
|
|
255
|
+
Usable as a context manager or as a decorator. Spans are recorded
|
|
256
256
|
in two complementary ways:
|
|
257
257
|
|
|
258
258
|
- `lap(label)` splits the timeline: each lap's `duration` is the
|
|
259
259
|
time since the previous lap (or the start), so every instant is
|
|
260
|
-
attributed to exactly one
|
|
260
|
+
attributed to exactly one span.
|
|
261
261
|
- `measure(label)` brackets a region (as a `with` block or a
|
|
262
262
|
decorator): only the wrapped span is recorded, and time spent
|
|
263
|
-
outside any `measure` block is attributed to no
|
|
263
|
+
outside any `measure` block is attributed to no span.
|
|
264
264
|
|
|
265
265
|
Both feed the same `records` / `summary`, honour `on_same_label`, and
|
|
266
266
|
exclude paused intervals.
|
|
267
267
|
|
|
268
268
|
Attributes:
|
|
269
269
|
on_same_label: The same-label handling policy (see `__init__`).
|
|
270
|
-
records: The recorded
|
|
270
|
+
records: The recorded spans, in call order.
|
|
271
271
|
elapsed: The total measured duration, from start to exit.
|
|
272
272
|
Defaults to 0.0 until the first exit.
|
|
273
273
|
|
|
274
274
|
Example:
|
|
275
|
-
with
|
|
275
|
+
with SpanTimer("ms", ndigits=1) as st:
|
|
276
276
|
step_a()
|
|
277
277
|
st.lap("A") # split: time since start
|
|
278
278
|
with st.measure("B"): # block: only this region
|
|
@@ -288,7 +288,7 @@ class SegmentTimer(BaseTimer):
|
|
|
288
288
|
ndigits: int | None = None,
|
|
289
289
|
on_same_label: LabelPolicy = "merge",
|
|
290
290
|
) -> None:
|
|
291
|
-
"""Initialize the
|
|
291
|
+
"""Initialize the span timer.
|
|
292
292
|
|
|
293
293
|
Args:
|
|
294
294
|
unit: The time unit for reported values. One of "s", "ms", "us",
|
|
@@ -313,7 +313,7 @@ class SegmentTimer(BaseTimer):
|
|
|
313
313
|
raise ValueError(msg)
|
|
314
314
|
|
|
315
315
|
self.on_same_label = on_same_label
|
|
316
|
-
self.records: list[
|
|
316
|
+
self.records: list[SpanRecord] = []
|
|
317
317
|
self.elapsed: float = 0.0
|
|
318
318
|
|
|
319
319
|
self._last_time: int = 0
|
|
@@ -323,8 +323,8 @@ class SegmentTimer(BaseTimer):
|
|
|
323
323
|
def summary(self) -> dict[str, float]:
|
|
324
324
|
"""Per-label sum of `duration` across `records`.
|
|
325
325
|
|
|
326
|
-
Only recorded
|
|
327
|
-
|
|
326
|
+
Only recorded spans count: time outside every `lap` / `measure`
|
|
327
|
+
span (e.g. after the last `lap`, or between `measure` blocks) is
|
|
328
328
|
not included. Each record's `duration` is already rounded by
|
|
329
329
|
`ndigits` (when set); this property sums those rounded values and
|
|
330
330
|
rounds the sum once more.
|
|
@@ -377,11 +377,11 @@ class SegmentTimer(BaseTimer):
|
|
|
377
377
|
else label
|
|
378
378
|
)
|
|
379
379
|
|
|
380
|
-
def _make_record(self, label: str) ->
|
|
380
|
+
def _make_record(self, label: str) -> SpanRecord:
|
|
381
381
|
"""Build a record stamped with the current time and advance `_last_time`."""
|
|
382
382
|
current_time = time.perf_counter_ns()
|
|
383
383
|
|
|
384
|
-
record:
|
|
384
|
+
record: SpanRecord = {
|
|
385
385
|
"label": label,
|
|
386
386
|
"duration": self._format_time(current_time - self._last_time),
|
|
387
387
|
"total_time": self._format_time(current_time - self._start_time),
|
|
@@ -415,12 +415,12 @@ class SegmentTimer(BaseTimer):
|
|
|
415
415
|
|
|
416
416
|
@contextmanager
|
|
417
417
|
def measure(self, label: str = "Block") -> Iterator[None]:
|
|
418
|
-
"""Record a
|
|
418
|
+
"""Record a span covering only the wrapped block (stopwatch style).
|
|
419
419
|
|
|
420
|
-
Unlike `lap`, which splits the timeline into contiguous
|
|
420
|
+
Unlike `lap`, which splits the timeline into contiguous spans,
|
|
421
421
|
`measure` times only the wrapped region; time spent outside any
|
|
422
|
-
`measure` block is attributed to no
|
|
423
|
-
block are excluded. A
|
|
422
|
+
`measure` block is attributed to no span. Pauses inside the
|
|
423
|
+
block are excluded. A span is recorded only on clean exit -- if
|
|
424
424
|
the block raises, nothing is recorded and the exception propagates.
|
|
425
425
|
Repeated labels follow `on_same_label`, exactly as `lap`. Do not
|
|
426
426
|
nest `measure` blocks: each resets the shared baseline, so an outer
|
|
@@ -428,23 +428,23 @@ class SegmentTimer(BaseTimer):
|
|
|
428
428
|
|
|
429
429
|
Because `contextmanager` results are also `ContextDecorator`s, the
|
|
430
430
|
returned object doubles as a decorator (every decorated call
|
|
431
|
-
records one
|
|
431
|
+
records one span, provided the timer is running when called):
|
|
432
432
|
|
|
433
|
-
st =
|
|
433
|
+
st = SpanTimer("ms")
|
|
434
434
|
|
|
435
435
|
@st.measure("load")
|
|
436
436
|
def load() -> None: ...
|
|
437
437
|
|
|
438
438
|
with st:
|
|
439
|
-
load() # records a "load"
|
|
439
|
+
load() # records a "load" span
|
|
440
440
|
with st.measure("parse"):
|
|
441
|
-
parse() # records a "parse"
|
|
441
|
+
parse() # records a "parse" span
|
|
442
442
|
|
|
443
443
|
Args:
|
|
444
|
-
label: The
|
|
444
|
+
label: The span's name. Defaults to "Block".
|
|
445
445
|
|
|
446
446
|
Yields:
|
|
447
|
-
None. The wrapped block runs while the
|
|
447
|
+
None. The wrapped block runs while the span is timed.
|
|
448
448
|
|
|
449
449
|
Raises:
|
|
450
450
|
RuntimeError: If the timer has not been started, or is paused on
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kaparoo_python-0.5.0 → kaparoo_python-0.7.0}/kaparoo/filesystem/search/filters/multi_pattern.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|