kaparoo-python 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/PKG-INFO +7 -6
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/README.md +6 -5
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/README.md +57 -4
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/__init__.py +4 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/__init__.py +4 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/composers.py +179 -6
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/templates.py +90 -120
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/pyproject.toml +1 -1
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/LICENSE +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/base.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/data/sequences/utils.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/README.md +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/directory.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/exceptions.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/existence.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/README.md +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/classes.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/deprecated.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/base.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/logical.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/multi_pattern.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/pattern.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/types.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/utils.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/wrappers.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/staged.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/types.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/utils.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/py.typed +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/utils/README.md +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/utils/__init__.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/utils/aggregate.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/utils/optional.py +0 -0
- {kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/utils/timer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaparoo-python
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Personally common and useful Python features
|
|
5
5
|
Keywords: filesystem,pathlib,paths,utilities
|
|
6
6
|
Author: Jaewoo Park
|
|
@@ -67,16 +67,17 @@ hook for custom filter kinds.
|
|
|
67
67
|
|
|
68
68
|
`Timer` / `SegmentTimer` context-manager-and-decorator timers (with
|
|
69
69
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
70
|
-
pluggable metric aggregation (the batch → epoch → run pattern
|
|
71
|
-
small family of helpers for working with
|
|
72
|
-
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
70
|
+
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
71
|
+
experimental); plus a small family of helpers for working with
|
|
72
|
+
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
73
73
|
|
|
74
74
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
75
75
|
|
|
76
76
|
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
77
77
|
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
78
|
-
`WindowedSequence`), file-backed
|
|
79
|
-
`SingleFileSequence`),
|
|
78
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
79
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
80
|
+
and `generate_batches`.
|
|
80
81
|
|
|
81
82
|
## 🎯 Quick example
|
|
82
83
|
|
|
@@ -46,16 +46,17 @@ hook for custom filter kinds.
|
|
|
46
46
|
|
|
47
47
|
`Timer` / `SegmentTimer` context-manager-and-decorator timers (with
|
|
48
48
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
49
|
-
pluggable metric aggregation (the batch → epoch → run pattern
|
|
50
|
-
small family of helpers for working with
|
|
51
|
-
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
49
|
+
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
50
|
+
experimental); plus a small family of helpers for working with
|
|
51
|
+
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
52
52
|
|
|
53
53
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
54
54
|
|
|
55
55
|
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
56
56
|
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
57
|
-
`WindowedSequence`), file-backed
|
|
58
|
-
`SingleFileSequence`),
|
|
57
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
58
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
59
|
+
and `generate_batches`.
|
|
59
60
|
|
|
60
61
|
## 🎯 Quick example
|
|
61
62
|
|
|
@@ -7,7 +7,8 @@ small set of composers, and ready-to-subclass file-backed templates.
|
|
|
7
7
|
|
|
8
8
|
- [`sequences/base`](./sequences/base.py) — `DataSequence[T, M]` abstract base
|
|
9
9
|
- [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
|
|
10
|
-
`ConcatSequence`, `WindowedSequence`
|
|
10
|
+
`TransformedSequence`, `ConcatSequence`, `WindowedSequence`,
|
|
11
|
+
`ZippedSequence`
|
|
11
12
|
- [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
|
|
12
13
|
`FileListSequence`, `SingleFileSequence`
|
|
13
14
|
- [`sequences/utils`](./sequences/utils.py) — `generate_batches`
|
|
@@ -83,18 +84,49 @@ combined = ConcatSequence(train_a, train_b, train_c)
|
|
|
83
84
|
len(combined) # == len(train_a) + len(train_b) + len(train_c)
|
|
84
85
|
```
|
|
85
86
|
|
|
87
|
+
### `TransformedSequence`
|
|
88
|
+
|
|
89
|
+
A lazy view that applies a `transform` callable to each item of
|
|
90
|
+
`source`. The transform is called on demand in `get_item` -- nothing
|
|
91
|
+
is computed at construction. `get_meta` passes through `source.get_meta`
|
|
92
|
+
unchanged by default; override it in a subclass when `M_out` differs
|
|
93
|
+
from `M_in`.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from kaparoo.data.sequences import TransformedSequence
|
|
97
|
+
|
|
98
|
+
# Item transform only -- metadata type is unchanged.
|
|
99
|
+
normalized = TransformedSequence(image_folder, normalize_fn)
|
|
100
|
+
|
|
101
|
+
# Meta transform via subclassing:
|
|
102
|
+
class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
|
|
103
|
+
def get_meta(self, index: int) -> AugMeta:
|
|
104
|
+
return AugMeta(path=self.source.get_meta(index), applied="normalize")
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Chaining two `TransformedSequence` instances applies the transforms in
|
|
108
|
+
order:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
resized = TransformedSequence(raw, resize)
|
|
112
|
+
normalized = TransformedSequence(resized, normalize)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
`T_out` and `M_out` default to `T_in` and `M_in` respectively (PEP 696),
|
|
116
|
+
so you only need to specify them when the type actually changes.
|
|
117
|
+
|
|
86
118
|
### `WindowedSequence`
|
|
87
119
|
|
|
88
120
|
An abstract sliding-window view: each item is a `tuple[T, ...]` of
|
|
89
121
|
`size` frames from `source`. Per-frame `M_in` and window-level
|
|
90
|
-
`M_out` are independent type parameters, so subclasses decide how
|
|
91
|
-
metadata aggregates.
|
|
122
|
+
`M_out` are independent type parameters (`M_out` defaults to `M_in`),
|
|
123
|
+
so subclasses decide how metadata aggregates.
|
|
92
124
|
|
|
93
125
|
```python
|
|
94
126
|
from pathlib import Path
|
|
95
127
|
from kaparoo.data.sequences import WindowedSequence
|
|
96
128
|
|
|
97
|
-
class FirstFrameMeta(WindowedSequence[bytes, Path, Path]):
|
|
129
|
+
class FirstFrameMeta(WindowedSequence[bytes, Path]):
|
|
98
130
|
def get_meta(self, index):
|
|
99
131
|
# window's metadata is its first frame's metadata
|
|
100
132
|
index = self._normalize_index(index)
|
|
@@ -109,6 +141,27 @@ windows.get_meta(0) # frames.get_meta(0)
|
|
|
109
141
|
`size`, `step`, `skip` follow the same semantics as
|
|
110
142
|
[`generate_batches`](#generate_batches).
|
|
111
143
|
|
|
144
|
+
### `ZippedSequence`
|
|
145
|
+
|
|
146
|
+
Element-wise zip of two sequences — item `i` is `(first[i], second[i])`
|
|
147
|
+
and metadata `i` is the `(M1, M2)` tuple. This is the "paired image +
|
|
148
|
+
label" pattern that `ConcatSequence` (end-to-end) cannot express. With
|
|
149
|
+
`strict=True` (the default) the lengths must match or construction raises
|
|
150
|
+
`ValueError`; pass `strict=False` to truncate to the shorter length, like
|
|
151
|
+
the builtin `zip`. For a different combined metadata shape, subclass and
|
|
152
|
+
override `get_meta`.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from kaparoo.data.sequences import ZippedSequence
|
|
156
|
+
|
|
157
|
+
pairs = ZippedSequence(images, labels)
|
|
158
|
+
pairs[0] # (images[0], labels[0])
|
|
159
|
+
pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
For three or more, nest: `ZippedSequence(a, ZippedSequence(b, c))` yields
|
|
163
|
+
`(a[i], (b[i], c[i]))`.
|
|
164
|
+
|
|
112
165
|
## Templates
|
|
113
166
|
|
|
114
167
|
### `FileFolderSequence`
|
|
@@ -5,7 +5,9 @@ __all__ = (
|
|
|
5
5
|
"FileListSequence",
|
|
6
6
|
"SingleFileSequence",
|
|
7
7
|
"SlicedSequence",
|
|
8
|
+
"TransformedSequence",
|
|
8
9
|
"WindowedSequence",
|
|
10
|
+
"ZippedSequence",
|
|
9
11
|
"generate_batches",
|
|
10
12
|
)
|
|
11
13
|
|
|
@@ -16,6 +18,8 @@ from kaparoo.data.sequences import (
|
|
|
16
18
|
FileListSequence,
|
|
17
19
|
SingleFileSequence,
|
|
18
20
|
SlicedSequence,
|
|
21
|
+
TransformedSequence,
|
|
19
22
|
WindowedSequence,
|
|
23
|
+
ZippedSequence,
|
|
20
24
|
generate_batches,
|
|
21
25
|
)
|
|
@@ -7,7 +7,9 @@ __all__ = (
|
|
|
7
7
|
"FileListSequence",
|
|
8
8
|
"SingleFileSequence",
|
|
9
9
|
"SlicedSequence",
|
|
10
|
+
"TransformedSequence",
|
|
10
11
|
"WindowedSequence",
|
|
12
|
+
"ZippedSequence",
|
|
11
13
|
"generate_batches",
|
|
12
14
|
)
|
|
13
15
|
|
|
@@ -15,7 +17,9 @@ from kaparoo.data.sequences.base import DataSequence
|
|
|
15
17
|
from kaparoo.data.sequences.composers import (
|
|
16
18
|
ConcatSequence,
|
|
17
19
|
SlicedSequence,
|
|
20
|
+
TransformedSequence,
|
|
18
21
|
WindowedSequence,
|
|
22
|
+
ZippedSequence,
|
|
19
23
|
)
|
|
20
24
|
from kaparoo.data.sequences.templates import (
|
|
21
25
|
FileFolderSequence,
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = (
|
|
3
|
+
__all__ = (
|
|
4
|
+
"ConcatSequence",
|
|
5
|
+
"SlicedSequence",
|
|
6
|
+
"TransformedSequence",
|
|
7
|
+
"WindowedSequence",
|
|
8
|
+
"ZippedSequence",
|
|
9
|
+
)
|
|
4
10
|
|
|
5
11
|
from abc import abstractmethod
|
|
6
12
|
from bisect import bisect_right
|
|
7
|
-
from typing import TYPE_CHECKING
|
|
13
|
+
from typing import TYPE_CHECKING, cast
|
|
8
14
|
|
|
9
15
|
from kaparoo.data.sequences.base import DataSequence
|
|
10
16
|
|
|
11
17
|
if TYPE_CHECKING:
|
|
12
|
-
from collections.abc import Sequence
|
|
18
|
+
from collections.abc import Callable, Sequence
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
class SlicedSequence[T, M](DataSequence[T, M]):
|
|
@@ -59,6 +65,61 @@ class SlicedSequence[T, M](DataSequence[T, M]):
|
|
|
59
65
|
return self._source.get_meta(self._indices[index])
|
|
60
66
|
|
|
61
67
|
|
|
68
|
+
class TransformedSequence[T_in, M_in, T_out = T_in, M_out = M_in](
|
|
69
|
+
DataSequence[T_out, M_out]
|
|
70
|
+
):
|
|
71
|
+
"""A view of `source` with `transform` applied lazily to each item.
|
|
72
|
+
|
|
73
|
+
`transform` is called on demand in `get_item`; nothing is loaded or
|
|
74
|
+
converted at construction time. `get_meta` passes through
|
|
75
|
+
`source.get_meta` unchanged by default -- override it in a subclass
|
|
76
|
+
when `M_out` differs from `M_in`.
|
|
77
|
+
|
|
78
|
+
Type Parameters:
|
|
79
|
+
T_in: Item type of `source`.
|
|
80
|
+
M_in: Metadata type of `source`.
|
|
81
|
+
T_out: Item type after the transform. Defaults to `T_in`.
|
|
82
|
+
M_out: Metadata type exposed by this view. Defaults to `M_in`.
|
|
83
|
+
When `M_out != M_in`, override `get_meta` in a subclass;
|
|
84
|
+
the default passthrough is only safe when `M_out == M_in`.
|
|
85
|
+
|
|
86
|
+
Example:
|
|
87
|
+
>>> # Item-only transform; metadata passes through unchanged.
|
|
88
|
+
>>> normalized = TransformedSequence(image_folder, normalize)
|
|
89
|
+
|
|
90
|
+
>>> # Meta transform via subclassing:
|
|
91
|
+
>>> class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
|
|
92
|
+
... def get_meta(self, index: int) -> AugMeta:
|
|
93
|
+
... return AugMeta(
|
|
94
|
+
... path=self.source.get_meta(index),
|
|
95
|
+
... applied="normalize",
|
|
96
|
+
... )
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
def __init__(
|
|
100
|
+
self,
|
|
101
|
+
source: DataSequence[T_in, M_in],
|
|
102
|
+
transform: Callable[[T_in], T_out],
|
|
103
|
+
) -> None:
|
|
104
|
+
self._source = source
|
|
105
|
+
self._transform = transform
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def source(self) -> DataSequence[T_in, M_in]:
|
|
109
|
+
"""The wrapped sequence."""
|
|
110
|
+
return self._source
|
|
111
|
+
|
|
112
|
+
def __len__(self) -> int:
|
|
113
|
+
return len(self._source)
|
|
114
|
+
|
|
115
|
+
def get_item(self, index: int) -> T_out:
|
|
116
|
+
return self._transform(self._source.get_item(index))
|
|
117
|
+
|
|
118
|
+
def get_meta(self, index: int) -> M_out:
|
|
119
|
+
# Passthrough by default. Override when M_out != M_in.
|
|
120
|
+
return cast("M_out", self._source.get_meta(index))
|
|
121
|
+
|
|
122
|
+
|
|
62
123
|
class ConcatSequence[T, M](DataSequence[T, M]):
|
|
63
124
|
"""The end-to-end concatenation of zero or more `sources`.
|
|
64
125
|
|
|
@@ -112,7 +173,7 @@ class ConcatSequence[T, M](DataSequence[T, M]):
|
|
|
112
173
|
return source.get_meta(local)
|
|
113
174
|
|
|
114
175
|
|
|
115
|
-
class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
176
|
+
class WindowedSequence[T, M_in, M_out = M_in](DataSequence[tuple[T, ...], M_out]):
|
|
116
177
|
"""An abstract sliding-window view over `source`.
|
|
117
178
|
|
|
118
179
|
Each item is a tuple of `size` items from `source`, starting at
|
|
@@ -130,8 +191,8 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
130
191
|
T: Item type of `source` (also the per-frame type within each
|
|
131
192
|
window).
|
|
132
193
|
M_in: Metadata type of `source` (per-frame metadata).
|
|
133
|
-
M_out: Metadata type of the window. Determined by the
|
|
134
|
-
subclass's `get_meta` return.
|
|
194
|
+
M_out: Metadata type of the window. Defaults to `M_in`.
|
|
195
|
+
Determined by the subclass's `get_meta` return.
|
|
135
196
|
|
|
136
197
|
Args:
|
|
137
198
|
source: The sequence to window over.
|
|
@@ -219,3 +280,115 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
219
280
|
@abstractmethod
|
|
220
281
|
def get_meta(self, index: int) -> M_out:
|
|
221
282
|
raise NotImplementedError
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class ZippedSequence[T1, T2, M1 = None, M2 = None](
|
|
286
|
+
DataSequence[tuple[T1, T2], tuple[M1, M2]]
|
|
287
|
+
):
|
|
288
|
+
"""Element-wise zip of two sequences.
|
|
289
|
+
|
|
290
|
+
Item `i` is `(first[i], second[i])` and metadata `i` is
|
|
291
|
+
`(first.get_meta(i), second.get_meta(i))` -- the "paired image + label"
|
|
292
|
+
pattern that `ConcatSequence` (end-to-end) cannot express.
|
|
293
|
+
|
|
294
|
+
With `strict=True` (the default) the two sequences must have the same
|
|
295
|
+
length; a mismatch raises `ValueError` at construction. With
|
|
296
|
+
`strict=False` the view is truncated to the shorter length, like the
|
|
297
|
+
builtin `zip`. For a different combined-metadata shape, subclass and
|
|
298
|
+
override `get_meta`.
|
|
299
|
+
|
|
300
|
+
Type Parameters:
|
|
301
|
+
T1: Item type of the first source.
|
|
302
|
+
T2: Item type of the second source.
|
|
303
|
+
M1: Metadata type of the first source. Defaults to `None`.
|
|
304
|
+
M2: Metadata type of the second source. Defaults to `None`.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
first: The first sequence.
|
|
308
|
+
second: The second sequence.
|
|
309
|
+
strict: When True (default), require equal lengths and raise on a
|
|
310
|
+
mismatch. When False, truncate to the shorter length.
|
|
311
|
+
|
|
312
|
+
Raises:
|
|
313
|
+
ValueError: If `strict` is True and the sequences differ in length.
|
|
314
|
+
|
|
315
|
+
Example:
|
|
316
|
+
>>> pairs = ZippedSequence(images, labels)
|
|
317
|
+
>>> pairs[0] # (images[0], labels[0])
|
|
318
|
+
>>> pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
def __init__(
|
|
322
|
+
self,
|
|
323
|
+
first: DataSequence[T1, M1],
|
|
324
|
+
second: DataSequence[T2, M2],
|
|
325
|
+
*,
|
|
326
|
+
strict: bool = True,
|
|
327
|
+
) -> None:
|
|
328
|
+
if strict and len(first) != len(second):
|
|
329
|
+
msg = f"sequences differ in length: {len(first)} != {len(second)}"
|
|
330
|
+
raise ValueError(msg)
|
|
331
|
+
self._first = first
|
|
332
|
+
self._second = second
|
|
333
|
+
self._length = len(first) if strict else min(len(first), len(second))
|
|
334
|
+
|
|
335
|
+
@property
|
|
336
|
+
def first(self) -> DataSequence[T1, M1]:
|
|
337
|
+
"""The first wrapped sequence."""
|
|
338
|
+
return self._first
|
|
339
|
+
|
|
340
|
+
@property
|
|
341
|
+
def second(self) -> DataSequence[T2, M2]:
|
|
342
|
+
"""The second wrapped sequence."""
|
|
343
|
+
return self._second
|
|
344
|
+
|
|
345
|
+
def __len__(self) -> int:
|
|
346
|
+
return self._length
|
|
347
|
+
|
|
348
|
+
def _normalize_index(self, index: int) -> int:
|
|
349
|
+
"""Normalize a possibly-negative index and validate range.
|
|
350
|
+
|
|
351
|
+
Indices resolve against the zipped length (the shorter source when
|
|
352
|
+
`strict=False`), so they address the same position in both sources.
|
|
353
|
+
|
|
354
|
+
Raises:
|
|
355
|
+
IndexError: If `index` is outside `[-len(self), len(self))`.
|
|
356
|
+
"""
|
|
357
|
+
n = self._length
|
|
358
|
+
original = index
|
|
359
|
+
if index < 0:
|
|
360
|
+
index += n
|
|
361
|
+
if not 0 <= index < n:
|
|
362
|
+
msg = f"index {original} out of range for length {n}"
|
|
363
|
+
raise IndexError(msg)
|
|
364
|
+
return index
|
|
365
|
+
|
|
366
|
+
def get_item(self, index: int) -> tuple[T1, T2]:
|
|
367
|
+
index = self._normalize_index(index)
|
|
368
|
+
return self._first.get_item(index), self._second.get_item(index)
|
|
369
|
+
|
|
370
|
+
def get_items(self, indices: Sequence[int]) -> Sequence[tuple[T1, T2]]:
|
|
371
|
+
# Normalize, then bulk-delegate so each source's `get_items`
|
|
372
|
+
# optimization is used.
|
|
373
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
374
|
+
return list(
|
|
375
|
+
zip(
|
|
376
|
+
self._first.get_items(normalized),
|
|
377
|
+
self._second.get_items(normalized),
|
|
378
|
+
strict=True,
|
|
379
|
+
)
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
def get_meta(self, index: int) -> tuple[M1, M2]:
|
|
383
|
+
index = self._normalize_index(index)
|
|
384
|
+
return self._first.get_meta(index), self._second.get_meta(index)
|
|
385
|
+
|
|
386
|
+
def get_metas(self, indices: Sequence[int]) -> Sequence[tuple[M1, M2]]:
|
|
387
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
388
|
+
return list(
|
|
389
|
+
zip(
|
|
390
|
+
self._first.get_metas(normalized),
|
|
391
|
+
self._second.get_metas(normalized),
|
|
392
|
+
strict=True,
|
|
393
|
+
)
|
|
394
|
+
)
|
|
@@ -14,11 +14,92 @@ if TYPE_CHECKING:
|
|
|
14
14
|
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
-
"""A
|
|
17
|
+
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
+
"""A `DataSequence` over an explicit, ordered list of files.
|
|
19
|
+
|
|
20
|
+
Items live one-per-file; subclasses implement `load_file` and `get_meta`.
|
|
21
|
+
The files are given directly rather than discovered under a `root`, so
|
|
22
|
+
they may live in unrelated directories -- or, on Windows, on different
|
|
23
|
+
drives. (`FileFolderSequence` is the special case where the list is
|
|
24
|
+
discovered under a single root and stored relative to it.)
|
|
25
|
+
|
|
26
|
+
The given order is preserved verbatim and duplicates are kept; sort the
|
|
27
|
+
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
28
|
+
needed. Paths are not checked for existence at construction; `load_file`
|
|
29
|
+
is called lazily on each `get_item`.
|
|
30
|
+
|
|
31
|
+
The base exposes:
|
|
32
|
+
|
|
33
|
+
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
34
|
+
- `get_file(index) -> Path` — full path of the i-th file.
|
|
19
35
|
|
|
20
|
-
|
|
21
|
-
|
|
36
|
+
Type Parameters:
|
|
37
|
+
T: Item type returned by `get_item`.
|
|
38
|
+
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
39
|
+
metadata is something else (label, line number, ...).
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
files: The file paths to expose, in order.
|
|
43
|
+
|
|
44
|
+
Example:
|
|
45
|
+
>>> from pathlib import Path
|
|
46
|
+
>>> class BytesList(FileListSequence[bytes]):
|
|
47
|
+
... def get_meta(self, index: int) -> Path:
|
|
48
|
+
... return self.get_file(index)
|
|
49
|
+
...
|
|
50
|
+
... def load_file(self, path: Path) -> bytes:
|
|
51
|
+
... return path.read_bytes()
|
|
52
|
+
>>>
|
|
53
|
+
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, files: StrPaths) -> None:
|
|
57
|
+
self._files = list(stringify_paths(files))
|
|
58
|
+
|
|
59
|
+
def __len__(self) -> int:
|
|
60
|
+
return len(self._files)
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def files(self) -> tuple[Path, ...]:
|
|
64
|
+
"""Immutable snapshot of the full file paths, in order.
|
|
65
|
+
|
|
66
|
+
Returns a fresh `tuple[Path, ...]` on each access.
|
|
67
|
+
"""
|
|
68
|
+
return tuple(self.get_file(i) for i in range(len(self)))
|
|
69
|
+
|
|
70
|
+
def get_file(self, index: int) -> Path:
|
|
71
|
+
"""Full Path of the file at `index`."""
|
|
72
|
+
return Path(self._files[index])
|
|
73
|
+
|
|
74
|
+
def get_item(self, index: int) -> T:
|
|
75
|
+
return self.load_file(self.get_file(index))
|
|
76
|
+
|
|
77
|
+
@abstractmethod
|
|
78
|
+
def get_meta(self, index: int) -> M:
|
|
79
|
+
raise NotImplementedError
|
|
80
|
+
|
|
81
|
+
@abstractmethod
|
|
82
|
+
def load_file(self, path: Path) -> T:
|
|
83
|
+
"""Decode a single file into an item of type `T`.
|
|
84
|
+
|
|
85
|
+
Called lazily on each `get_item` -- not at construction time.
|
|
86
|
+
Subclasses may freely use external libraries (PIL, librosa,
|
|
87
|
+
cv2, ...) to decode.
|
|
88
|
+
"""
|
|
89
|
+
raise NotImplementedError
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class FileFolderSequence[T, M = Path](FileListSequence[T, M]):
|
|
93
|
+
"""A `FileListSequence` whose file list is discovered under a root.
|
|
94
|
+
|
|
95
|
+
The special case of `FileListSequence` where every file lives under one
|
|
96
|
+
base directory. The list is produced by `list_files(root)`, validated to
|
|
97
|
+
be under `root`, and stored in root-relative form so memory stays low for
|
|
98
|
+
large datasets and the paths survive a `root` relocation; `get_file`
|
|
99
|
+
transparently re-prepends `root`. `load_file`, `get_item`, `files`, and
|
|
100
|
+
`__len__` are inherited unchanged.
|
|
101
|
+
|
|
102
|
+
Subclasses are responsible for three things:
|
|
22
103
|
|
|
23
104
|
- **`list_files(self, root)`** (abstract): return the full `Path`
|
|
24
105
|
of every file to expose, in the desired order. Called once from
|
|
@@ -33,16 +114,9 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
33
114
|
to `Path` and `get_meta(i)` can be the one-liner
|
|
34
115
|
`return self.get_file(i)`.
|
|
35
116
|
|
|
36
|
-
The base exposes:
|
|
117
|
+
The base adds, on top of `FileListSequence`:
|
|
37
118
|
|
|
38
119
|
- `root: Path` — the base directory.
|
|
39
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
40
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
41
|
-
|
|
42
|
-
Paths are kept internally in their root-relative form so that
|
|
43
|
-
memory stays low for large datasets and the sequence survives
|
|
44
|
-
`root` relocations; the conversion is transparent to subclasses
|
|
45
|
-
and external callers.
|
|
46
120
|
|
|
47
121
|
Parameterized subclasses:
|
|
48
122
|
When a subclass needs instance-level options (e.g. `pattern`,
|
|
@@ -94,48 +168,20 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
94
168
|
|
|
95
169
|
def __init__(self, root: StrPath) -> None:
|
|
96
170
|
self._root = ensure_dir_exists(root)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def __len__(self) -> int:
|
|
102
|
-
return len(self._files)
|
|
171
|
+
# `after=root` makes each path root-relative and raises ValueError if
|
|
172
|
+
# any file is not under `root`. The base then stores the relative
|
|
173
|
+
# form; `get_file` re-prepends `root`.
|
|
174
|
+
super().__init__(stringify_paths(self.list_files(self._root), after=self._root))
|
|
103
175
|
|
|
104
176
|
@property
|
|
105
177
|
def root(self) -> Path:
|
|
106
178
|
"""The base directory the sequence was constructed from."""
|
|
107
179
|
return self._root
|
|
108
180
|
|
|
109
|
-
@property
|
|
110
|
-
def files(self) -> tuple[Path, ...]:
|
|
111
|
-
"""Immutable snapshot of the full file paths this sequence exposes.
|
|
112
|
-
|
|
113
|
-
Returns a fresh `tuple[Path, ...]` on each access, in the order
|
|
114
|
-
established by `list_files`.
|
|
115
|
-
"""
|
|
116
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
117
|
-
|
|
118
181
|
def get_file(self, index: int) -> Path:
|
|
119
182
|
"""Full Path of the file at `index`."""
|
|
120
183
|
return wrap_path(self._files[index], prepend=self._root)
|
|
121
184
|
|
|
122
|
-
def get_item(self, index: int) -> T:
|
|
123
|
-
return self.load_file(self.get_file(index))
|
|
124
|
-
|
|
125
|
-
@abstractmethod
|
|
126
|
-
def get_meta(self, index: int) -> M:
|
|
127
|
-
raise NotImplementedError
|
|
128
|
-
|
|
129
|
-
@abstractmethod
|
|
130
|
-
def load_file(self, path: Path) -> T:
|
|
131
|
-
"""Decode a single file into an item of type `T`.
|
|
132
|
-
|
|
133
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
134
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
135
|
-
cv2, ...) to decode.
|
|
136
|
-
"""
|
|
137
|
-
raise NotImplementedError
|
|
138
|
-
|
|
139
185
|
@abstractmethod
|
|
140
186
|
def list_files(self, root: Path) -> list[Path]:
|
|
141
187
|
"""Return the full Path of every file to expose, in order.
|
|
@@ -149,82 +195,6 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
149
195
|
raise NotImplementedError
|
|
150
196
|
|
|
151
197
|
|
|
152
|
-
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
153
|
-
"""A `DataSequence` over an explicit, ordered list of files.
|
|
154
|
-
|
|
155
|
-
Like `FileFolderSequence`, items live one-per-file and subclasses
|
|
156
|
-
implement `load_file` and `get_meta`. Unlike it, the files are given
|
|
157
|
-
directly rather than discovered under a `root`, so they may live in
|
|
158
|
-
unrelated directories -- or, on Windows, on different drives -- which
|
|
159
|
-
`FileFolderSequence` cannot represent (it stores paths relative to one
|
|
160
|
-
root). There is no `list_files`: the input list *is* the listing.
|
|
161
|
-
|
|
162
|
-
The given order is preserved verbatim and duplicates are kept; sort the
|
|
163
|
-
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
164
|
-
needed. Paths are not checked for existence at construction; `load_file`
|
|
165
|
-
is called lazily on each `get_item`.
|
|
166
|
-
|
|
167
|
-
The base exposes:
|
|
168
|
-
|
|
169
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
170
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
171
|
-
|
|
172
|
-
Type Parameters:
|
|
173
|
-
T: Item type returned by `get_item`.
|
|
174
|
-
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
175
|
-
metadata is something else (label, line number, ...).
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
files: The file paths to expose, in order.
|
|
179
|
-
|
|
180
|
-
Example:
|
|
181
|
-
>>> from pathlib import Path
|
|
182
|
-
>>> class BytesList(FileListSequence[bytes]):
|
|
183
|
-
... def get_meta(self, index: int) -> Path:
|
|
184
|
-
... return self.get_file(index)
|
|
185
|
-
...
|
|
186
|
-
... def load_file(self, path: Path) -> bytes:
|
|
187
|
-
... return path.read_bytes()
|
|
188
|
-
>>>
|
|
189
|
-
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
190
|
-
"""
|
|
191
|
-
|
|
192
|
-
def __init__(self, files: StrPaths) -> None:
|
|
193
|
-
self._files = list(stringify_paths(files))
|
|
194
|
-
|
|
195
|
-
def __len__(self) -> int:
|
|
196
|
-
return len(self._files)
|
|
197
|
-
|
|
198
|
-
@property
|
|
199
|
-
def files(self) -> tuple[Path, ...]:
|
|
200
|
-
"""Immutable snapshot of the full file paths, in the given order.
|
|
201
|
-
|
|
202
|
-
Returns a fresh `tuple[Path, ...]` on each access.
|
|
203
|
-
"""
|
|
204
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
205
|
-
|
|
206
|
-
def get_file(self, index: int) -> Path:
|
|
207
|
-
"""Full Path of the file at `index`."""
|
|
208
|
-
return Path(self._files[index])
|
|
209
|
-
|
|
210
|
-
def get_item(self, index: int) -> T:
|
|
211
|
-
return self.load_file(self.get_file(index))
|
|
212
|
-
|
|
213
|
-
@abstractmethod
|
|
214
|
-
def get_meta(self, index: int) -> M:
|
|
215
|
-
raise NotImplementedError
|
|
216
|
-
|
|
217
|
-
@abstractmethod
|
|
218
|
-
def load_file(self, path: Path) -> T:
|
|
219
|
-
"""Decode a single file into an item of type `T`.
|
|
220
|
-
|
|
221
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
222
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
223
|
-
cv2, ...) to decode.
|
|
224
|
-
"""
|
|
225
|
-
raise NotImplementedError
|
|
226
|
-
|
|
227
|
-
|
|
228
198
|
class SingleFileSequence[T, M = None](DataSequence[T, M]):
|
|
229
199
|
"""A `DataSequence` backed by a single file that holds multiple records.
|
|
230
200
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kaparoo_python-0.5.0 → kaparoo_python-0.6.0}/kaparoo/filesystem/search/filters/multi_pattern.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|