kaparoo-python 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kaparoo/data/__init__.py +4 -0
- kaparoo/data/sequences/__init__.py +4 -0
- kaparoo/data/sequences/composers.py +179 -6
- kaparoo/data/sequences/templates.py +90 -120
- {kaparoo_python-0.5.0.dist-info → kaparoo_python-0.6.0.dist-info}/METADATA +7 -6
- {kaparoo_python-0.5.0.dist-info → kaparoo_python-0.6.0.dist-info}/RECORD +8 -8
- {kaparoo_python-0.5.0.dist-info → kaparoo_python-0.6.0.dist-info}/WHEEL +0 -0
- {kaparoo_python-0.5.0.dist-info → kaparoo_python-0.6.0.dist-info}/licenses/LICENSE +0 -0
kaparoo/data/__init__.py
CHANGED
|
@@ -5,7 +5,9 @@ __all__ = (
|
|
|
5
5
|
"FileListSequence",
|
|
6
6
|
"SingleFileSequence",
|
|
7
7
|
"SlicedSequence",
|
|
8
|
+
"TransformedSequence",
|
|
8
9
|
"WindowedSequence",
|
|
10
|
+
"ZippedSequence",
|
|
9
11
|
"generate_batches",
|
|
10
12
|
)
|
|
11
13
|
|
|
@@ -16,6 +18,8 @@ from kaparoo.data.sequences import (
|
|
|
16
18
|
FileListSequence,
|
|
17
19
|
SingleFileSequence,
|
|
18
20
|
SlicedSequence,
|
|
21
|
+
TransformedSequence,
|
|
19
22
|
WindowedSequence,
|
|
23
|
+
ZippedSequence,
|
|
20
24
|
generate_batches,
|
|
21
25
|
)
|
|
@@ -7,7 +7,9 @@ __all__ = (
|
|
|
7
7
|
"FileListSequence",
|
|
8
8
|
"SingleFileSequence",
|
|
9
9
|
"SlicedSequence",
|
|
10
|
+
"TransformedSequence",
|
|
10
11
|
"WindowedSequence",
|
|
12
|
+
"ZippedSequence",
|
|
11
13
|
"generate_batches",
|
|
12
14
|
)
|
|
13
15
|
|
|
@@ -15,7 +17,9 @@ from kaparoo.data.sequences.base import DataSequence
|
|
|
15
17
|
from kaparoo.data.sequences.composers import (
|
|
16
18
|
ConcatSequence,
|
|
17
19
|
SlicedSequence,
|
|
20
|
+
TransformedSequence,
|
|
18
21
|
WindowedSequence,
|
|
22
|
+
ZippedSequence,
|
|
19
23
|
)
|
|
20
24
|
from kaparoo.data.sequences.templates import (
|
|
21
25
|
FileFolderSequence,
|
|
@@ -1,15 +1,21 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = (
|
|
3
|
+
__all__ = (
|
|
4
|
+
"ConcatSequence",
|
|
5
|
+
"SlicedSequence",
|
|
6
|
+
"TransformedSequence",
|
|
7
|
+
"WindowedSequence",
|
|
8
|
+
"ZippedSequence",
|
|
9
|
+
)
|
|
4
10
|
|
|
5
11
|
from abc import abstractmethod
|
|
6
12
|
from bisect import bisect_right
|
|
7
|
-
from typing import TYPE_CHECKING
|
|
13
|
+
from typing import TYPE_CHECKING, cast
|
|
8
14
|
|
|
9
15
|
from kaparoo.data.sequences.base import DataSequence
|
|
10
16
|
|
|
11
17
|
if TYPE_CHECKING:
|
|
12
|
-
from collections.abc import Sequence
|
|
18
|
+
from collections.abc import Callable, Sequence
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
class SlicedSequence[T, M](DataSequence[T, M]):
|
|
@@ -59,6 +65,61 @@ class SlicedSequence[T, M](DataSequence[T, M]):
|
|
|
59
65
|
return self._source.get_meta(self._indices[index])
|
|
60
66
|
|
|
61
67
|
|
|
68
|
+
class TransformedSequence[T_in, M_in, T_out = T_in, M_out = M_in](
|
|
69
|
+
DataSequence[T_out, M_out]
|
|
70
|
+
):
|
|
71
|
+
"""A view of `source` with `transform` applied lazily to each item.
|
|
72
|
+
|
|
73
|
+
`transform` is called on demand in `get_item`; nothing is loaded or
|
|
74
|
+
converted at construction time. `get_meta` passes through
|
|
75
|
+
`source.get_meta` unchanged by default -- override it in a subclass
|
|
76
|
+
when `M_out` differs from `M_in`.
|
|
77
|
+
|
|
78
|
+
Type Parameters:
|
|
79
|
+
T_in: Item type of `source`.
|
|
80
|
+
M_in: Metadata type of `source`.
|
|
81
|
+
T_out: Item type after the transform. Defaults to `T_in`.
|
|
82
|
+
M_out: Metadata type exposed by this view. Defaults to `M_in`.
|
|
83
|
+
When `M_out != M_in`, override `get_meta` in a subclass;
|
|
84
|
+
the default passthrough is only safe when `M_out == M_in`.
|
|
85
|
+
|
|
86
|
+
Example:
|
|
87
|
+
>>> # Item-only transform; metadata passes through unchanged.
|
|
88
|
+
>>> normalized = TransformedSequence(image_folder, normalize)
|
|
89
|
+
|
|
90
|
+
>>> # Meta transform via subclassing:
|
|
91
|
+
>>> class Augmented(TransformedSequence[ndarray, Path, ndarray, AugMeta]):
|
|
92
|
+
... def get_meta(self, index: int) -> AugMeta:
|
|
93
|
+
... return AugMeta(
|
|
94
|
+
... path=self.source.get_meta(index),
|
|
95
|
+
... applied="normalize",
|
|
96
|
+
... )
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
def __init__(
|
|
100
|
+
self,
|
|
101
|
+
source: DataSequence[T_in, M_in],
|
|
102
|
+
transform: Callable[[T_in], T_out],
|
|
103
|
+
) -> None:
|
|
104
|
+
self._source = source
|
|
105
|
+
self._transform = transform
|
|
106
|
+
|
|
107
|
+
@property
|
|
108
|
+
def source(self) -> DataSequence[T_in, M_in]:
|
|
109
|
+
"""The wrapped sequence."""
|
|
110
|
+
return self._source
|
|
111
|
+
|
|
112
|
+
def __len__(self) -> int:
|
|
113
|
+
return len(self._source)
|
|
114
|
+
|
|
115
|
+
def get_item(self, index: int) -> T_out:
|
|
116
|
+
return self._transform(self._source.get_item(index))
|
|
117
|
+
|
|
118
|
+
def get_meta(self, index: int) -> M_out:
|
|
119
|
+
# Passthrough by default. Override when M_out != M_in.
|
|
120
|
+
return cast("M_out", self._source.get_meta(index))
|
|
121
|
+
|
|
122
|
+
|
|
62
123
|
class ConcatSequence[T, M](DataSequence[T, M]):
|
|
63
124
|
"""The end-to-end concatenation of zero or more `sources`.
|
|
64
125
|
|
|
@@ -112,7 +173,7 @@ class ConcatSequence[T, M](DataSequence[T, M]):
|
|
|
112
173
|
return source.get_meta(local)
|
|
113
174
|
|
|
114
175
|
|
|
115
|
-
class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
176
|
+
class WindowedSequence[T, M_in, M_out = M_in](DataSequence[tuple[T, ...], M_out]):
|
|
116
177
|
"""An abstract sliding-window view over `source`.
|
|
117
178
|
|
|
118
179
|
Each item is a tuple of `size` items from `source`, starting at
|
|
@@ -130,8 +191,8 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
130
191
|
T: Item type of `source` (also the per-frame type within each
|
|
131
192
|
window).
|
|
132
193
|
M_in: Metadata type of `source` (per-frame metadata).
|
|
133
|
-
M_out: Metadata type of the window.
|
|
134
|
-
subclass's `get_meta` return.
|
|
194
|
+
M_out: Metadata type of the window. Defaults to `M_in`.
|
|
195
|
+
Determined by the subclass's `get_meta` return.
|
|
135
196
|
|
|
136
197
|
Args:
|
|
137
198
|
source: The sequence to window over.
|
|
@@ -219,3 +280,115 @@ class WindowedSequence[T, M_in, M_out](DataSequence[tuple[T, ...], M_out]):
|
|
|
219
280
|
@abstractmethod
|
|
220
281
|
def get_meta(self, index: int) -> M_out:
|
|
221
282
|
raise NotImplementedError
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class ZippedSequence[T1, T2, M1 = None, M2 = None](
|
|
286
|
+
DataSequence[tuple[T1, T2], tuple[M1, M2]]
|
|
287
|
+
):
|
|
288
|
+
"""Element-wise zip of two sequences.
|
|
289
|
+
|
|
290
|
+
Item `i` is `(first[i], second[i])` and metadata `i` is
|
|
291
|
+
`(first.get_meta(i), second.get_meta(i))` -- the "paired image + label"
|
|
292
|
+
pattern that `ConcatSequence` (end-to-end) cannot express.
|
|
293
|
+
|
|
294
|
+
With `strict=True` (the default) the two sequences must have the same
|
|
295
|
+
length; a mismatch raises `ValueError` at construction. With
|
|
296
|
+
`strict=False` the view is truncated to the shorter length, like the
|
|
297
|
+
builtin `zip`. For a different combined-metadata shape, subclass and
|
|
298
|
+
override `get_meta`.
|
|
299
|
+
|
|
300
|
+
Type Parameters:
|
|
301
|
+
T1: Item type of the first source.
|
|
302
|
+
T2: Item type of the second source.
|
|
303
|
+
M1: Metadata type of the first source. Defaults to `None`.
|
|
304
|
+
M2: Metadata type of the second source. Defaults to `None`.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
first: The first sequence.
|
|
308
|
+
second: The second sequence.
|
|
309
|
+
strict: When True (default), require equal lengths and raise on a
|
|
310
|
+
mismatch. When False, truncate to the shorter length.
|
|
311
|
+
|
|
312
|
+
Raises:
|
|
313
|
+
ValueError: If `strict` is True and the sequences differ in length.
|
|
314
|
+
|
|
315
|
+
Example:
|
|
316
|
+
>>> pairs = ZippedSequence(images, labels)
|
|
317
|
+
>>> pairs[0] # (images[0], labels[0])
|
|
318
|
+
>>> pairs.get_meta(0) # (images.get_meta(0), labels.get_meta(0))
|
|
319
|
+
"""
|
|
320
|
+
|
|
321
|
+
def __init__(
|
|
322
|
+
self,
|
|
323
|
+
first: DataSequence[T1, M1],
|
|
324
|
+
second: DataSequence[T2, M2],
|
|
325
|
+
*,
|
|
326
|
+
strict: bool = True,
|
|
327
|
+
) -> None:
|
|
328
|
+
if strict and len(first) != len(second):
|
|
329
|
+
msg = f"sequences differ in length: {len(first)} != {len(second)}"
|
|
330
|
+
raise ValueError(msg)
|
|
331
|
+
self._first = first
|
|
332
|
+
self._second = second
|
|
333
|
+
self._length = len(first) if strict else min(len(first), len(second))
|
|
334
|
+
|
|
335
|
+
@property
|
|
336
|
+
def first(self) -> DataSequence[T1, M1]:
|
|
337
|
+
"""The first wrapped sequence."""
|
|
338
|
+
return self._first
|
|
339
|
+
|
|
340
|
+
@property
|
|
341
|
+
def second(self) -> DataSequence[T2, M2]:
|
|
342
|
+
"""The second wrapped sequence."""
|
|
343
|
+
return self._second
|
|
344
|
+
|
|
345
|
+
def __len__(self) -> int:
|
|
346
|
+
return self._length
|
|
347
|
+
|
|
348
|
+
def _normalize_index(self, index: int) -> int:
|
|
349
|
+
"""Normalize a possibly-negative index and validate range.
|
|
350
|
+
|
|
351
|
+
Indices resolve against the zipped length (the shorter source when
|
|
352
|
+
`strict=False`), so they address the same position in both sources.
|
|
353
|
+
|
|
354
|
+
Raises:
|
|
355
|
+
IndexError: If `index` is outside `[-len(self), len(self))`.
|
|
356
|
+
"""
|
|
357
|
+
n = self._length
|
|
358
|
+
original = index
|
|
359
|
+
if index < 0:
|
|
360
|
+
index += n
|
|
361
|
+
if not 0 <= index < n:
|
|
362
|
+
msg = f"index {original} out of range for length {n}"
|
|
363
|
+
raise IndexError(msg)
|
|
364
|
+
return index
|
|
365
|
+
|
|
366
|
+
def get_item(self, index: int) -> tuple[T1, T2]:
|
|
367
|
+
index = self._normalize_index(index)
|
|
368
|
+
return self._first.get_item(index), self._second.get_item(index)
|
|
369
|
+
|
|
370
|
+
def get_items(self, indices: Sequence[int]) -> Sequence[tuple[T1, T2]]:
|
|
371
|
+
# Normalize, then bulk-delegate so each source's `get_items`
|
|
372
|
+
# optimization is used.
|
|
373
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
374
|
+
return list(
|
|
375
|
+
zip(
|
|
376
|
+
self._first.get_items(normalized),
|
|
377
|
+
self._second.get_items(normalized),
|
|
378
|
+
strict=True,
|
|
379
|
+
)
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
def get_meta(self, index: int) -> tuple[M1, M2]:
|
|
383
|
+
index = self._normalize_index(index)
|
|
384
|
+
return self._first.get_meta(index), self._second.get_meta(index)
|
|
385
|
+
|
|
386
|
+
def get_metas(self, indices: Sequence[int]) -> Sequence[tuple[M1, M2]]:
|
|
387
|
+
normalized = [self._normalize_index(i) for i in indices]
|
|
388
|
+
return list(
|
|
389
|
+
zip(
|
|
390
|
+
self._first.get_metas(normalized),
|
|
391
|
+
self._second.get_metas(normalized),
|
|
392
|
+
strict=True,
|
|
393
|
+
)
|
|
394
|
+
)
|
|
@@ -14,11 +14,92 @@ if TYPE_CHECKING:
|
|
|
14
14
|
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
-
"""A
|
|
17
|
+
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
18
|
+
"""A `DataSequence` over an explicit, ordered list of files.
|
|
19
|
+
|
|
20
|
+
Items live one-per-file; subclasses implement `load_file` and `get_meta`.
|
|
21
|
+
The files are given directly rather than discovered under a `root`, so
|
|
22
|
+
they may live in unrelated directories -- or, on Windows, on different
|
|
23
|
+
drives. (`FileFolderSequence` is the special case where the list is
|
|
24
|
+
discovered under a single root and stored relative to it.)
|
|
25
|
+
|
|
26
|
+
The given order is preserved verbatim and duplicates are kept; sort the
|
|
27
|
+
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
28
|
+
needed. Paths are not checked for existence at construction; `load_file`
|
|
29
|
+
is called lazily on each `get_item`.
|
|
30
|
+
|
|
31
|
+
The base exposes:
|
|
32
|
+
|
|
33
|
+
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
34
|
+
- `get_file(index) -> Path` — full path of the i-th file.
|
|
19
35
|
|
|
20
|
-
|
|
21
|
-
|
|
36
|
+
Type Parameters:
|
|
37
|
+
T: Item type returned by `get_item`.
|
|
38
|
+
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
39
|
+
metadata is something else (label, line number, ...).
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
files: The file paths to expose, in order.
|
|
43
|
+
|
|
44
|
+
Example:
|
|
45
|
+
>>> from pathlib import Path
|
|
46
|
+
>>> class BytesList(FileListSequence[bytes]):
|
|
47
|
+
... def get_meta(self, index: int) -> Path:
|
|
48
|
+
... return self.get_file(index)
|
|
49
|
+
...
|
|
50
|
+
... def load_file(self, path: Path) -> bytes:
|
|
51
|
+
... return path.read_bytes()
|
|
52
|
+
>>>
|
|
53
|
+
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
def __init__(self, files: StrPaths) -> None:
|
|
57
|
+
self._files = list(stringify_paths(files))
|
|
58
|
+
|
|
59
|
+
def __len__(self) -> int:
|
|
60
|
+
return len(self._files)
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def files(self) -> tuple[Path, ...]:
|
|
64
|
+
"""Immutable snapshot of the full file paths, in order.
|
|
65
|
+
|
|
66
|
+
Returns a fresh `tuple[Path, ...]` on each access.
|
|
67
|
+
"""
|
|
68
|
+
return tuple(self.get_file(i) for i in range(len(self)))
|
|
69
|
+
|
|
70
|
+
def get_file(self, index: int) -> Path:
|
|
71
|
+
"""Full Path of the file at `index`."""
|
|
72
|
+
return Path(self._files[index])
|
|
73
|
+
|
|
74
|
+
def get_item(self, index: int) -> T:
|
|
75
|
+
return self.load_file(self.get_file(index))
|
|
76
|
+
|
|
77
|
+
@abstractmethod
|
|
78
|
+
def get_meta(self, index: int) -> M:
|
|
79
|
+
raise NotImplementedError
|
|
80
|
+
|
|
81
|
+
@abstractmethod
|
|
82
|
+
def load_file(self, path: Path) -> T:
|
|
83
|
+
"""Decode a single file into an item of type `T`.
|
|
84
|
+
|
|
85
|
+
Called lazily on each `get_item` -- not at construction time.
|
|
86
|
+
Subclasses may freely use external libraries (PIL, librosa,
|
|
87
|
+
cv2, ...) to decode.
|
|
88
|
+
"""
|
|
89
|
+
raise NotImplementedError
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class FileFolderSequence[T, M = Path](FileListSequence[T, M]):
|
|
93
|
+
"""A `FileListSequence` whose file list is discovered under a root.
|
|
94
|
+
|
|
95
|
+
The special case of `FileListSequence` where every file lives under one
|
|
96
|
+
base directory. The list is produced by `list_files(root)`, validated to
|
|
97
|
+
be under `root`, and stored in root-relative form so memory stays low for
|
|
98
|
+
large datasets and the paths survive a `root` relocation; `get_file`
|
|
99
|
+
transparently re-prepends `root`. `load_file`, `get_item`, `files`, and
|
|
100
|
+
`__len__` are inherited unchanged.
|
|
101
|
+
|
|
102
|
+
Subclasses are responsible for three things:
|
|
22
103
|
|
|
23
104
|
- **`list_files(self, root)`** (abstract): return the full `Path`
|
|
24
105
|
of every file to expose, in the desired order. Called once from
|
|
@@ -33,16 +114,9 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
33
114
|
to `Path` and `get_meta(i)` can be the one-liner
|
|
34
115
|
`return self.get_file(i)`.
|
|
35
116
|
|
|
36
|
-
The base
|
|
117
|
+
The base adds, on top of `FileListSequence`:
|
|
37
118
|
|
|
38
119
|
- `root: Path` — the base directory.
|
|
39
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
40
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
41
|
-
|
|
42
|
-
Paths are kept internally in their root-relative form so that
|
|
43
|
-
memory stays low for large datasets and the sequence survives
|
|
44
|
-
`root` relocations; the conversion is transparent to subclasses
|
|
45
|
-
and external callers.
|
|
46
120
|
|
|
47
121
|
Parameterized subclasses:
|
|
48
122
|
When a subclass needs instance-level options (e.g. `pattern`,
|
|
@@ -94,48 +168,20 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
94
168
|
|
|
95
169
|
def __init__(self, root: StrPath) -> None:
|
|
96
170
|
self._root = ensure_dir_exists(root)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def __len__(self) -> int:
|
|
102
|
-
return len(self._files)
|
|
171
|
+
# `after=root` makes each path root-relative and raises ValueError if
|
|
172
|
+
# any file is not under `root`. The base then stores the relative
|
|
173
|
+
# form; `get_file` re-prepends `root`.
|
|
174
|
+
super().__init__(stringify_paths(self.list_files(self._root), after=self._root))
|
|
103
175
|
|
|
104
176
|
@property
|
|
105
177
|
def root(self) -> Path:
|
|
106
178
|
"""The base directory the sequence was constructed from."""
|
|
107
179
|
return self._root
|
|
108
180
|
|
|
109
|
-
@property
|
|
110
|
-
def files(self) -> tuple[Path, ...]:
|
|
111
|
-
"""Immutable snapshot of the full file paths this sequence exposes.
|
|
112
|
-
|
|
113
|
-
Returns a fresh `tuple[Path, ...]` on each access, in the order
|
|
114
|
-
established by `list_files`.
|
|
115
|
-
"""
|
|
116
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
117
|
-
|
|
118
181
|
def get_file(self, index: int) -> Path:
|
|
119
182
|
"""Full Path of the file at `index`."""
|
|
120
183
|
return wrap_path(self._files[index], prepend=self._root)
|
|
121
184
|
|
|
122
|
-
def get_item(self, index: int) -> T:
|
|
123
|
-
return self.load_file(self.get_file(index))
|
|
124
|
-
|
|
125
|
-
@abstractmethod
|
|
126
|
-
def get_meta(self, index: int) -> M:
|
|
127
|
-
raise NotImplementedError
|
|
128
|
-
|
|
129
|
-
@abstractmethod
|
|
130
|
-
def load_file(self, path: Path) -> T:
|
|
131
|
-
"""Decode a single file into an item of type `T`.
|
|
132
|
-
|
|
133
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
134
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
135
|
-
cv2, ...) to decode.
|
|
136
|
-
"""
|
|
137
|
-
raise NotImplementedError
|
|
138
|
-
|
|
139
185
|
@abstractmethod
|
|
140
186
|
def list_files(self, root: Path) -> list[Path]:
|
|
141
187
|
"""Return the full Path of every file to expose, in order.
|
|
@@ -149,82 +195,6 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
149
195
|
raise NotImplementedError
|
|
150
196
|
|
|
151
197
|
|
|
152
|
-
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
153
|
-
"""A `DataSequence` over an explicit, ordered list of files.
|
|
154
|
-
|
|
155
|
-
Like `FileFolderSequence`, items live one-per-file and subclasses
|
|
156
|
-
implement `load_file` and `get_meta`. Unlike it, the files are given
|
|
157
|
-
directly rather than discovered under a `root`, so they may live in
|
|
158
|
-
unrelated directories -- or, on Windows, on different drives -- which
|
|
159
|
-
`FileFolderSequence` cannot represent (it stores paths relative to one
|
|
160
|
-
root). There is no `list_files`: the input list *is* the listing.
|
|
161
|
-
|
|
162
|
-
The given order is preserved verbatim and duplicates are kept; sort the
|
|
163
|
-
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
164
|
-
needed. Paths are not checked for existence at construction; `load_file`
|
|
165
|
-
is called lazily on each `get_item`.
|
|
166
|
-
|
|
167
|
-
The base exposes:
|
|
168
|
-
|
|
169
|
-
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
170
|
-
- `get_file(index) -> Path` — full path of the i-th file.
|
|
171
|
-
|
|
172
|
-
Type Parameters:
|
|
173
|
-
T: Item type returned by `get_item`.
|
|
174
|
-
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
175
|
-
metadata is something else (label, line number, ...).
|
|
176
|
-
|
|
177
|
-
Args:
|
|
178
|
-
files: The file paths to expose, in order.
|
|
179
|
-
|
|
180
|
-
Example:
|
|
181
|
-
>>> from pathlib import Path
|
|
182
|
-
>>> class BytesList(FileListSequence[bytes]):
|
|
183
|
-
... def get_meta(self, index: int) -> Path:
|
|
184
|
-
... return self.get_file(index)
|
|
185
|
-
...
|
|
186
|
-
... def load_file(self, path: Path) -> bytes:
|
|
187
|
-
... return path.read_bytes()
|
|
188
|
-
>>>
|
|
189
|
-
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
190
|
-
"""
|
|
191
|
-
|
|
192
|
-
def __init__(self, files: StrPaths) -> None:
|
|
193
|
-
self._files = list(stringify_paths(files))
|
|
194
|
-
|
|
195
|
-
def __len__(self) -> int:
|
|
196
|
-
return len(self._files)
|
|
197
|
-
|
|
198
|
-
@property
|
|
199
|
-
def files(self) -> tuple[Path, ...]:
|
|
200
|
-
"""Immutable snapshot of the full file paths, in the given order.
|
|
201
|
-
|
|
202
|
-
Returns a fresh `tuple[Path, ...]` on each access.
|
|
203
|
-
"""
|
|
204
|
-
return tuple(self.get_file(i) for i in range(len(self)))
|
|
205
|
-
|
|
206
|
-
def get_file(self, index: int) -> Path:
|
|
207
|
-
"""Full Path of the file at `index`."""
|
|
208
|
-
return Path(self._files[index])
|
|
209
|
-
|
|
210
|
-
def get_item(self, index: int) -> T:
|
|
211
|
-
return self.load_file(self.get_file(index))
|
|
212
|
-
|
|
213
|
-
@abstractmethod
|
|
214
|
-
def get_meta(self, index: int) -> M:
|
|
215
|
-
raise NotImplementedError
|
|
216
|
-
|
|
217
|
-
@abstractmethod
|
|
218
|
-
def load_file(self, path: Path) -> T:
|
|
219
|
-
"""Decode a single file into an item of type `T`.
|
|
220
|
-
|
|
221
|
-
Called lazily on each `get_item` -- not at construction time.
|
|
222
|
-
Subclasses may freely use external libraries (PIL, librosa,
|
|
223
|
-
cv2, ...) to decode.
|
|
224
|
-
"""
|
|
225
|
-
raise NotImplementedError
|
|
226
|
-
|
|
227
|
-
|
|
228
198
|
class SingleFileSequence[T, M = None](DataSequence[T, M]):
|
|
229
199
|
"""A `DataSequence` backed by a single file that holds multiple records.
|
|
230
200
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaparoo-python
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Personally common and useful Python features
|
|
5
5
|
Keywords: filesystem,pathlib,paths,utilities
|
|
6
6
|
Author: Jaewoo Park
|
|
@@ -67,16 +67,17 @@ hook for custom filter kinds.
|
|
|
67
67
|
|
|
68
68
|
`Timer` / `SegmentTimer` context-manager-and-decorator timers (with
|
|
69
69
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
70
|
-
pluggable metric aggregation (the batch → epoch → run pattern
|
|
71
|
-
small family of helpers for working with
|
|
72
|
-
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
70
|
+
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
71
|
+
experimental); plus a small family of helpers for working with
|
|
72
|
+
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
73
73
|
|
|
74
74
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
75
75
|
|
|
76
76
|
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
77
77
|
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
78
|
-
`WindowedSequence`), file-backed
|
|
79
|
-
`SingleFileSequence`),
|
|
78
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
79
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
80
|
+
and `generate_batches`.
|
|
80
81
|
|
|
81
82
|
## 🎯 Quick example
|
|
82
83
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
kaparoo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
kaparoo/data/__init__.py,sha256=
|
|
3
|
-
kaparoo/data/sequences/__init__.py,sha256=
|
|
2
|
+
kaparoo/data/__init__.py,sha256=dgbmVOVq_As2ovxRw_JQRdVjQ8d1UTkMFt_A50YfCfM,508
|
|
3
|
+
kaparoo/data/sequences/__init__.py,sha256=3WPq8yDzGvGFXRxu0Dtbp0WPLga32qMDDkL_CZlmFpE,674
|
|
4
4
|
kaparoo/data/sequences/base.py,sha256=m2JcIcT-SLrTzsjFCtgrQ9I5XVpB6PYBigyooEpg4VE,2628
|
|
5
|
-
kaparoo/data/sequences/composers.py,sha256=
|
|
6
|
-
kaparoo/data/sequences/templates.py,sha256=
|
|
5
|
+
kaparoo/data/sequences/composers.py,sha256=eaWW8VKyVq4zzJAvIh_LKYt6g-R-HqzzpoV4NKtFeqc,13373
|
|
6
|
+
kaparoo/data/sequences/templates.py,sha256=9a_vM-c9OaF-9qY0ybqRaGkVkywJzrQbNlEHfe39MHU,9572
|
|
7
7
|
kaparoo/data/sequences/utils.py,sha256=oe0qWwnAjsf-9CBUPSlkxkeuQS8kjn1sYhx2eDIwPKI,2808
|
|
8
8
|
kaparoo/filesystem/__init__.py,sha256=uES_e8DYBE0db5z-_E7N2-vSGvi9-uJiSOWnKHdtuPs,1797
|
|
9
9
|
kaparoo/filesystem/directory.py,sha256=Pr15aMl0tz2-VU14rkaFzsV0Zo2oqaevDM0JLW-ZOwk,10421
|
|
@@ -28,7 +28,7 @@ kaparoo/utils/__init__.py,sha256=QR-aXuDvxCtOXZLiqHNrKozLlzg_v60UaKI6x2S3YtU,785
|
|
|
28
28
|
kaparoo/utils/aggregate.py,sha256=8apzZiqLAxoSO51DDDMsOnkrsEaejudXDTc6h3uKRZc,13953
|
|
29
29
|
kaparoo/utils/optional.py,sha256=UgNhGDzl317PE_ESt9hW7yl9MYcdL0nV6Ly0mpqIz0U,4224
|
|
30
30
|
kaparoo/utils/timer.py,sha256=n2RenrYik51v1Dmo9JmZpG3_cPafRDgGMbxdvNoRhgs,17001
|
|
31
|
-
kaparoo_python-0.
|
|
32
|
-
kaparoo_python-0.
|
|
33
|
-
kaparoo_python-0.
|
|
34
|
-
kaparoo_python-0.
|
|
31
|
+
kaparoo_python-0.6.0.dist-info/licenses/LICENSE,sha256=hb6LWYP2rtcoz4V2HpawmblDfHwjwsg9N3cz0c5JQJE,1067
|
|
32
|
+
kaparoo_python-0.6.0.dist-info/WHEEL,sha256=V5-3dKee3Zs8C4JP6swr6zdqriLsOpItBEQxe6_oWpY,81
|
|
33
|
+
kaparoo_python-0.6.0.dist-info/METADATA,sha256=I9qqiuRdVQeIh0cHdadR_3UQlyLgtKpTLJ-ycLtYWjQ,4327
|
|
34
|
+
kaparoo_python-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|