kaparoo-python 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/PKG-INFO +36 -20
- kaparoo_python-0.3.0/README.md +81 -0
- kaparoo_python-0.3.0/kaparoo/data/README.md +216 -0
- kaparoo_python-0.3.0/kaparoo/data/__init__.py +19 -0
- kaparoo_python-0.3.0/kaparoo/data/sequences/__init__.py +23 -0
- kaparoo_python-0.3.0/kaparoo/data/sequences/base.py +73 -0
- kaparoo_python-0.3.0/kaparoo/data/sequences/composers.py +221 -0
- kaparoo_python-0.3.0/kaparoo/data/sequences/templates.py +196 -0
- kaparoo_python-0.3.0/kaparoo/data/sequences/utils.py +79 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/README.md +120 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/__init__.py +73 -73
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/directory.py +226 -226
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/exceptions.py +17 -17
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/existence.py +392 -392
- kaparoo_python-0.3.0/kaparoo/filesystem/search/README.md +221 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/search/__init__.py +2 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/search/classes.py +208 -199
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/search/deprecated.py +289 -289
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/__init__.py +73 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/base.py +92 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/logical.py +138 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/multi_pattern.py +160 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/pattern.py +216 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/types.py +47 -0
- kaparoo_python-0.3.0/kaparoo/filesystem/search/filters/utils.py +51 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/search/wrappers.py +318 -311
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/types.py +9 -9
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/filesystem/utils.py +208 -208
- kaparoo_python-0.3.0/kaparoo/utils/README.md +121 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/utils/__init__.py +21 -21
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/utils/optional.py +129 -129
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/utils/timer.py +374 -374
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/pyproject.toml +305 -253
- kaparoo_python-0.2.0/README.md +0 -65
- kaparoo_python-0.2.0/kaparoo/data/__init__.py +0 -0
- kaparoo_python-0.2.0/kaparoo/data/sequence.py +0 -39
- kaparoo_python-0.2.0/kaparoo/data/utils.py +0 -46
- kaparoo_python-0.2.0/kaparoo/filesystem/search/filters.py +0 -322
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/LICENSE +0 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.2.0 → kaparoo_python-0.3.0}/kaparoo/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaparoo-python
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Personally common and useful Python features
|
|
5
5
|
Keywords: filesystem,pathlib,paths,utilities
|
|
6
6
|
Author: Jaewoo Park
|
|
@@ -46,32 +46,48 @@ pip install kaparoo-python
|
|
|
46
46
|
|
|
47
47
|
## 🧩 Modules
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
Each submodule ships its own README with focused examples.
|
|
50
50
|
|
|
51
|
-
`
|
|
51
|
+
### [`kaparoo.filesystem`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem)
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
- **`exceptions`** — `DirectoryNotFoundError`, `NotAFileError`.
|
|
57
|
-
- **`types`** — `StrPath`, `StrPaths`.
|
|
53
|
+
`pathlib`-based filesystem helpers: existence checks (`*_exists`),
|
|
54
|
+
`ensure_*` validators, `make_dir(s)`, `dir_empty(s)`, path
|
|
55
|
+
stringification, and a small exception hierarchy.
|
|
58
56
|
|
|
59
|
-
### `kaparoo.filesystem.search`
|
|
57
|
+
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
60
58
|
|
|
61
|
-
Filesystem traversal with composable filters.
|
|
59
|
+
Filesystem traversal with composable filters. Includes `search_paths` /
|
|
60
|
+
`search_files` / `search_dirs`, a `Filter` family (pattern, multi-pattern,
|
|
61
|
+
logical) that round-trips through JSON-friendly dicts, and an extension
|
|
62
|
+
hook for custom filter kinds.
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
- **Pattern filters** — `Equals`, `StartsWith`, `EndsWith`, `Contains`,
|
|
65
|
-
`Regex`, `Glob`.
|
|
66
|
-
- **Multi-pattern filters** — `EqualsAny`, `StartsWithAny`, `EndsWithAny`,
|
|
67
|
-
`ContainsAny`.
|
|
68
|
-
- **Logical filters** — `And`, `Or`, `Not`.
|
|
69
|
-
- **Deprecated** — `get_paths`, `get_files`, `get_dirs` (use `search_*`).
|
|
64
|
+
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
70
65
|
|
|
71
|
-
|
|
66
|
+
`Timer` / `LapTimer` context-manager-and-decorator timers, plus a small
|
|
67
|
+
family of helpers for working with `Optional[T]` values
|
|
68
|
+
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
72
69
|
|
|
73
|
-
|
|
74
|
-
|
|
70
|
+
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
71
|
+
|
|
72
|
+
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
73
|
+
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
74
|
+
`WindowedSequence`), file-backed templates (`FileFolderSequence`,
|
|
75
|
+
`SingleFileSequence`), and `generate_batches`.
|
|
76
|
+
|
|
77
|
+
## 🎯 Quick example
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from kaparoo.filesystem import search_files
|
|
81
|
+
from kaparoo.filesystem.search.filters import And, EndsWith, Equals, Not
|
|
82
|
+
|
|
83
|
+
# All .py files except __init__.py
|
|
84
|
+
py_files = search_files(
|
|
85
|
+
"src",
|
|
86
|
+
name_filter=And((EndsWith(".py"), Not(Equals("__init__.py")))),
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
See each submodule's README for more.
|
|
75
91
|
|
|
76
92
|
## 📋 TODO
|
|
77
93
|
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# kaparoo-python
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/kaparoo-python/)
|
|
4
|
+
[](https://pypi.org/project/kaparoo-python/)
|
|
5
|
+
[](https://www.python.org/)
|
|
6
|
+
[](./LICENSE)
|
|
7
|
+
[](https://github.com/astral-sh/uv)
|
|
8
|
+
[](https://github.com/astral-sh/ruff)
|
|
9
|
+
[](https://github.com/astral-sh/ty)
|
|
10
|
+
[](https://github.com/copier-org/copier)
|
|
11
|
+
|
|
12
|
+
*Personally common and useful Python features.*
|
|
13
|
+
|
|
14
|
+
## 📦 Installation
|
|
15
|
+
|
|
16
|
+
Requires Python 3.14+.
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# With uv (recommended)
|
|
20
|
+
uv add kaparoo-python
|
|
21
|
+
|
|
22
|
+
# With pip
|
|
23
|
+
pip install kaparoo-python
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## 🧩 Modules
|
|
27
|
+
|
|
28
|
+
Each submodule ships its own README with focused examples.
|
|
29
|
+
|
|
30
|
+
### [`kaparoo.filesystem`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem)
|
|
31
|
+
|
|
32
|
+
`pathlib`-based filesystem helpers: existence checks (`*_exists`),
|
|
33
|
+
`ensure_*` validators, `make_dir(s)`, `dir_empty(s)`, path
|
|
34
|
+
stringification, and a small exception hierarchy.
|
|
35
|
+
|
|
36
|
+
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
37
|
+
|
|
38
|
+
Filesystem traversal with composable filters. Includes `search_paths` /
|
|
39
|
+
`search_files` / `search_dirs`, a `Filter` family (pattern, multi-pattern,
|
|
40
|
+
logical) that round-trips through JSON-friendly dicts, and an extension
|
|
41
|
+
hook for custom filter kinds.
|
|
42
|
+
|
|
43
|
+
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
44
|
+
|
|
45
|
+
`Timer` / `LapTimer` context-manager-and-decorator timers, plus a small
|
|
46
|
+
family of helpers for working with `Optional[T]` values
|
|
47
|
+
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
48
|
+
|
|
49
|
+
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
50
|
+
|
|
51
|
+
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
52
|
+
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
53
|
+
`WindowedSequence`), file-backed templates (`FileFolderSequence`,
|
|
54
|
+
`SingleFileSequence`), and `generate_batches`.
|
|
55
|
+
|
|
56
|
+
## 🎯 Quick example
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from kaparoo.filesystem import search_files
|
|
60
|
+
from kaparoo.filesystem.search.filters import And, EndsWith, Equals, Not
|
|
61
|
+
|
|
62
|
+
# All .py files except __init__.py
|
|
63
|
+
py_files = search_files(
|
|
64
|
+
"src",
|
|
65
|
+
name_filter=And((EndsWith(".py"), Not(Equals("__init__.py")))),
|
|
66
|
+
)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
See each submodule's README for more.
|
|
70
|
+
|
|
71
|
+
## 📋 TODO
|
|
72
|
+
|
|
73
|
+
See [TODO.md](./TODO.md) for tracked open items.
|
|
74
|
+
|
|
75
|
+
## 📜 Changelog
|
|
76
|
+
|
|
77
|
+
See [CHANGELOG.md](./CHANGELOG.md) for the version history.
|
|
78
|
+
|
|
79
|
+
## ⚖️ License
|
|
80
|
+
|
|
81
|
+
This project is distributed under the terms of the [MIT](./LICENSE) license.
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# `kaparoo.data`
|
|
2
|
+
|
|
3
|
+
Building blocks for dataset code: a `Sequence`-based abstract base, a
|
|
4
|
+
small set of composers, and ready-to-subclass file-backed templates.
|
|
5
|
+
|
|
6
|
+
## Modules
|
|
7
|
+
|
|
8
|
+
- [`sequences/base`](./sequences/base.py) — `DataSequence[T, M]` abstract base
|
|
9
|
+
- [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
|
|
10
|
+
`ConcatSequence`, `WindowedSequence`
|
|
11
|
+
- [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
|
|
12
|
+
`SingleFileSequence`
|
|
13
|
+
- [`sequences/utils`](./sequences/utils.py) — `generate_batches`
|
|
14
|
+
|
|
15
|
+
All public symbols are re-exported from both `kaparoo.data` and
|
|
16
|
+
`kaparoo.data.sequences`.
|
|
17
|
+
|
|
18
|
+
## DataSequence
|
|
19
|
+
|
|
20
|
+
`DataSequence[T, M]` is a `Sequence[T]` ABC that adds a parallel
|
|
21
|
+
metadata channel. Besides `__len__`, subclasses implement two abstract methods:
|
|
22
|
+
|
|
23
|
+
| Method | Purpose |
|
|
24
|
+
| --- | --- |
|
|
25
|
+
| `get_item(index) -> T` | Decode the i-th item. |
|
|
26
|
+
| `get_meta(index) -> M` | Produce the i-th item's metadata. |
|
|
27
|
+
|
|
28
|
+
The base derives `get_items` / `get_metas` (bulk) and `get_pair` /
|
|
29
|
+
`get_pairs` (item + metadata together). `__getitem__` returns the item
|
|
30
|
+
only — slicing returns the result of `get_items` (a plain list by default). The `M` type parameter
|
|
31
|
+
defaults to `None`; set it explicitly when items carry meaningful
|
|
32
|
+
metadata (paths, labels, line numbers, ...).
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from kaparoo.data.sequences import DataSequence
|
|
36
|
+
|
|
37
|
+
class Labeled(DataSequence[bytes, str]):
|
|
38
|
+
def __init__(self, items, labels):
|
|
39
|
+
self._items = items
|
|
40
|
+
self._labels = labels
|
|
41
|
+
|
|
42
|
+
def __len__(self):
|
|
43
|
+
return len(self._items)
|
|
44
|
+
|
|
45
|
+
def get_item(self, index):
|
|
46
|
+
return self._items[index]
|
|
47
|
+
|
|
48
|
+
def get_meta(self, index):
|
|
49
|
+
return self._labels[index]
|
|
50
|
+
|
|
51
|
+
ds = Labeled([b"a", b"b"], ["cat", "dog"])
|
|
52
|
+
ds[0] # b"a" (item only)
|
|
53
|
+
ds.get_pair(0) # (b"a", "cat") (item + metadata)
|
|
54
|
+
list(ds.get_metas([0, 1])) # ["cat", "dog"]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Composers
|
|
58
|
+
|
|
59
|
+
### `SlicedSequence`
|
|
60
|
+
|
|
61
|
+
A stable-length view over `source` exposing only items at the given
|
|
62
|
+
`indices`. `indices` is materialized as a tuple, so `len()` is O(1) and
|
|
63
|
+
random access is O(1) into the index table. **Duplicates are allowed,
|
|
64
|
+
order is preserved.**
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from kaparoo.data.sequences import SlicedSequence
|
|
68
|
+
|
|
69
|
+
view = SlicedSequence(dataset, [3, 7, 11])
|
|
70
|
+
view[0] # == dataset[3]
|
|
71
|
+
view[1] # == dataset[7]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### `ConcatSequence`
|
|
75
|
+
|
|
76
|
+
End-to-end concatenation of zero or more sources. Lookup is O(log N) in
|
|
77
|
+
the number of sources via cumulative-length `bisect_right`.
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from kaparoo.data.sequences import ConcatSequence
|
|
81
|
+
|
|
82
|
+
combined = ConcatSequence(train_a, train_b, train_c)
|
|
83
|
+
len(combined) # == len(train_a) + len(train_b) + len(train_c)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `WindowedSequence`
|
|
87
|
+
|
|
88
|
+
An abstract sliding-window view: each item is a `tuple[T, ...]` of
|
|
89
|
+
`size` frames from `source`. Per-frame `M_in` and window-level
|
|
90
|
+
`M_out` are independent type parameters, so subclasses decide how
|
|
91
|
+
metadata aggregates.
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from pathlib import Path
|
|
95
|
+
from kaparoo.data.sequences import WindowedSequence
|
|
96
|
+
|
|
97
|
+
class FirstFrameMeta(WindowedSequence[bytes, Path, Path]):
|
|
98
|
+
def get_meta(self, index):
|
|
99
|
+
# window's metadata is its first frame's metadata
|
|
100
|
+
index = self._normalize_index(index)
|
|
101
|
+
return self._source.get_meta(index * self._step)
|
|
102
|
+
|
|
103
|
+
# 3-frame windows, hop 1, no intra-window skip
|
|
104
|
+
windows = FirstFrameMeta(frames, size=3)
|
|
105
|
+
windows[0] # (frames[0], frames[1], frames[2])
|
|
106
|
+
windows.get_meta(0) # frames.get_meta(0)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`size`, `step`, `skip` follow the same semantics as
|
|
110
|
+
[`generate_batches`](#generate_batches).
|
|
111
|
+
|
|
112
|
+
## Templates
|
|
113
|
+
|
|
114
|
+
### `FileFolderSequence`
|
|
115
|
+
|
|
116
|
+
Folder-rooted base for "one file per item" datasets. Subclasses
|
|
117
|
+
implement three methods:
|
|
118
|
+
|
|
119
|
+
- `list_files(self, root)` — return the full `Path` of every file to
|
|
120
|
+
expose, in order. Called once from the base's `__init__`. Every
|
|
121
|
+
returned path must be under `root`.
|
|
122
|
+
- `load_file(self, path)` — decode a single file. Called lazily on each
|
|
123
|
+
`get_item`.
|
|
124
|
+
- `get_meta(self, index)` — per-item metadata. When metadata is the
|
|
125
|
+
source path, `M` defaults to `Path` and `get_meta` can be
|
|
126
|
+
`return self.get_file(index)`.
|
|
127
|
+
|
|
128
|
+
The base exposes `root: Path`, `files: tuple[Path, ...]` (fresh snapshot),
|
|
129
|
+
and `get_file(index) -> Path`. Paths are stored root-relative
|
|
130
|
+
internally, so the sequence stays compact and survives a relocated root.
|
|
131
|
+
|
|
132
|
+
**Parameterized subclasses**: when `list_files` needs instance options
|
|
133
|
+
(patterns, recursive flags, ...), set them on `self` **before** calling
|
|
134
|
+
`super().__init__(root)` — the base invokes `list_files` from its own
|
|
135
|
+
`__init__`.
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from pathlib import Path
|
|
139
|
+
from kaparoo.data.sequences import FileFolderSequence
|
|
140
|
+
|
|
141
|
+
class GlobFolder(FileFolderSequence[bytes]):
|
|
142
|
+
def __init__(self, root, *, pattern="*", recursive=False):
|
|
143
|
+
# Set state BEFORE super().__init__() so list_files can read it.
|
|
144
|
+
self._pattern = pattern
|
|
145
|
+
self._recursive = recursive
|
|
146
|
+
super().__init__(root)
|
|
147
|
+
|
|
148
|
+
def list_files(self, root):
|
|
149
|
+
glob_fn = root.rglob if self._recursive else root.glob
|
|
150
|
+
return sorted(p for p in glob_fn(self._pattern) if p.is_file())
|
|
151
|
+
|
|
152
|
+
def get_meta(self, index):
|
|
153
|
+
return self.get_file(index)
|
|
154
|
+
|
|
155
|
+
def load_file(self, path):
|
|
156
|
+
return path.read_bytes()
|
|
157
|
+
|
|
158
|
+
folder = GlobFolder("data", pattern="*.png", recursive=True)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### `SingleFileSequence`
|
|
162
|
+
|
|
163
|
+
Thin ABC for the "one file, many records" pattern (a video with many
|
|
164
|
+
frames, a CSV with many rows, ...). The base validates that `path`
|
|
165
|
+
exists and is a regular file and exposes it via the `path` property.
|
|
166
|
+
Indexing strategies vary too widely across formats to abstract here —
|
|
167
|
+
subclasses own opening, indexing, and decoding.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from kaparoo.data.sequences import SingleFileSequence
|
|
171
|
+
|
|
172
|
+
class LinesFile(SingleFileSequence[str, int]):
|
|
173
|
+
def __init__(self, path):
|
|
174
|
+
super().__init__(path)
|
|
175
|
+
self._lines = tuple(self.path.read_text().splitlines())
|
|
176
|
+
|
|
177
|
+
def __len__(self):
|
|
178
|
+
return len(self._lines)
|
|
179
|
+
|
|
180
|
+
def get_item(self, index):
|
|
181
|
+
return self._lines[index]
|
|
182
|
+
|
|
183
|
+
def get_meta(self, index):
|
|
184
|
+
return index + 1 # 1-based line number
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## generate_batches
|
|
188
|
+
|
|
189
|
+
A windowing iterator over any `Sequence`. `size` is the only positional
|
|
190
|
+
parameter; `step` / `skip` / `start` / `stop` / `drop_last` are
|
|
191
|
+
keyword-only.
|
|
192
|
+
|
|
193
|
+
| Parameter | Effect |
|
|
194
|
+
| --- | --- |
|
|
195
|
+
| `size` | items per window |
|
|
196
|
+
| `step` *(default 1)* | distance between consecutive windows |
|
|
197
|
+
| `skip` *(default 1)* | intra-window stride |
|
|
198
|
+
| `start`, `stop` | restrict the source range; `start == stop` yields nothing |
|
|
199
|
+
| `drop_last` *(default `True`)* | drop a trailing partial window if any |
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from kaparoo.data.sequences import generate_batches
|
|
203
|
+
|
|
204
|
+
# Overlapping 3-windows (default step=1)
|
|
205
|
+
list(generate_batches(range(6), size=3))
|
|
206
|
+
# [[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]]
|
|
207
|
+
|
|
208
|
+
# Non-overlapping batches
|
|
209
|
+
list(generate_batches(range(7), size=3, step=3, drop_last=False))
|
|
210
|
+
# [[0, 1, 2], [3, 4, 5], [6]]
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## See also
|
|
214
|
+
|
|
215
|
+
- [`kaparoo.filesystem`](../filesystem/) for path helpers and search
|
|
216
|
+
- [`kaparoo.utils`](../utils/) for `Timer` and Optional helpers
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
__all__ = (
|
|
2
|
+
"ConcatSequence",
|
|
3
|
+
"DataSequence",
|
|
4
|
+
"FileFolderSequence",
|
|
5
|
+
"SingleFileSequence",
|
|
6
|
+
"SlicedSequence",
|
|
7
|
+
"WindowedSequence",
|
|
8
|
+
"generate_batches",
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from kaparoo.data.sequences import (
|
|
12
|
+
ConcatSequence,
|
|
13
|
+
DataSequence,
|
|
14
|
+
FileFolderSequence,
|
|
15
|
+
SingleFileSequence,
|
|
16
|
+
SlicedSequence,
|
|
17
|
+
WindowedSequence,
|
|
18
|
+
generate_batches,
|
|
19
|
+
)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
__all__ = (
|
|
4
|
+
"ConcatSequence",
|
|
5
|
+
"DataSequence",
|
|
6
|
+
"FileFolderSequence",
|
|
7
|
+
"SingleFileSequence",
|
|
8
|
+
"SlicedSequence",
|
|
9
|
+
"WindowedSequence",
|
|
10
|
+
"generate_batches",
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from kaparoo.data.sequences.base import DataSequence
|
|
14
|
+
from kaparoo.data.sequences.composers import (
|
|
15
|
+
ConcatSequence,
|
|
16
|
+
SlicedSequence,
|
|
17
|
+
WindowedSequence,
|
|
18
|
+
)
|
|
19
|
+
from kaparoo.data.sequences.templates import (
|
|
20
|
+
FileFolderSequence,
|
|
21
|
+
SingleFileSequence,
|
|
22
|
+
)
|
|
23
|
+
from kaparoo.data.sequences.utils import generate_batches
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
__all__ = ("DataSequence",)
|
|
4
|
+
|
|
5
|
+
from abc import abstractmethod
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from typing import overload
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DataSequence[T, M = None](Sequence[T]):
|
|
11
|
+
"""An ordered, lazily-loaded, read-only sequence with per-item metadata.
|
|
12
|
+
|
|
13
|
+
Subclasses implement `get_item` (and `__len__`) to fetch a single
|
|
14
|
+
item by index. Sequence operations (`ds[i]`, `ds[i:j]`, `for x in
|
|
15
|
+
ds`, `x in ds`, `reversed(ds)`, ...) come from the inherited
|
|
16
|
+
`collections.abc.Sequence` protocol; for item access only `get_item` need be
|
|
17
|
+
overridden, and `get_items` may be overridden for batch-fetch
|
|
18
|
+
optimization.
|
|
19
|
+
|
|
20
|
+
The second type parameter `M` carries per-item metadata (labels,
|
|
21
|
+
source paths, timestamps, ...). Subclasses implement `get_meta`.
|
|
22
|
+
When the data has no metadata, parameterize as `DataSequence[T]`
|
|
23
|
+
(so `M` defaults to `None`) and let `get_meta` simply return
|
|
24
|
+
`None`.
|
|
25
|
+
|
|
26
|
+
Type Parameters:
|
|
27
|
+
T: Element type. `ds[i]` and `get_item(i)` return `T`.
|
|
28
|
+
M: Per-item metadata type. `get_meta(i)` returns `M`. Defaults
|
|
29
|
+
to `None` -- meaning "no metadata", in which case
|
|
30
|
+
subclasses still implement `get_meta` but as a no-op.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
@abstractmethod
|
|
34
|
+
def __len__(self) -> int:
|
|
35
|
+
raise NotImplementedError
|
|
36
|
+
|
|
37
|
+
# --- item access -------------------------------------------------------
|
|
38
|
+
|
|
39
|
+
@overload
|
|
40
|
+
def __getitem__(self, index: int, /) -> T: ...
|
|
41
|
+
|
|
42
|
+
@overload
|
|
43
|
+
def __getitem__(self, index: slice, /) -> Sequence[T]: ...
|
|
44
|
+
|
|
45
|
+
def __getitem__(self, index: int | slice, /) -> T | Sequence[T]:
|
|
46
|
+
if isinstance(index, slice):
|
|
47
|
+
start, stop, step = index.indices(len(self))
|
|
48
|
+
return self.get_items(range(start, stop, step))
|
|
49
|
+
return self.get_item(index)
|
|
50
|
+
|
|
51
|
+
@abstractmethod
|
|
52
|
+
def get_item(self, index: int) -> T:
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
def get_items(self, indices: Sequence[int]) -> Sequence[T]:
|
|
56
|
+
return [self.get_item(index) for index in indices]
|
|
57
|
+
|
|
58
|
+
# --- metadata access ---------------------------------------------------
|
|
59
|
+
|
|
60
|
+
@abstractmethod
|
|
61
|
+
def get_meta(self, index: int) -> M:
|
|
62
|
+
raise NotImplementedError
|
|
63
|
+
|
|
64
|
+
def get_metas(self, indices: Sequence[int]) -> Sequence[M]:
|
|
65
|
+
return [self.get_meta(index) for index in indices]
|
|
66
|
+
|
|
67
|
+
# --- combined item + metadata ------------------------------------------
|
|
68
|
+
|
|
69
|
+
def get_pair(self, index: int) -> tuple[T, M]:
|
|
70
|
+
return self.get_item(index), self.get_meta(index)
|
|
71
|
+
|
|
72
|
+
def get_pairs(self, indices: Sequence[int]) -> Sequence[tuple[T, M]]:
|
|
73
|
+
return [self.get_pair(index) for index in indices]
|