kaparoo-python 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kaparoo_python-0.8.0/PKG-INFO +149 -0
- kaparoo_python-0.7.0/PKG-INFO → kaparoo_python-0.8.0/README.md +43 -29
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/data/README.md +35 -10
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/data/sequences/base.py +17 -3
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/data/sequences/composers.py +153 -98
- kaparoo_python-0.8.0/kaparoo/data/sequences/templates.py +180 -0
- kaparoo_python-0.8.0/kaparoo/data/sequences/utils.py +69 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/README.md +33 -4
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/__init__.py +2 -15
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/directory.py +44 -21
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/exceptions.py +4 -3
- kaparoo_python-0.8.0/kaparoo/filesystem/exclude.py +109 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/existence.py +29 -24
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/README.md +642 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/__init__.py +33 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/base.py +100 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/conditions.py +470 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/entry.py +395 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/group.py +302 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/scaffold.py +227 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/traverse/__init__.py +20 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/traverse/_utils.py +89 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/traverse/locate.py +181 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/traverse/validate.py +609 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/hierarchy/utils.py +45 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/search/README.md +132 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/search/__init__.py +5 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/search/classes.py +70 -67
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/search/wrappers.py +65 -29
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/staged.py +140 -114
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/types.py +2 -0
- kaparoo_python-0.8.0/kaparoo/filesystem/units.py +23 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/filesystem/utils.py +60 -41
- kaparoo_python-0.8.0/kaparoo/filters/README.md +228 -0
- {kaparoo_python-0.7.0/kaparoo/filesystem/search → kaparoo_python-0.8.0/kaparoo/filters}/__init__.py +39 -31
- {kaparoo_python-0.7.0/kaparoo/filesystem/search → kaparoo_python-0.8.0/kaparoo}/filters/base.py +36 -16
- kaparoo_python-0.8.0/kaparoo/filters/enumerable.py +346 -0
- kaparoo_python-0.8.0/kaparoo/filters/logical.py +112 -0
- kaparoo_python-0.8.0/kaparoo/filters/multi_pattern.py +142 -0
- {kaparoo_python-0.7.0/kaparoo/filesystem/search → kaparoo_python-0.8.0/kaparoo}/filters/pattern.py +58 -82
- kaparoo_python-0.8.0/kaparoo/filters/types.py +89 -0
- {kaparoo_python-0.7.0/kaparoo/filesystem/search → kaparoo_python-0.8.0/kaparoo}/filters/utils.py +7 -2
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/utils/README.md +96 -11
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/utils/__init__.py +11 -0
- kaparoo_python-0.8.0/kaparoo/utils/aggregate.py +684 -0
- kaparoo_python-0.8.0/kaparoo/utils/checks.py +99 -0
- kaparoo_python-0.8.0/kaparoo/utils/optional.py +109 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/utils/timer.py +95 -101
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/pyproject.toml +26 -2
- kaparoo_python-0.7.0/README.md +0 -86
- kaparoo_python-0.7.0/kaparoo/data/sequences/templates.py +0 -242
- kaparoo_python-0.7.0/kaparoo/data/sequences/utils.py +0 -79
- kaparoo_python-0.7.0/kaparoo/filesystem/search/README.md +0 -221
- kaparoo_python-0.7.0/kaparoo/filesystem/search/deprecated.py +0 -289
- kaparoo_python-0.7.0/kaparoo/filesystem/search/filters/__init__.py +0 -73
- kaparoo_python-0.7.0/kaparoo/filesystem/search/filters/logical.py +0 -138
- kaparoo_python-0.7.0/kaparoo/filesystem/search/filters/multi_pattern.py +0 -160
- kaparoo_python-0.7.0/kaparoo/filesystem/search/filters/types.py +0 -47
- kaparoo_python-0.7.0/kaparoo/utils/aggregate.py +0 -404
- kaparoo_python-0.7.0/kaparoo/utils/optional.py +0 -129
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/LICENSE +0 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/data/__init__.py +0 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/data/sequences/__init__.py +0 -0
- {kaparoo_python-0.7.0 → kaparoo_python-0.8.0}/kaparoo/py.typed +0 -0
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kaparoo-python
|
|
3
|
+
Version: 0.8.0
|
|
4
|
+
Summary: Personally common and useful Python features
|
|
5
|
+
Keywords: filesystem,pathlib,paths,glob,filters,pattern-matching,dataset,sequence,batching,timer,aggregation,metrics,utilities,typed
|
|
6
|
+
Author: Jaewoo Park
|
|
7
|
+
Author-email: Jaewoo Park <kaparoo2001@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Classifier: Topic :: System :: Filesystems
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.14
|
|
25
|
+
Project-URL: GitHub, https://www.github.com/kaparoo/kaparoo-python
|
|
26
|
+
Project-URL: Issues, https://www.github.com/kaparoo/kaparoo-python/issues
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# kaparoo-python
|
|
30
|
+
|
|
31
|
+
[PyPI](https://pypi.org/project/kaparoo-python/)
|
|
32
|
+
[PyPI project](https://pypi.org/project/kaparoo-python/)
|
|
33
|
+
[Python](https://www.python.org/)
|
|
34
|
+
[License](./LICENSE)
|
|
35
|
+
[uv](https://github.com/astral-sh/uv)
|
|
36
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
37
|
+
[ty](https://github.com/astral-sh/ty)
|
|
38
|
+
[Copier](https://github.com/copier-org/copier)
|
|
39
|
+
|
|
40
|
+
*Personally common and useful Python features.*
|
|
41
|
+
|
|
42
|
+
## 📦 Installation
|
|
43
|
+
|
|
44
|
+
Requires Python 3.14+.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# With uv (recommended)
|
|
48
|
+
uv add kaparoo-python
|
|
49
|
+
|
|
50
|
+
# With pip
|
|
51
|
+
pip install kaparoo-python
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## 🧩 Modules
|
|
55
|
+
|
|
56
|
+
Each submodule ships its own README with focused examples.
|
|
57
|
+
|
|
58
|
+
### [`kaparoo.filesystem`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem)
|
|
59
|
+
|
|
60
|
+
`pathlib`-based filesystem helpers: existence checks (`*_exists`),
|
|
61
|
+
`ensure_*` validators, `make_dir(s)` (with a destructive `clean` reset
|
|
62
|
+
option), `dir_empty(s)`, `reserve_path(s)` guards for not-yet-existing
|
|
63
|
+
destinations, `StagedFile` / `StagedDirectory` for safe (atomic) writes,
|
|
64
|
+
path stringification, and a small exception hierarchy.
|
|
65
|
+
|
|
66
|
+
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
67
|
+
|
|
68
|
+
Filesystem traversal with composable filters: `search_paths` /
|
|
69
|
+
`search_files` / `search_dirs`, wired to the `kaparoo.filters` DSL via
|
|
70
|
+
`part_filter` / `name_filter` / `predicate`, with `min_depth` / `max_depth`
|
|
71
|
+
control and a subtree-pruning `exclude`.
|
|
72
|
+
|
|
73
|
+
### [`kaparoo.filesystem.hierarchy`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/hierarchy)
|
|
74
|
+
|
|
75
|
+
A declarative description of a filesystem tree: `File` / `Directory`
|
|
76
|
+
nodes whose names are drawn from the `kaparoo.filters` DSL (so one node
|
|
77
|
+
can stand for many regularly-named siblings), plus `Exclusive` / `Together`
|
|
78
|
+
constraints and per-node attribute `condition`s. It drives four disk
|
|
79
|
+
operations — `locate` (map on-disk paths to spec nodes), `validate` (check
|
|
80
|
+
a directory against the spec), `conformer` (build a `search` predicate from
|
|
81
|
+
a spec), and `scaffold` (create the tree on disk).
|
|
82
|
+
|
|
83
|
+
### [`kaparoo.filters`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filters)
|
|
84
|
+
|
|
85
|
+
A declarative, composable string-matching DSL: a `Filter` family
|
|
86
|
+
(pattern, multi-pattern, logical, and enumerable `Literal` / `OneOf` /
|
|
87
|
+
`Template`) that round-trips through JSON-friendly dicts, plus an
|
|
88
|
+
extension hook for custom filter kinds. Used by
|
|
89
|
+
`kaparoo.filesystem.search` for path matching and
|
|
90
|
+
`kaparoo.filesystem.hierarchy` for declaring trees.
|
|
91
|
+
|
|
92
|
+
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
93
|
+
|
|
94
|
+
`Timer` / `SpanTimer` context-manager-and-decorator timers (with
|
|
95
|
+
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
96
|
+
pluggable metric aggregation (the batch → epoch → run pattern);
|
|
97
|
+
`ensure_one_of` / `ensure_in_range` validation guards; plus helpers
|
|
98
|
+
for `Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
99
|
+
|
|
100
|
+
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
101
|
+
|
|
102
|
+
Building blocks for dataset code: `DataSequence[T, M]` ABC (item +
|
|
103
|
+
metadata), composers (`SlicedSequence`, `ConcatSequence`,
|
|
104
|
+
`TransformedSequence`, `WindowedSequence`, `ZippedSequence`), file-backed
|
|
105
|
+
templates (`FileFolderSequence`, `FileListSequence`, `SingleFileSequence`),
|
|
106
|
+
and `generate_batches`.
|
|
107
|
+
|
|
108
|
+
## 🎯 Quick example
|
|
109
|
+
|
|
110
|
+
Search a tree with composable filters:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from kaparoo.filesystem import search_files
|
|
114
|
+
from kaparoo.filters import And, EndsWith, Equals, Not
|
|
115
|
+
|
|
116
|
+
# All .py files except __init__.py
|
|
117
|
+
py_files = search_files(
|
|
118
|
+
"src",
|
|
119
|
+
name_filter=And((EndsWith(".py"), Not(Equals("__init__.py")))),
|
|
120
|
+
)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
…or describe a tree declaratively and check a directory against it:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from kaparoo.filesystem.hierarchy import Directory, File, validate
|
|
127
|
+
from kaparoo.filters import Glob
|
|
128
|
+
|
|
129
|
+
spec = Directory("dataset", [
|
|
130
|
+
File("metadata.json"),
|
|
131
|
+
Directory("images", [File(Glob("*.png"))]),
|
|
132
|
+
])
|
|
133
|
+
report = validate(spec, "data/dataset", root_as_top=True)
|
|
134
|
+
assert report.ok # required entries present, nothing unexpected
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
See each submodule's README for more.
|
|
138
|
+
|
|
139
|
+
## 📋 TODO
|
|
140
|
+
|
|
141
|
+
See [TODO.md](./TODO.md) for tracked open items.
|
|
142
|
+
|
|
143
|
+
## 📜 Changelog
|
|
144
|
+
|
|
145
|
+
See [CHANGELOG.md](./CHANGELOG.md) for the version history.
|
|
146
|
+
|
|
147
|
+
## ⚖️ License
|
|
148
|
+
|
|
149
|
+
This project is distributed under the terms of the [MIT](./LICENSE) license.
|
|
@@ -1,24 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: kaparoo-python
|
|
3
|
-
Version: 0.7.0
|
|
4
|
-
Summary: Personally common and useful Python features
|
|
5
|
-
Keywords: filesystem,pathlib,paths,utilities
|
|
6
|
-
Author: Jaewoo Park
|
|
7
|
-
Author-email: Jaewoo Park <kaparoo2001@gmail.com>
|
|
8
|
-
License-Expression: MIT
|
|
9
|
-
License-File: LICENSE
|
|
10
|
-
Classifier: Development Status :: 4 - Beta
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Operating System :: OS Independent
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
15
|
-
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
16
|
-
Classifier: Typing :: Typed
|
|
17
|
-
Requires-Python: >=3.14
|
|
18
|
-
Project-URL: GitHub, https://www.github.com/kaparoo/kaparoo-python
|
|
19
|
-
Project-URL: Issues, https://www.github.com/kaparoo/kaparoo-python/issues
|
|
20
|
-
Description-Content-Type: text/markdown
|
|
21
|
-
|
|
22
1
|
# kaparoo-python
|
|
23
2
|
|
|
24
3
|
[PyPI](https://pypi.org/project/kaparoo-python/)
|
|
@@ -58,18 +37,37 @@ path stringification, and a small exception hierarchy.
|
|
|
58
37
|
|
|
59
38
|
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
60
39
|
|
|
61
|
-
Filesystem traversal with composable filters
|
|
62
|
-
`search_files` / `search_dirs`,
|
|
63
|
-
|
|
64
|
-
|
|
40
|
+
Filesystem traversal with composable filters: `search_paths` /
|
|
41
|
+
`search_files` / `search_dirs`, wired to the `kaparoo.filters` DSL via
|
|
42
|
+
`part_filter` / `name_filter` / `predicate`, with `min_depth` / `max_depth`
|
|
43
|
+
control and a subtree-pruning `exclude`.
|
|
44
|
+
|
|
45
|
+
### [`kaparoo.filesystem.hierarchy`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/hierarchy)
|
|
46
|
+
|
|
47
|
+
A declarative description of a filesystem tree: `File` / `Directory`
|
|
48
|
+
nodes whose names are drawn from the `kaparoo.filters` DSL (so one node
|
|
49
|
+
can stand for many regularly-named siblings), plus `Exclusive` / `Together`
|
|
50
|
+
constraints and per-node attribute `condition`s. It drives four disk
|
|
51
|
+
operations — `locate` (map on-disk paths to spec nodes), `validate` (check
|
|
52
|
+
a directory against the spec), `conformer` (build a `search` predicate from
|
|
53
|
+
a spec), and `scaffold` (create the tree on disk).
|
|
54
|
+
|
|
55
|
+
### [`kaparoo.filters`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filters)
|
|
56
|
+
|
|
57
|
+
A declarative, composable string-matching DSL: a `Filter` family
|
|
58
|
+
(pattern, multi-pattern, logical, and enumerable `Literal` / `OneOf` /
|
|
59
|
+
`Template`) that round-trips through JSON-friendly dicts, plus an
|
|
60
|
+
extension hook for custom filter kinds. Used by
|
|
61
|
+
`kaparoo.filesystem.search` for path matching and
|
|
62
|
+
`kaparoo.filesystem.hierarchy` for declaring trees.
|
|
65
63
|
|
|
66
64
|
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
67
65
|
|
|
68
66
|
`Timer` / `SpanTimer` context-manager-and-decorator timers (with
|
|
69
67
|
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
70
|
-
pluggable metric aggregation (the batch → epoch → run pattern;
|
|
71
|
-
|
|
72
|
-
`Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
68
|
+
pluggable metric aggregation (the batch → epoch → run pattern);
|
|
69
|
+
`ensure_one_of` / `ensure_in_range` validation guards; plus helpers
|
|
70
|
+
for `Optional[T]` values (`replace_if_none`, `unwrap_or_default`, ...).
|
|
73
71
|
|
|
74
72
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
75
73
|
|
|
@@ -81,9 +79,11 @@ and `generate_batches`.
|
|
|
81
79
|
|
|
82
80
|
## 🎯 Quick example
|
|
83
81
|
|
|
82
|
+
Search a tree with composable filters:
|
|
83
|
+
|
|
84
84
|
```python
|
|
85
85
|
from kaparoo.filesystem import search_files
|
|
86
|
-
from kaparoo.
|
|
86
|
+
from kaparoo.filters import And, EndsWith, Equals, Not
|
|
87
87
|
|
|
88
88
|
# All .py files except __init__.py
|
|
89
89
|
py_files = search_files(
|
|
@@ -92,6 +92,20 @@ py_files = search_files(
|
|
|
92
92
|
)
|
|
93
93
|
```
|
|
94
94
|
|
|
95
|
+
…or describe a tree declaratively and check a directory against it:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from kaparoo.filesystem.hierarchy import Directory, File, validate
|
|
99
|
+
from kaparoo.filters import Glob
|
|
100
|
+
|
|
101
|
+
spec = Directory("dataset", [
|
|
102
|
+
File("metadata.json"),
|
|
103
|
+
Directory("images", [File(Glob("*.png"))]),
|
|
104
|
+
])
|
|
105
|
+
report = validate(spec, "data/dataset", root_as_top=True)
|
|
106
|
+
assert report.ok # required entries present, nothing unexpected
|
|
107
|
+
```
|
|
108
|
+
|
|
95
109
|
See each submodule's README for more.
|
|
96
110
|
|
|
97
111
|
## 📋 TODO
|
|
@@ -3,6 +3,23 @@
|
|
|
3
3
|
Building blocks for dataset code: a `Sequence`-based abstract base, a
|
|
4
4
|
small set of composers, and ready-to-subclass file-backed templates.
|
|
5
5
|
|
|
6
|
+
## Contents
|
|
7
|
+
|
|
8
|
+
- [Modules](#modules)
|
|
9
|
+
- [DataSequence](#datasequence)
|
|
10
|
+
- [Composers](#composers)
|
|
11
|
+
- [`SlicedSequence`](#slicedsequence)
|
|
12
|
+
- [`ConcatSequence`](#concatsequence)
|
|
13
|
+
- [`TransformedSequence`](#transformedsequence)
|
|
14
|
+
- [`WindowedSequence`](#windowedsequence)
|
|
15
|
+
- [`ZippedSequence`](#zippedsequence)
|
|
16
|
+
- [Templates](#templates)
|
|
17
|
+
- [`FileFolderSequence`](#filefoldersequence)
|
|
18
|
+
- [`FileListSequence`](#filelistsequence)
|
|
19
|
+
- [`SingleFileSequence`](#singlefilesequence)
|
|
20
|
+
- [generate_batches](#generate_batches)
|
|
21
|
+
- [See also](#see-also)
|
|
22
|
+
|
|
6
23
|
## Modules
|
|
7
24
|
|
|
8
25
|
- [`sequences/base`](./sequences/base.py) — `DataSequence[T, M]` abstract base
|
|
@@ -27,10 +44,11 @@ metadata channel. Subclasses implement two abstract methods:
|
|
|
27
44
|
| `get_meta(index) -> M` | Produce the i-th item's metadata. |
|
|
28
45
|
|
|
29
46
|
The base derives `get_items` / `get_metas` (bulk) and `get_pair` /
|
|
30
|
-
`get_pairs` (item + metadata together). `__getitem__` returns
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
metadata (paths, labels, line
|
|
47
|
+
`get_pairs` (item + metadata together). `__getitem__` returns one item by
|
|
48
|
+
index, or a plain list of items for a slice (`get_items` over the slice's
|
|
49
|
+
range) — not a `SlicedSequence`. The `M` type parameter defaults to `None`;
|
|
50
|
+
set it explicitly when items carry meaningful metadata (paths, labels, line
|
|
51
|
+
numbers, ...).
|
|
34
52
|
|
|
35
53
|
```python
|
|
36
54
|
from kaparoo.data.sequences import DataSequence
|
|
@@ -90,7 +108,11 @@ A lazy view that applies a `transform` callable to each item of
|
|
|
90
108
|
`source`. The transform is called on demand in `get_item` -- nothing
|
|
91
109
|
is computed at construction. `get_meta` passes through `source.get_meta`
|
|
92
110
|
unchanged by default; override it in a subclass when `M_out` differs
|
|
93
|
-
from `M_in`.
|
|
111
|
+
from `M_in`. **That override is required, not optional**: if you declare a
|
|
112
|
+
different `M_out` but omit the override, the default silently returns the source's
|
|
113
|
+
`M_in` metadata typed as `M_out` (a `cast` hides the mismatch, and Python
|
|
114
|
+
erases generics at runtime, so nothing catches it until the wrong value is
|
|
115
|
+
used).
|
|
94
116
|
|
|
95
117
|
```python
|
|
96
118
|
from kaparoo.data.sequences import TransformedSequence
|
|
@@ -130,7 +152,7 @@ class FirstFrameMeta(WindowedSequence[bytes, Path]):
|
|
|
130
152
|
def get_meta(self, index):
|
|
131
153
|
# window's metadata is its first frame's metadata
|
|
132
154
|
index = self._normalize_index(index)
|
|
133
|
-
return self.
|
|
155
|
+
return self.source.get_meta(index * self.step)
|
|
134
156
|
|
|
135
157
|
# 3-frame windows, hop 1, no intra-window skip
|
|
136
158
|
windows = FirstFrameMeta(frames, size=3)
|
|
@@ -178,8 +200,9 @@ implement three methods:
|
|
|
178
200
|
source path, `M` defaults to `Path` and `get_meta` can be
|
|
179
201
|
`return self.get_file(index)`.
|
|
180
202
|
|
|
181
|
-
The base exposes `root: Path`, `files: tuple[Path, ...]` (
|
|
182
|
-
and `get_file(index) -> Path`. Paths are
|
|
203
|
+
The base exposes `root: Path`, `files: tuple[Path, ...]` (an immutable
|
|
204
|
+
snapshot, built once and cached), and `get_file(index) -> Path`. Paths are
|
|
205
|
+
stored root-relative
|
|
183
206
|
internally, so the sequence stays compact and survives a relocated root.
|
|
184
207
|
|
|
185
208
|
**Parameterized subclasses**: when `list_files` needs instance options
|
|
@@ -289,5 +312,7 @@ list(generate_batches(range(7), size=3, step=3, drop_last=False))
|
|
|
289
312
|
|
|
290
313
|
## See also
|
|
291
314
|
|
|
292
|
-
- [`kaparoo.filesystem`](../filesystem/)
|
|
293
|
-
|
|
315
|
+
- [`kaparoo.filesystem`](../filesystem/) — path helpers for file-backed
|
|
316
|
+
sequences
|
|
317
|
+
- [`kaparoo.filesystem.search`](../filesystem/search/) — discover the files a
|
|
318
|
+
`FileFolderSequence` wraps
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""The `DataSequence[T, M]` abstract base: indexable items with metadata."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
__all__ = ("DataSequence",)
|
|
@@ -32,7 +34,7 @@ class DataSequence[T, M = None](Sequence[T]):
|
|
|
32
34
|
|
|
33
35
|
@abstractmethod
|
|
34
36
|
def __len__(self) -> int:
|
|
35
|
-
|
|
37
|
+
"""Return the number of items in the sequence."""
|
|
36
38
|
|
|
37
39
|
# --- item access -------------------------------------------------------
|
|
38
40
|
|
|
@@ -50,24 +52,36 @@ class DataSequence[T, M = None](Sequence[T]):
|
|
|
50
52
|
|
|
51
53
|
@abstractmethod
|
|
52
54
|
def get_item(self, index: int) -> T:
|
|
53
|
-
|
|
55
|
+
"""Fetch and return the item at `index`."""
|
|
54
56
|
|
|
55
57
|
def get_items(self, indices: Sequence[int]) -> Sequence[T]:
|
|
58
|
+
"""Fetch many items at once, in `indices` order.
|
|
59
|
+
|
|
60
|
+
Defaults to one `get_item` per index; override to use a backing
|
|
61
|
+
store's native batch read.
|
|
62
|
+
"""
|
|
56
63
|
return [self.get_item(index) for index in indices]
|
|
57
64
|
|
|
58
65
|
# --- metadata access ---------------------------------------------------
|
|
59
66
|
|
|
60
67
|
@abstractmethod
|
|
61
68
|
def get_meta(self, index: int) -> M:
|
|
62
|
-
|
|
69
|
+
"""Return the metadata for the item at `index` (`None` when `M` is `None`)."""
|
|
63
70
|
|
|
64
71
|
def get_metas(self, indices: Sequence[int]) -> Sequence[M]:
|
|
72
|
+
"""Fetch many metadata values at once, in `indices` order.
|
|
73
|
+
|
|
74
|
+
Defaults to one `get_meta` per index; override alongside
|
|
75
|
+
`get_items` when a batch read is cheaper.
|
|
76
|
+
"""
|
|
65
77
|
return [self.get_meta(index) for index in indices]
|
|
66
78
|
|
|
67
79
|
# --- combined item + metadata ------------------------------------------
|
|
68
80
|
|
|
69
81
|
def get_pair(self, index: int) -> tuple[T, M]:
|
|
82
|
+
"""Return the `(item, metadata)` pair at `index`."""
|
|
70
83
|
return self.get_item(index), self.get_meta(index)
|
|
71
84
|
|
|
72
85
|
def get_pairs(self, indices: Sequence[int]) -> Sequence[tuple[T, M]]:
|
|
86
|
+
"""Fetch many `(item, metadata)` pairs at once, in `indices` order."""
|
|
73
87
|
return [self.get_pair(index) for index in indices]
|