kaparoo-python 0.3.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/PKG-INFO +9 -5
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/README.md +8 -4
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/README.md +25 -1
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/__init__.py +2 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/__init__.py +2 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/templates.py +78 -2
- kaparoo_python-0.5.0/kaparoo/filesystem/README.md +230 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/__init__.py +15 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/directory.py +111 -8
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/classes.py +1 -1
- kaparoo_python-0.5.0/kaparoo/filesystem/staged.py +499 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/utils.py +152 -1
- kaparoo_python-0.5.0/kaparoo/utils/README.md +221 -0
- kaparoo_python-0.5.0/kaparoo/utils/__init__.py +45 -0
- kaparoo_python-0.5.0/kaparoo/utils/aggregate.py +404 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/utils/timer.py +146 -53
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/pyproject.toml +5 -5
- kaparoo_python-0.3.0/kaparoo/filesystem/README.md +0 -120
- kaparoo_python-0.3.0/kaparoo/utils/README.md +0 -121
- kaparoo_python-0.3.0/kaparoo/utils/__init__.py +0 -21
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/LICENSE +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/base.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/composers.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/utils.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/exceptions.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/existence.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/README.md +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/__init__.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/deprecated.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/__init__.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/base.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/logical.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/multi_pattern.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/pattern.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/types.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/utils.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/wrappers.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/types.py +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/py.typed +0 -0
- {kaparoo_python-0.3.0 → kaparoo_python-0.5.0}/kaparoo/utils/optional.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kaparoo-python
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Personally common and useful Python features
|
|
5
5
|
Keywords: filesystem,pathlib,paths,utilities
|
|
6
6
|
Author: Jaewoo Park
|
|
@@ -51,8 +51,10 @@ Each submodule ships its own README with focused examples.
|
|
|
51
51
|
### [`kaparoo.filesystem`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem)
|
|
52
52
|
|
|
53
53
|
`pathlib`-based filesystem helpers: existence checks (`*_exists`),
|
|
54
|
-
`ensure_*` validators, `make_dir(s)
|
|
55
|
-
|
|
54
|
+
`ensure_*` validators, `make_dir(s)` (with a destructive `clean` reset
|
|
55
|
+
option), `dir_empty(s)`, `reserve_path(s)` guards for not-yet-existing
|
|
56
|
+
destinations, `StagedFile` / `StagedDirectory` for safe (atomic) writes,
|
|
57
|
+
path stringification, and a small exception hierarchy.
|
|
56
58
|
|
|
57
59
|
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
58
60
|
|
|
@@ -63,8 +65,10 @@ hook for custom filter kinds.
|
|
|
63
65
|
|
|
64
66
|
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
65
67
|
|
|
66
|
-
`Timer` / `
|
|
67
|
-
|
|
68
|
+
`Timer` / `SegmentTimer` context-manager-and-decorator timers (with
|
|
69
|
+
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
70
|
+
pluggable metric aggregation (the batch → epoch → run pattern); plus a
|
|
71
|
+
small family of helpers for working with `Optional[T]` values
|
|
68
72
|
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
69
73
|
|
|
70
74
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
@@ -30,8 +30,10 @@ Each submodule ships its own README with focused examples.
|
|
|
30
30
|
### [`kaparoo.filesystem`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem)
|
|
31
31
|
|
|
32
32
|
`pathlib`-based filesystem helpers: existence checks (`*_exists`),
|
|
33
|
-
`ensure_*` validators, `make_dir(s)
|
|
34
|
-
|
|
33
|
+
`ensure_*` validators, `make_dir(s)` (with a destructive `clean` reset
|
|
34
|
+
option), `dir_empty(s)`, `reserve_path(s)` guards for not-yet-existing
|
|
35
|
+
destinations, `StagedFile` / `StagedDirectory` for safe (atomic) writes,
|
|
36
|
+
path stringification, and a small exception hierarchy.
|
|
35
37
|
|
|
36
38
|
### [`kaparoo.filesystem.search`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/filesystem/search)
|
|
37
39
|
|
|
@@ -42,8 +44,10 @@ hook for custom filter kinds.
|
|
|
42
44
|
|
|
43
45
|
### [`kaparoo.utils`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/utils)
|
|
44
46
|
|
|
45
|
-
`Timer` / `
|
|
46
|
-
|
|
47
|
+
`Timer` / `SegmentTimer` context-manager-and-decorator timers (with
|
|
48
|
+
`lap`-split and `measure`-block timings); `Aggregator` for nested,
|
|
49
|
+
pluggable metric aggregation (the batch → epoch → run pattern); plus a
|
|
50
|
+
small family of helpers for working with `Optional[T]` values
|
|
47
51
|
(`replace_if_none`, `unwrap_or_default`, ...).
|
|
48
52
|
|
|
49
53
|
### [`kaparoo.data`](https://github.com/kaparoo/kaparoo-python/tree/main/kaparoo/data)
|
|
@@ -9,7 +9,7 @@ small set of composers, and ready-to-subclass file-backed templates.
|
|
|
9
9
|
- [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
|
|
10
10
|
`ConcatSequence`, `WindowedSequence`
|
|
11
11
|
- [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
|
|
12
|
-
`SingleFileSequence`
|
|
12
|
+
`FileListSequence`, `SingleFileSequence`
|
|
13
13
|
- [`sequences/utils`](./sequences/utils.py) — `generate_batches`
|
|
14
14
|
|
|
15
15
|
All public symbols are re-exported from both `kaparoo.data` and
|
|
@@ -158,6 +158,30 @@ class GlobFolder(FileFolderSequence[bytes]):
|
|
|
158
158
|
folder = GlobFolder("data", pattern="*.png", recursive=True)
|
|
159
159
|
```
|
|
160
160
|
|
|
161
|
+
### `FileListSequence`
|
|
162
|
+
|
|
163
|
+
Same "one file per item" contract as `FileFolderSequence`, but the files
|
|
164
|
+
are given as an explicit list instead of discovered under a `root` — so
|
|
165
|
+
they may live in unrelated directories (or, on Windows, different drives),
|
|
166
|
+
which `FileFolderSequence` cannot represent. There is no `list_files`;
|
|
167
|
+
subclasses implement only `load_file` and `get_meta`. The input order is
|
|
168
|
+
preserved verbatim (duplicates kept) — sort it yourself if needed.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from pathlib import Path
|
|
172
|
+
from kaparoo.data.sequences import FileListSequence
|
|
173
|
+
|
|
174
|
+
class BytesList(FileListSequence[bytes]):
|
|
175
|
+
def get_meta(self, index):
|
|
176
|
+
return self.get_file(index)
|
|
177
|
+
|
|
178
|
+
def load_file(self, path):
|
|
179
|
+
return path.read_bytes()
|
|
180
|
+
|
|
181
|
+
# Files from anywhere, in the order given:
|
|
182
|
+
data = BytesList(["images/a.png", "/other/disk/b.png"])
|
|
183
|
+
```
|
|
184
|
+
|
|
161
185
|
### `SingleFileSequence`
|
|
162
186
|
|
|
163
187
|
Thin ABC for the "one file, many records" pattern (a video with many
|
|
@@ -2,6 +2,7 @@ __all__ = (
|
|
|
2
2
|
"ConcatSequence",
|
|
3
3
|
"DataSequence",
|
|
4
4
|
"FileFolderSequence",
|
|
5
|
+
"FileListSequence",
|
|
5
6
|
"SingleFileSequence",
|
|
6
7
|
"SlicedSequence",
|
|
7
8
|
"WindowedSequence",
|
|
@@ -12,6 +13,7 @@ from kaparoo.data.sequences import (
|
|
|
12
13
|
ConcatSequence,
|
|
13
14
|
DataSequence,
|
|
14
15
|
FileFolderSequence,
|
|
16
|
+
FileListSequence,
|
|
15
17
|
SingleFileSequence,
|
|
16
18
|
SlicedSequence,
|
|
17
19
|
WindowedSequence,
|
|
@@ -4,6 +4,7 @@ __all__ = (
|
|
|
4
4
|
"ConcatSequence",
|
|
5
5
|
"DataSequence",
|
|
6
6
|
"FileFolderSequence",
|
|
7
|
+
"FileListSequence",
|
|
7
8
|
"SingleFileSequence",
|
|
8
9
|
"SlicedSequence",
|
|
9
10
|
"WindowedSequence",
|
|
@@ -18,6 +19,7 @@ from kaparoo.data.sequences.composers import (
|
|
|
18
19
|
)
|
|
19
20
|
from kaparoo.data.sequences.templates import (
|
|
20
21
|
FileFolderSequence,
|
|
22
|
+
FileListSequence,
|
|
21
23
|
SingleFileSequence,
|
|
22
24
|
)
|
|
23
25
|
from kaparoo.data.sequences.utils import generate_batches
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = ("FileFolderSequence", "SingleFileSequence")
|
|
3
|
+
__all__ = ("FileFolderSequence", "FileListSequence", "SingleFileSequence")
|
|
4
4
|
|
|
5
5
|
from abc import abstractmethod
|
|
6
6
|
from pathlib import Path
|
|
@@ -11,7 +11,7 @@ from kaparoo.filesystem.existence import ensure_dir_exists, ensure_file_exists
|
|
|
11
11
|
from kaparoo.filesystem.utils import stringify_paths, wrap_path
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
|
-
from kaparoo.filesystem.types import StrPath
|
|
14
|
+
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
@@ -149,6 +149,82 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
149
149
|
raise NotImplementedError
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
153
|
+
"""A `DataSequence` over an explicit, ordered list of files.
|
|
154
|
+
|
|
155
|
+
Like `FileFolderSequence`, items live one-per-file and subclasses
|
|
156
|
+
implement `load_file` and `get_meta`. Unlike it, the files are given
|
|
157
|
+
directly rather than discovered under a `root`, so they may live in
|
|
158
|
+
unrelated directories -- or, on Windows, on different drives -- which
|
|
159
|
+
`FileFolderSequence` cannot represent (it stores paths relative to one
|
|
160
|
+
root). There is no `list_files`: the input list *is* the listing.
|
|
161
|
+
|
|
162
|
+
The given order is preserved verbatim and duplicates are kept; sort the
|
|
163
|
+
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
164
|
+
needed. Paths are not checked for existence at construction; `load_file`
|
|
165
|
+
is called lazily on each `get_item`.
|
|
166
|
+
|
|
167
|
+
The base exposes:
|
|
168
|
+
|
|
169
|
+
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
170
|
+
- `get_file(index) -> Path` — full path of the i-th file.
|
|
171
|
+
|
|
172
|
+
Type Parameters:
|
|
173
|
+
T: Item type returned by `get_item`.
|
|
174
|
+
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
175
|
+
metadata is something else (label, line number, ...).
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
files: The file paths to expose, in order.
|
|
179
|
+
|
|
180
|
+
Example:
|
|
181
|
+
>>> from pathlib import Path
|
|
182
|
+
>>> class BytesList(FileListSequence[bytes]):
|
|
183
|
+
... def get_meta(self, index: int) -> Path:
|
|
184
|
+
... return self.get_file(index)
|
|
185
|
+
...
|
|
186
|
+
... def load_file(self, path: Path) -> bytes:
|
|
187
|
+
... return path.read_bytes()
|
|
188
|
+
>>>
|
|
189
|
+
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
190
|
+
"""
|
|
191
|
+
|
|
192
|
+
def __init__(self, files: StrPaths) -> None:
|
|
193
|
+
self._files = list(stringify_paths(files))
|
|
194
|
+
|
|
195
|
+
def __len__(self) -> int:
|
|
196
|
+
return len(self._files)
|
|
197
|
+
|
|
198
|
+
@property
|
|
199
|
+
def files(self) -> tuple[Path, ...]:
|
|
200
|
+
"""Immutable snapshot of the full file paths, in the given order.
|
|
201
|
+
|
|
202
|
+
Returns a fresh `tuple[Path, ...]` on each access.
|
|
203
|
+
"""
|
|
204
|
+
return tuple(self.get_file(i) for i in range(len(self)))
|
|
205
|
+
|
|
206
|
+
def get_file(self, index: int) -> Path:
|
|
207
|
+
"""Full Path of the file at `index`."""
|
|
208
|
+
return Path(self._files[index])
|
|
209
|
+
|
|
210
|
+
def get_item(self, index: int) -> T:
|
|
211
|
+
return self.load_file(self.get_file(index))
|
|
212
|
+
|
|
213
|
+
@abstractmethod
|
|
214
|
+
def get_meta(self, index: int) -> M:
|
|
215
|
+
raise NotImplementedError
|
|
216
|
+
|
|
217
|
+
@abstractmethod
|
|
218
|
+
def load_file(self, path: Path) -> T:
|
|
219
|
+
"""Decode a single file into an item of type `T`.
|
|
220
|
+
|
|
221
|
+
Called lazily on each `get_item` -- not at construction time.
|
|
222
|
+
Subclasses may freely use external libraries (PIL, librosa,
|
|
223
|
+
cv2, ...) to decode.
|
|
224
|
+
"""
|
|
225
|
+
raise NotImplementedError
|
|
226
|
+
|
|
227
|
+
|
|
152
228
|
class SingleFileSequence[T, M = None](DataSequence[T, M]):
|
|
153
229
|
"""A `DataSequence` backed by a single file that holds multiple records.
|
|
154
230
|
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# `kaparoo.filesystem`
|
|
2
|
+
|
|
3
|
+
`pathlib`-based filesystem helpers.
|
|
4
|
+
|
|
5
|
+
## Modules
|
|
6
|
+
|
|
7
|
+
- [`existence`](./existence.py) — boolean predicates (`*_exists`) and
|
|
8
|
+
validating `ensure_*` variants
|
|
9
|
+
- [`directory`](./directory.py) — `make_dir(s)`, `dir_empty(s)` /
|
|
10
|
+
`dir_not_empty(s)` with validation, plus `_unsafe` variants that skip
|
|
11
|
+
pre-checks
|
|
12
|
+
- [`utils`](./utils.py) — `stringify_path(s)`, `wrap_path(s)`,
|
|
13
|
+
`reserve_path(s)`
|
|
14
|
+
- [`staged`](./staged.py) — `StagedFile` / `StagedDirectory`, safe
|
|
15
|
+
(atomic) writers usable as a context manager or explicitly
|
|
16
|
+
- [`exceptions`](./exceptions.py) — `DirectoryNotFoundError`, `NotAFileError`
|
|
17
|
+
- [`types`](./types.py) — `StrPath`, `StrPaths` type aliases
|
|
18
|
+
- [`search/`](./search/) — composable filesystem search (own README)
|
|
19
|
+
|
|
20
|
+
All public symbols are re-exported from the top-level `kaparoo.filesystem`
|
|
21
|
+
namespace.
|
|
22
|
+
|
|
23
|
+
## Existence checks
|
|
24
|
+
|
|
25
|
+
`*_exists` return a bool; `ensure_*` raise on failure and return the
|
|
26
|
+
(optionally stringified) path.
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from kaparoo.filesystem import (
|
|
30
|
+
dir_exists, ensure_dir_exists, ensure_files_exist, file_exists,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
if file_exists("config.toml"):
|
|
34
|
+
...
|
|
35
|
+
|
|
36
|
+
# Single path: raises FileNotFoundError / NotAFileError / NotADirectoryError
|
|
37
|
+
config = ensure_dir_exists("var/cache", make=True) # create if missing
|
|
38
|
+
report = ensure_dir_exists("var/cache", make=0o755) # mode bits: POSIX only
|
|
39
|
+
|
|
40
|
+
# Bulk with a shared root; each entry is resolved relative to it.
|
|
41
|
+
files = ensure_files_exist(
|
|
42
|
+
["a.txt", "b.txt"],
|
|
43
|
+
root="data",
|
|
44
|
+
)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Exception hierarchy
|
|
48
|
+
|
|
49
|
+
`DirectoryNotFoundError` subclasses `FileNotFoundError`, so callers may
|
|
50
|
+
catch the broader type:
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from kaparoo.filesystem import DirectoryNotFoundError, ensure_dir_exists
|
|
54
|
+
|
|
55
|
+
try:
|
|
56
|
+
ensure_dir_exists("var/missing")
|
|
57
|
+
except FileNotFoundError: # catches DirectoryNotFoundError too
|
|
58
|
+
...
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Creating and emptying directories
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
from kaparoo.filesystem import (
|
|
65
|
+
dir_empty, dir_not_empty, dirs_empty, make_dir, make_dirs,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
cache_dir = make_dir("var/cache", exist_ok=True)
|
|
69
|
+
|
|
70
|
+
# Start from a clean slate: wipe an existing directory's contents and
|
|
71
|
+
# recreate it empty. Destructive, and only ever wipes a *directory* (a
|
|
72
|
+
# non-directory -- or a symlink -- at the path still raises). `clean=True`
|
|
73
|
+
# makes `exist_ok` moot, since the directory is removed and remade.
|
|
74
|
+
run_dir = make_dir("out/run_42", clean=True)
|
|
75
|
+
|
|
76
|
+
# Bulk creation with a shared root
|
|
77
|
+
make_dirs(["logs", "tmp"], root="var", exist_ok=True)
|
|
78
|
+
|
|
79
|
+
# Empty checks (raise if missing or not a directory)
|
|
80
|
+
assert dir_empty(cache_dir)
|
|
81
|
+
assert dirs_empty(["logs", "tmp"], root="var")
|
|
82
|
+
|
|
83
|
+
# ...and their negations
|
|
84
|
+
(cache_dir / "data.bin").touch()
|
|
85
|
+
assert dir_not_empty(cache_dir)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Each check has a negated counterpart (`dir_not_empty`, `dirs_not_empty`);
|
|
89
|
+
`dirs_not_empty` is True only when *every* directory is non-empty. The
|
|
90
|
+
`_unsafe` variants (`dir_empty_unsafe`, `dir_not_empty_unsafe`,
|
|
91
|
+
`dirs_empty_unsafe`, `dirs_not_empty_unsafe`) skip existence/type
|
|
92
|
+
validation and are intended for hot paths where the caller has already
|
|
93
|
+
validated.
|
|
94
|
+
|
|
95
|
+
## Path manipulation
|
|
96
|
+
|
|
97
|
+
`stringify_path(s)` converts to forward-slash strings, optionally
|
|
98
|
+
trimming a leading or trailing portion. `wrap_path(s)` prepends and/or
|
|
99
|
+
appends path components, rejecting absolute inputs where ambiguous.
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from pathlib import Path
|
|
103
|
+
from kaparoo.filesystem import stringify_path, stringify_paths, wrap_path
|
|
104
|
+
|
|
105
|
+
# "path/to/file.txt" on every platform (including Windows)
|
|
106
|
+
stringify_path(Path("path") / "to" / "file.txt")
|
|
107
|
+
|
|
108
|
+
# Trim leading or trailing components
|
|
109
|
+
stringify_path("a/b/c", after="a") # "b/c"
|
|
110
|
+
stringify_path("a/b/c", before="c") # "a/b"
|
|
111
|
+
|
|
112
|
+
# Bulk stringify with a shared base.
|
|
113
|
+
stringify_paths(["data/a.txt", "data/b.txt"], after="data") # ["a.txt", "b.txt"]
|
|
114
|
+
|
|
115
|
+
# Compose paths without joining manually
|
|
116
|
+
wrap_path("logs", prepend="var", append="server.log") # var/logs/server.log
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Reserving a destination
|
|
120
|
+
|
|
121
|
+
`reserve_path` guards a path that should *not* yet exist, so you don't
|
|
122
|
+
clobber something when creating a new file or directory there. It only
|
|
123
|
+
checks (and optionally creates the parent) — it never creates or deletes
|
|
124
|
+
the target itself.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from kaparoo.filesystem import reserve_path
|
|
128
|
+
|
|
129
|
+
# Raises FileExistsError if out/run.json exists; otherwise creates the
|
|
130
|
+
# missing parent directory and returns the path ready to write to.
|
|
131
|
+
out = reserve_path("out/run.json", make_parents=True)
|
|
132
|
+
out.write_text("{}")
|
|
133
|
+
|
|
134
|
+
# `exist_ok` (named as in make_dir / Path.mkdir) is a non-destructive
|
|
135
|
+
# bypass: it suppresses the conflict but deletes nothing, so a later write
|
|
136
|
+
# overwrites in place.
|
|
137
|
+
out = reserve_path("out/run.json", exist_ok=True)
|
|
138
|
+
|
|
139
|
+
# `reserve_paths` is the bulk form (fail-fast on the first conflict). It
|
|
140
|
+
# takes no `root`; compose with `wrap_paths` to share a base directory.
|
|
141
|
+
from kaparoo.filesystem import reserve_paths, wrap_paths
|
|
142
|
+
a, b = reserve_paths(wrap_paths(["a.bin", "b.bin"], prepend="out"))
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
For a *directory* destination, `make_dir(..., exist_ok=...)` both guards
|
|
146
|
+
and creates it; for an exclusive *file* create, the stdlib `open(path,
|
|
147
|
+
"x")` raises the same `FileExistsError` directly. Reach for `reserve_path`
|
|
148
|
+
when you want the check (and parent setup) decoupled from the creation.
|
|
149
|
+
|
|
150
|
+
`reserve_path` is intentionally **non-destructive** — it never removes an
|
|
151
|
+
existing target. To start a directory from a clean slate, use the
|
|
152
|
+
`clean` option on `make_dir` / `make_dirs` (see above), which is the only
|
|
153
|
+
destructive operation here and is named to say so.
|
|
154
|
+
|
|
155
|
+
## Safe (atomic) writes
|
|
156
|
+
|
|
157
|
+
`StagedFile` saves a file safely: it stages the content in a temporary file
|
|
158
|
+
in the destination's own directory and moves it into place only on commit.
|
|
159
|
+
A reader never sees a half-written file, and a failed write leaves any
|
|
160
|
+
existing file untouched. It works as a context manager — commit on a clean
|
|
161
|
+
exit, discard on an exception — or explicitly, like a file object.
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
from kaparoo.filesystem import StagedFile
|
|
165
|
+
|
|
166
|
+
# Text (the default), as a context manager: commit on success, discard
|
|
167
|
+
# on error.
|
|
168
|
+
with StagedFile("out/report.json", encoding="utf-8") as f:
|
|
169
|
+
f.write(json.dumps(data)) # an exception here leaves out/ untouched
|
|
170
|
+
|
|
171
|
+
# Binary mode, explicitly: write, then commit (or abort to discard).
|
|
172
|
+
f = StagedFile("out/data.bin", binary=True)
|
|
173
|
+
f.write(payload)
|
|
174
|
+
f.commit() # returns the destination Path; idempotent
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
The default is text (`StagedFile[str]`) with optional `encoding` / `newline`,
|
|
178
|
+
as with `open`; pass `binary=True` for a binary writer (`StagedFile[bytes]`).
|
|
179
|
+
The type parameter follows the mode, so `write` and `file` are typed `str`
|
|
180
|
+
or `bytes` accordingly.
|
|
181
|
+
|
|
182
|
+
With `overwrite=False` (the default) an existing destination raises
|
|
183
|
+
`FileExistsError` up front, and the commit creates the file atomically —
|
|
184
|
+
never clobbering a file that appeared meanwhile. With `overwrite=True` the
|
|
185
|
+
destination is replaced in one atomic step, keeping its previous
|
|
186
|
+
permissions. Pass `make_parents=True` to create the destination's parent
|
|
187
|
+
directory if it is missing. An uncommitted writer (an explicit instance
|
|
188
|
+
dropped without `commit()`) discards its staged file on garbage collection,
|
|
189
|
+
so a partial write is never promoted by accident.
|
|
190
|
+
|
|
191
|
+
The committed file gets the usual umask-based permissions.
|
|
192
|
+
|
|
193
|
+
`StagedDirectory` is the directory counterpart: you populate its `workdir`
|
|
194
|
+
(a temporary directory in the destination's parent) and it is moved into
|
|
195
|
+
place on commit.
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
from kaparoo.filesystem import StagedDirectory
|
|
199
|
+
|
|
200
|
+
with StagedDirectory("out/dataset", make_parents=True) as d:
|
|
201
|
+
(d.workdir / "train.json").write_text(payload)
|
|
202
|
+
(d.workdir / "shards").mkdir()
|
|
203
|
+
# out/dataset appears in one step; an exception would leave it absent
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
Creating a new directory (`overwrite=False`) is atomic — a single rename.
|
|
207
|
+
Replacing an existing one (`overwrite=True`) is *not* fully atomic: the old
|
|
208
|
+
directory is swapped aside and removed, so there is a brief window where the
|
|
209
|
+
destination is absent and, on a rare failure mid-swap, the previous contents
|
|
210
|
+
remain in a sibling `<name>.old` directory for recovery.
|
|
211
|
+
|
|
212
|
+
## Platform notes
|
|
213
|
+
|
|
214
|
+
- **Directory mode bits**: `mode` (on `make_dir` / `make_dirs`) and
|
|
215
|
+
`make=<int>` (on `ensure_dir_exists` / `ensure_dirs_exist`) are
|
|
216
|
+
validated against the `0o1`–`0o7777` range and applied to the created
|
|
217
|
+
directory on **POSIX systems only**. On Windows, mode values are still
|
|
218
|
+
accepted (so cross-platform code stays clean) but the range check is
|
|
219
|
+
skipped and the OS ignores the bits — see
|
|
220
|
+
[`os.mkdir`](https://docs.python.org/3/library/os.html#os.mkdir).
|
|
221
|
+
- **Path separators**: `stringify_path` and `stringify_paths` normalize
|
|
222
|
+
backslashes to forward slashes on Windows. Functions that return
|
|
223
|
+
strings via `stringify=True` (`make_dir`, `ensure_dir_exists`,
|
|
224
|
+
`wrap_path`, ...) inherit this normalization. If you need a native
|
|
225
|
+
Windows path string, call `str(path)` directly on a `Path`.
|
|
226
|
+
|
|
227
|
+
## See also
|
|
228
|
+
|
|
229
|
+
- [`search/`](./search/) for filesystem traversal with filters
|
|
230
|
+
- [`kaparoo.utils`](../utils/) for `Timer` and Optional helpers
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
__all__ = (
|
|
2
2
|
"DirectoryNotFoundError",
|
|
3
3
|
"NotAFileError",
|
|
4
|
+
"StagedDirectory",
|
|
5
|
+
"StagedFile",
|
|
4
6
|
"dir_empty",
|
|
5
7
|
"dir_empty_unsafe",
|
|
6
8
|
"dir_exists",
|
|
9
|
+
"dir_not_empty",
|
|
10
|
+
"dir_not_empty_unsafe",
|
|
7
11
|
"dirs_empty",
|
|
8
12
|
"dirs_empty_unsafe",
|
|
9
13
|
"dirs_exist",
|
|
14
|
+
"dirs_not_empty",
|
|
15
|
+
"dirs_not_empty_unsafe",
|
|
10
16
|
"ensure_dir_exists",
|
|
11
17
|
"ensure_dirs_exist",
|
|
12
18
|
"ensure_file_exists",
|
|
@@ -22,6 +28,8 @@ __all__ = (
|
|
|
22
28
|
"make_dirs",
|
|
23
29
|
"path_exists",
|
|
24
30
|
"paths_exist",
|
|
31
|
+
"reserve_path",
|
|
32
|
+
"reserve_paths",
|
|
25
33
|
"search_dirs",
|
|
26
34
|
"search_files",
|
|
27
35
|
"search_paths",
|
|
@@ -34,8 +42,12 @@ __all__ = (
|
|
|
34
42
|
from kaparoo.filesystem.directory import (
|
|
35
43
|
dir_empty,
|
|
36
44
|
dir_empty_unsafe,
|
|
45
|
+
dir_not_empty,
|
|
46
|
+
dir_not_empty_unsafe,
|
|
37
47
|
dirs_empty,
|
|
38
48
|
dirs_empty_unsafe,
|
|
49
|
+
dirs_not_empty,
|
|
50
|
+
dirs_not_empty_unsafe,
|
|
39
51
|
make_dir,
|
|
40
52
|
make_dirs,
|
|
41
53
|
)
|
|
@@ -65,7 +77,10 @@ from kaparoo.filesystem.search import (
|
|
|
65
77
|
search_files,
|
|
66
78
|
search_paths,
|
|
67
79
|
)
|
|
80
|
+
from kaparoo.filesystem.staged import StagedDirectory, StagedFile
|
|
68
81
|
from kaparoo.filesystem.utils import (
|
|
82
|
+
reserve_path,
|
|
83
|
+
reserve_paths,
|
|
69
84
|
stringify_path,
|
|
70
85
|
stringify_paths,
|
|
71
86
|
wrap_path,
|