kaparoo-python 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/PKG-INFO +1 -1
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/README.md +25 -1
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/__init__.py +2 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/__init__.py +2 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/templates.py +78 -2
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/README.md +2 -2
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/directory.py +36 -12
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/staged.py +61 -20
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/utils.py +10 -2
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/utils/README.md +3 -2
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/utils/__init__.py +4 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/utils/aggregate.py +62 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/pyproject.toml +1 -1
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/LICENSE +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/README.md +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/__init__.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/base.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/composers.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/data/sequences/utils.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/__init__.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/exceptions.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/existence.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/README.md +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/__init__.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/classes.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/deprecated.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/__init__.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/base.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/logical.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/multi_pattern.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/pattern.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/types.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/utils.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/wrappers.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/types.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/py.typed +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/utils/optional.py +0 -0
- {kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/utils/timer.py +0 -0
|
@@ -9,7 +9,7 @@ small set of composers, and ready-to-subclass file-backed templates.
|
|
|
9
9
|
- [`sequences/composers`](./sequences/composers.py) — `SlicedSequence`,
|
|
10
10
|
`ConcatSequence`, `WindowedSequence`
|
|
11
11
|
- [`sequences/templates`](./sequences/templates.py) — `FileFolderSequence`,
|
|
12
|
-
`SingleFileSequence`
|
|
12
|
+
`FileListSequence`, `SingleFileSequence`
|
|
13
13
|
- [`sequences/utils`](./sequences/utils.py) — `generate_batches`
|
|
14
14
|
|
|
15
15
|
All public symbols are re-exported from both `kaparoo.data` and
|
|
@@ -158,6 +158,30 @@ class GlobFolder(FileFolderSequence[bytes]):
|
|
|
158
158
|
folder = GlobFolder("data", pattern="*.png", recursive=True)
|
|
159
159
|
```
|
|
160
160
|
|
|
161
|
+
### `FileListSequence`
|
|
162
|
+
|
|
163
|
+
Same "one file per item" contract as `FileFolderSequence`, but the files
|
|
164
|
+
are given as an explicit list instead of discovered under a `root` — so
|
|
165
|
+
they may live in unrelated directories (or, on Windows, on different drives),
|
|
166
|
+
which `FileFolderSequence` cannot represent. There is no `list_files`;
|
|
167
|
+
subclasses implement only `load_file` and `get_meta`. The input order is
|
|
168
|
+
preserved verbatim (duplicates kept) — sort it yourself if needed.
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from pathlib import Path
|
|
172
|
+
from kaparoo.data.sequences import FileListSequence
|
|
173
|
+
|
|
174
|
+
class BytesList(FileListSequence[bytes]):
|
|
175
|
+
def get_meta(self, index):
|
|
176
|
+
return self.get_file(index)
|
|
177
|
+
|
|
178
|
+
def load_file(self, path):
|
|
179
|
+
return path.read_bytes()
|
|
180
|
+
|
|
181
|
+
# Files from anywhere, in the order given:
|
|
182
|
+
data = BytesList(["images/a.png", "/other/disk/b.png"])
|
|
183
|
+
```
|
|
184
|
+
|
|
161
185
|
### `SingleFileSequence`
|
|
162
186
|
|
|
163
187
|
Thin ABC for the "one file, many records" pattern (a video with many
|
|
@@ -2,6 +2,7 @@ __all__ = (
|
|
|
2
2
|
"ConcatSequence",
|
|
3
3
|
"DataSequence",
|
|
4
4
|
"FileFolderSequence",
|
|
5
|
+
"FileListSequence",
|
|
5
6
|
"SingleFileSequence",
|
|
6
7
|
"SlicedSequence",
|
|
7
8
|
"WindowedSequence",
|
|
@@ -12,6 +13,7 @@ from kaparoo.data.sequences import (
|
|
|
12
13
|
ConcatSequence,
|
|
13
14
|
DataSequence,
|
|
14
15
|
FileFolderSequence,
|
|
16
|
+
FileListSequence,
|
|
15
17
|
SingleFileSequence,
|
|
16
18
|
SlicedSequence,
|
|
17
19
|
WindowedSequence,
|
|
@@ -4,6 +4,7 @@ __all__ = (
|
|
|
4
4
|
"ConcatSequence",
|
|
5
5
|
"DataSequence",
|
|
6
6
|
"FileFolderSequence",
|
|
7
|
+
"FileListSequence",
|
|
7
8
|
"SingleFileSequence",
|
|
8
9
|
"SlicedSequence",
|
|
9
10
|
"WindowedSequence",
|
|
@@ -18,6 +19,7 @@ from kaparoo.data.sequences.composers import (
|
|
|
18
19
|
)
|
|
19
20
|
from kaparoo.data.sequences.templates import (
|
|
20
21
|
FileFolderSequence,
|
|
22
|
+
FileListSequence,
|
|
21
23
|
SingleFileSequence,
|
|
22
24
|
)
|
|
23
25
|
from kaparoo.data.sequences.utils import generate_batches
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
__all__ = ("FileFolderSequence", "SingleFileSequence")
|
|
3
|
+
__all__ = ("FileFolderSequence", "FileListSequence", "SingleFileSequence")
|
|
4
4
|
|
|
5
5
|
from abc import abstractmethod
|
|
6
6
|
from pathlib import Path
|
|
@@ -11,7 +11,7 @@ from kaparoo.filesystem.existence import ensure_dir_exists, ensure_file_exists
|
|
|
11
11
|
from kaparoo.filesystem.utils import stringify_paths, wrap_path
|
|
12
12
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
|
-
from kaparoo.filesystem.types import StrPath
|
|
14
|
+
from kaparoo.filesystem.types import StrPath, StrPaths
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
@@ -149,6 +149,82 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
|
|
|
149
149
|
raise NotImplementedError
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
class FileListSequence[T, M = Path](DataSequence[T, M]):
|
|
153
|
+
"""A `DataSequence` over an explicit, ordered list of files.
|
|
154
|
+
|
|
155
|
+
Like `FileFolderSequence`, items live one-per-file and subclasses
|
|
156
|
+
implement `load_file` and `get_meta`. Unlike it, the files are given
|
|
157
|
+
directly rather than discovered under a `root`, so they may live in
|
|
158
|
+
unrelated directories -- or, on Windows, on different drives -- which
|
|
159
|
+
`FileFolderSequence` cannot represent (it stores paths relative to one
|
|
160
|
+
root). There is no `list_files`: the input list *is* the listing.
|
|
161
|
+
|
|
162
|
+
The given order is preserved verbatim and duplicates are kept; sort the
|
|
163
|
+
input yourself (`sorted(files, key=...)`) if a particular order is
|
|
164
|
+
needed. Paths are not checked for existence at construction; `load_file`
|
|
165
|
+
is called lazily on each `get_item`.
|
|
166
|
+
|
|
167
|
+
The base exposes:
|
|
168
|
+
|
|
169
|
+
- `files: tuple[Path, ...]` — full paths as an immutable snapshot.
|
|
170
|
+
- `get_file(index) -> Path` — full path of the i-th file.
|
|
171
|
+
|
|
172
|
+
Type Parameters:
|
|
173
|
+
T: Item type returned by `get_item`.
|
|
174
|
+
M: Per-item metadata type. Defaults to `Path`; override when the
|
|
175
|
+
metadata is something else (label, line number, ...).
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
files: The file paths to expose, in order.
|
|
179
|
+
|
|
180
|
+
Example:
|
|
181
|
+
>>> from pathlib import Path
|
|
182
|
+
>>> class BytesList(FileListSequence[bytes]):
|
|
183
|
+
... def get_meta(self, index: int) -> Path:
|
|
184
|
+
... return self.get_file(index)
|
|
185
|
+
...
|
|
186
|
+
... def load_file(self, path: Path) -> bytes:
|
|
187
|
+
... return path.read_bytes()
|
|
188
|
+
>>>
|
|
189
|
+
>>> data = BytesList(["images/a.png", "/other/b.png"])
|
|
190
|
+
"""
|
|
191
|
+
|
|
192
|
+
def __init__(self, files: StrPaths) -> None:
|
|
193
|
+
self._files = list(stringify_paths(files))
|
|
194
|
+
|
|
195
|
+
def __len__(self) -> int:
|
|
196
|
+
return len(self._files)
|
|
197
|
+
|
|
198
|
+
@property
|
|
199
|
+
def files(self) -> tuple[Path, ...]:
|
|
200
|
+
"""Immutable snapshot of the full file paths, in the given order.
|
|
201
|
+
|
|
202
|
+
Returns a fresh `tuple[Path, ...]` on each access.
|
|
203
|
+
"""
|
|
204
|
+
return tuple(self.get_file(i) for i in range(len(self)))
|
|
205
|
+
|
|
206
|
+
def get_file(self, index: int) -> Path:
|
|
207
|
+
"""Full Path of the file at `index`."""
|
|
208
|
+
return Path(self._files[index])
|
|
209
|
+
|
|
210
|
+
def get_item(self, index: int) -> T:
|
|
211
|
+
return self.load_file(self.get_file(index))
|
|
212
|
+
|
|
213
|
+
@abstractmethod
|
|
214
|
+
def get_meta(self, index: int) -> M:
|
|
215
|
+
raise NotImplementedError
|
|
216
|
+
|
|
217
|
+
@abstractmethod
|
|
218
|
+
def load_file(self, path: Path) -> T:
|
|
219
|
+
"""Decode a single file into an item of type `T`.
|
|
220
|
+
|
|
221
|
+
Called lazily on each `get_item` -- not at construction time.
|
|
222
|
+
Subclasses may freely use external libraries (PIL, librosa,
|
|
223
|
+
cv2, ...) to decode.
|
|
224
|
+
"""
|
|
225
|
+
raise NotImplementedError
|
|
226
|
+
|
|
227
|
+
|
|
152
228
|
class SingleFileSequence[T, M = None](DataSequence[T, M]):
|
|
153
229
|
"""A `DataSequence` backed by a single file that holds multiple records.
|
|
154
230
|
|
|
@@ -69,8 +69,8 @@ cache_dir = make_dir("var/cache", exist_ok=True)
|
|
|
69
69
|
|
|
70
70
|
# Start from a clean slate: wipe an existing directory's contents and
|
|
71
71
|
# recreate it empty. Destructive, and only ever wipes a *directory* (a
|
|
72
|
-
# non-directory at the path still raises). `clean=True`
|
|
73
|
-
# moot, since the directory is removed and remade.
|
|
72
|
+
# non-directory -- or a symlink -- at the path still raises). `clean=True`
|
|
73
|
+
# makes `exist_ok` moot, since the directory is removed and remade.
|
|
74
74
|
run_dir = make_dir("out/run_42", clean=True)
|
|
75
75
|
|
|
76
76
|
# Bulk creation with a shared root
|
|
@@ -38,6 +38,20 @@ if TYPE_CHECKING:
|
|
|
38
38
|
# ========================== #
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
def _ensure_directory_target(path: Path, *, clean: bool) -> None:
|
|
42
|
+
"""Reject a path that cannot serve as a directory target.
|
|
43
|
+
|
|
44
|
+
Raises `NotADirectoryError` when `path` exists but is not a directory,
|
|
45
|
+
or when `clean` is requested on a symlink -- cleaning must operate on a
|
|
46
|
+
real directory, never through a link (which would otherwise reach the
|
|
47
|
+
link's target). A symlink to a directory is accepted only when `clean`
|
|
48
|
+
is False.
|
|
49
|
+
"""
|
|
50
|
+
if (path.exists() and not path.is_dir()) or (clean and path.is_symlink()):
|
|
51
|
+
msg = f"not a usable directory target: {path}"
|
|
52
|
+
raise NotADirectoryError(msg)
|
|
53
|
+
|
|
54
|
+
|
|
41
55
|
@overload
|
|
42
56
|
def make_dir(
|
|
43
57
|
path: StrPath,
|
|
@@ -88,9 +102,9 @@ def make_dir(
|
|
|
88
102
|
Defaults to False.
|
|
89
103
|
clean: Whether to recreate the directory empty when it already exists,
|
|
90
104
|
removing its contents first. Only an existing *directory* is wiped;
|
|
91
|
-
a non-directory still raises. Because the
|
|
92
|
-
remade, `clean=True` makes `exist_ok`
|
|
93
|
-
Defaults to False.
|
|
105
|
+
a non-directory -- or a symlink -- still raises. Because the
|
|
106
|
+
directory is removed and remade, `clean=True` makes `exist_ok`
|
|
107
|
+
moot. **Destructive.** Defaults to False.
|
|
94
108
|
stringify: Whether to return the path as a string. Defaults to False.
|
|
95
109
|
|
|
96
110
|
Returns:
|
|
@@ -100,15 +114,14 @@ def make_dir(
|
|
|
100
114
|
Raises:
|
|
101
115
|
ValueError: If `mode` is outside the range 0o1-0o7777
|
|
102
116
|
(not checked on Windows, where the mode is ignored).
|
|
103
|
-
NotADirectoryError: If the path exists but is not a directory
|
|
117
|
+
NotADirectoryError: If the path exists but is not a directory, or
|
|
118
|
+
`clean` is True and the path is a symlink.
|
|
104
119
|
OSError: If `exist_ok` is False, `clean` is False, and the path
|
|
105
120
|
already exists.
|
|
106
121
|
"""
|
|
107
122
|
_validate_mode(mode)
|
|
108
123
|
path = Path(path)
|
|
109
|
-
|
|
110
|
-
msg = f"not a directory: {path}"
|
|
111
|
-
raise NotADirectoryError(msg)
|
|
124
|
+
_ensure_directory_target(path, clean=clean)
|
|
112
125
|
if clean and path.is_dir():
|
|
113
126
|
shutil.rmtree(path)
|
|
114
127
|
path.mkdir(mode=mode, parents=True, exist_ok=exist_ok)
|
|
@@ -170,9 +183,9 @@ def make_dirs(
|
|
|
170
183
|
Defaults to False.
|
|
171
184
|
clean: Whether to recreate each directory empty when it already exists,
|
|
172
185
|
removing its contents first. Only an existing *directory* is wiped;
|
|
173
|
-
a non-directory still raises. Because the
|
|
174
|
-
remade, `clean=True` makes `exist_ok`
|
|
175
|
-
Defaults to False.
|
|
186
|
+
a non-directory -- or a symlink -- still raises. Because the
|
|
187
|
+
directory is removed and remade, `clean=True` makes `exist_ok`
|
|
188
|
+
moot. **Destructive.** Defaults to False.
|
|
176
189
|
stringify: Whether to return the paths as strings. Defaults to False.
|
|
177
190
|
|
|
178
191
|
Returns:
|
|
@@ -183,15 +196,26 @@ def make_dirs(
|
|
|
183
196
|
ValueError: If `mode` is outside the range 0o1-0o7777
|
|
184
197
|
(not checked on Windows, where the mode is ignored).
|
|
185
198
|
DirectoryNotFoundError: If `root` is provided and does not exist.
|
|
186
|
-
NotADirectoryError: If `root` is provided and is not a directory
|
|
199
|
+
NotADirectoryError: If `root` is provided and is not a directory, if
|
|
200
|
+
any path exists but is not a directory, or `clean` is True and
|
|
201
|
+
any path is a symlink.
|
|
187
202
|
ValueError: If `root` is provided and any of the paths are absolute.
|
|
188
203
|
OSError: If `exist_ok` is False, `clean` is False, and any of the
|
|
189
204
|
paths already exist.
|
|
190
|
-
|
|
205
|
+
|
|
206
|
+
Note:
|
|
207
|
+
Every path is validated (the non-directory / symlink checks above)
|
|
208
|
+
*before* any directory is wiped or created, so a deterministically
|
|
209
|
+
bad entry -- e.g. a file in the list -- fails without partially
|
|
210
|
+
cleaning earlier entries. Creation/cleanup is otherwise per-path and
|
|
211
|
+
not transactional, so a runtime failure (a race, a permission error)
|
|
212
|
+
partway through can still leave earlier entries created or cleaned.
|
|
191
213
|
"""
|
|
192
214
|
_validate_mode(mode)
|
|
193
215
|
paths = _join_root_if_provided(paths, root)
|
|
194
216
|
directories = [Path(p) for p in paths]
|
|
217
|
+
for directory in directories:
|
|
218
|
+
_ensure_directory_target(directory, clean=clean)
|
|
195
219
|
for directory in directories:
|
|
196
220
|
if clean and directory.is_dir():
|
|
197
221
|
shutil.rmtree(directory)
|
|
@@ -58,6 +58,23 @@ def _default_dir_mode() -> int:
|
|
|
58
58
|
return _umask_default(0o777)
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
def _fsync_parent(path: Path) -> None:
|
|
62
|
+
"""Best-effort fsync of `path`'s parent directory entry.
|
|
63
|
+
|
|
64
|
+
Makes a just-completed rename/link into `path` durable across a crash on
|
|
65
|
+
POSIX (the file's own data is fsynced separately). A no-op where a
|
|
66
|
+
directory cannot be opened for fsync, e.g. Windows.
|
|
67
|
+
"""
|
|
68
|
+
try:
|
|
69
|
+
fd = os.open(path.parent, os.O_RDONLY)
|
|
70
|
+
except OSError:
|
|
71
|
+
return
|
|
72
|
+
try:
|
|
73
|
+
os.fsync(fd)
|
|
74
|
+
finally:
|
|
75
|
+
os.close(fd)
|
|
76
|
+
|
|
77
|
+
|
|
61
78
|
class StagedFile[AnyStrT: (str, bytes)]:
|
|
62
79
|
"""Write a file safely: stage to a temp file, then commit by atomic move.
|
|
63
80
|
|
|
@@ -87,10 +104,13 @@ class StagedFile[AnyStrT: (str, bytes)]:
|
|
|
87
104
|
```
|
|
88
105
|
|
|
89
106
|
With `overwrite=False` (the default) an existing destination is a
|
|
90
|
-
fail-fast `FileExistsError`, and the commit creates the file atomically
|
|
91
|
-
it never clobbers a file that appeared meanwhile.
|
|
92
|
-
|
|
93
|
-
|
|
107
|
+
fail-fast `FileExistsError`, and the commit creates the file atomically
|
|
108
|
+
via a hardlink -- it never clobbers a file that appeared meanwhile. On a
|
|
109
|
+
filesystem without hardlink support (FAT/exFAT, some network mounts) the
|
|
110
|
+
commit falls back to a best-effort existence check plus replace, leaving
|
|
111
|
+
a small window where a file appearing concurrently could be clobbered.
|
|
112
|
+
With `overwrite=True` the destination is atomically replaced, inheriting
|
|
113
|
+
its previous permissions.
|
|
94
114
|
|
|
95
115
|
The committed file gets the usual umask-based permissions (not the
|
|
96
116
|
restrictive mode of the internal temp file). The destination's parent
|
|
@@ -254,15 +274,26 @@ class StagedFile[AnyStrT: (str, bytes)]:
|
|
|
254
274
|
if self._overwrite:
|
|
255
275
|
self._temp_path.replace(self._path)
|
|
256
276
|
else:
|
|
277
|
+
# Atomic exclusive create via hardlink where supported. A
|
|
278
|
+
# filesystem without hardlinks (FAT/exFAT, some network mounts)
|
|
279
|
+
# raises a non-`FileExistsError` `OSError`; fall back to a
|
|
280
|
+
# best-effort existence check plus `replace` (which leaves a
|
|
281
|
+
# small TOCTOU window where a file appearing meanwhile could be
|
|
282
|
+
# clobbered -- unavoidable without an atomic no-clobber move).
|
|
257
283
|
try:
|
|
258
284
|
self._path.hardlink_to(self._temp_path)
|
|
259
|
-
except
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
285
|
+
except OSError as exc:
|
|
286
|
+
if isinstance(exc, FileExistsError) or self._path.exists():
|
|
287
|
+
self._temp_path.unlink(missing_ok=True)
|
|
288
|
+
msg = (
|
|
289
|
+
"file already exists, pass overwrite=True to replace: "
|
|
290
|
+
f"{self._path}"
|
|
291
|
+
)
|
|
292
|
+
raise FileExistsError(msg) from None
|
|
293
|
+
self._temp_path.replace(self._path)
|
|
294
|
+
else:
|
|
295
|
+
self._temp_path.unlink()
|
|
296
|
+
_fsync_parent(self._path)
|
|
266
297
|
self._committed = True
|
|
267
298
|
self._finalizer.detach()
|
|
268
299
|
return self._path
|
|
@@ -318,9 +349,9 @@ class StagedDirectory:
|
|
|
318
349
|
staged directory is moved into place with a single rename, and an existing
|
|
319
350
|
destination is a fail-fast `FileExistsError`. Replacing an existing one
|
|
320
351
|
(`overwrite=True`) is *not* fully atomic -- the old directory is swapped
|
|
321
|
-
aside
|
|
322
|
-
|
|
323
|
-
``<name>.old`` directory for recovery.
|
|
352
|
+
aside, the staged one moved in, then the old removed. A failed move
|
|
353
|
+
restores the original; only a crash *between* the two renames leaves the
|
|
354
|
+
previous contents in a sibling ``<name>.old`` directory for recovery.
|
|
324
355
|
|
|
325
356
|
The committed directory gets the usual umask-based permissions. Pass
|
|
326
357
|
`make_parents=True` to create the destination's parent if it is missing.
|
|
@@ -395,6 +426,8 @@ class StagedDirectory:
|
|
|
395
426
|
appeared after this builder opened.
|
|
396
427
|
NotADirectoryError: If `overwrite` is True and the destination
|
|
397
428
|
exists but is not a directory.
|
|
429
|
+
OSError: If replacing an existing directory and moving the staged
|
|
430
|
+
one into place fails; the original is restored first.
|
|
398
431
|
"""
|
|
399
432
|
if self._committed:
|
|
400
433
|
return self._path
|
|
@@ -420,16 +453,24 @@ class StagedDirectory:
|
|
|
420
453
|
mode = stat.S_IMODE(self._path.stat().st_mode)
|
|
421
454
|
self._workdir.chmod(mode)
|
|
422
455
|
if exists:
|
|
423
|
-
# Replacing an existing directory.
|
|
424
|
-
# swap the old one aside, move the staged one
|
|
425
|
-
# old.
|
|
426
|
-
#
|
|
456
|
+
# Replacing an existing directory. There is no portable atomic
|
|
457
|
+
# directory replace, so swap the old one aside, move the staged one
|
|
458
|
+
# in, then remove the old. If the second move fails, restore the
|
|
459
|
+
# original; removing the backup is best-effort (the destination is
|
|
460
|
+
# already correct). A crash *between* the two moves is the residual
|
|
461
|
+
# non-atomic window -- the previous contents remain in a sibling
|
|
462
|
+
# `<name>.old` directory for manual recovery.
|
|
427
463
|
backup = self._path.with_name(f"{self._workdir.name}.old")
|
|
428
464
|
self._path.rename(backup)
|
|
429
|
-
|
|
430
|
-
|
|
465
|
+
try:
|
|
466
|
+
self._workdir.rename(self._path)
|
|
467
|
+
except OSError:
|
|
468
|
+
backup.rename(self._path)
|
|
469
|
+
raise
|
|
470
|
+
shutil.rmtree(backup, ignore_errors=True)
|
|
431
471
|
else:
|
|
432
472
|
self._workdir.rename(self._path)
|
|
473
|
+
_fsync_parent(self._path)
|
|
433
474
|
self._committed = True
|
|
434
475
|
self._finalizer.detach()
|
|
435
476
|
return self._path
|
|
@@ -266,6 +266,10 @@ def reserve_path(
|
|
|
266
266
|
an exclusive file create, `open(path, "x")` raises the same
|
|
267
267
|
`FileExistsError` directly.
|
|
268
268
|
|
|
269
|
+
A symlink counts as occupying the path -- including a *broken* one,
|
|
270
|
+
which `Path.exists` alone reports as absent yet still takes the name
|
|
271
|
+
(so `open(path, "x")` would fail). Such a path is treated as existing.
|
|
272
|
+
|
|
269
273
|
Args:
|
|
270
274
|
path: The path that should not yet exist.
|
|
271
275
|
exist_ok: Whether to allow an already-existing path. Defaults to False.
|
|
@@ -277,9 +281,13 @@ def reserve_path(
|
|
|
277
281
|
The path as a Path object or a string, depending on `stringify`.
|
|
278
282
|
|
|
279
283
|
Raises:
|
|
280
|
-
FileExistsError: If the path exists and `exist_ok`
|
|
284
|
+
FileExistsError: If the path exists (or is a symlink) and `exist_ok`
|
|
285
|
+
is False.
|
|
286
|
+
OSError: If `make_parents` is True and the parent cannot be created
|
|
287
|
+
(e.g. an ancestor along the path is a file).
|
|
281
288
|
"""
|
|
282
|
-
|
|
289
|
+
path = Path(path)
|
|
290
|
+
if (path.exists() or path.is_symlink()) and not exist_ok:
|
|
283
291
|
msg = f"path already exists: {path}"
|
|
284
292
|
raise FileExistsError(msg)
|
|
285
293
|
if make_parents:
|
|
@@ -164,6 +164,7 @@ print(run.compute())
|
|
|
164
164
|
| Reduction | Result | Empty |
|
|
165
165
|
| --- | --- | --- |
|
|
166
166
|
| `Mean()` | weighted arithmetic mean | `nan` |
|
|
167
|
+
| `Var()` / `Std()` | weighted population variance / std (Welford) | `nan` |
|
|
167
168
|
| `Sum()` | sum of values (weight ignored) | `0.0` |
|
|
168
169
|
| `Min()` / `Max()` | running min / max (weight ignored) | `nan` |
|
|
169
170
|
| `Last()` | most recent value | `nan` |
|
|
@@ -177,8 +178,8 @@ import operator
|
|
|
177
178
|
Aggregator(Fold(operator.mul, 1.0)) # running product
|
|
178
179
|
```
|
|
179
180
|
|
|
180
|
-
For a reduction with richer state (weighted
|
|
181
|
-
`Reduction` (or `UnweightedReduction` when weight is irrelevant) and
|
|
181
|
+
For a reduction with richer state (RMS, a weighted geometric mean, ...),
|
|
182
|
+
subclass `Reduction` (or `UnweightedReduction` when weight is irrelevant) and
|
|
182
183
|
implement `identity` / `step` (or `accumulate`) / `merge` / `result`. The
|
|
183
184
|
`merge` method *is* the nesting behavior, so custom reductions nest
|
|
184
185
|
exactly as the built-ins.
|
|
@@ -8,9 +8,11 @@ __all__ = (
|
|
|
8
8
|
"Reduction",
|
|
9
9
|
"SegmentRecord",
|
|
10
10
|
"SegmentTimer",
|
|
11
|
+
"Std",
|
|
11
12
|
"Sum",
|
|
12
13
|
"Timer",
|
|
13
14
|
"UnweightedReduction",
|
|
15
|
+
"Var",
|
|
14
16
|
"factory_if_none",
|
|
15
17
|
"replace_if_none",
|
|
16
18
|
"unwrap_or_default",
|
|
@@ -27,8 +29,10 @@ from kaparoo.utils.aggregate import (
|
|
|
27
29
|
Mean,
|
|
28
30
|
Min,
|
|
29
31
|
Reduction,
|
|
32
|
+
Std,
|
|
30
33
|
Sum,
|
|
31
34
|
UnweightedReduction,
|
|
35
|
+
Var,
|
|
32
36
|
)
|
|
33
37
|
from kaparoo.utils.optional import (
|
|
34
38
|
factory_if_none,
|
|
@@ -16,10 +16,13 @@ __all__ = (
|
|
|
16
16
|
"Mean",
|
|
17
17
|
"Min",
|
|
18
18
|
"Reduction",
|
|
19
|
+
"Std",
|
|
19
20
|
"Sum",
|
|
20
21
|
"UnweightedReduction",
|
|
22
|
+
"Var",
|
|
21
23
|
)
|
|
22
24
|
|
|
25
|
+
import math
|
|
23
26
|
from abc import ABC, abstractmethod
|
|
24
27
|
from dataclasses import dataclass
|
|
25
28
|
from typing import TYPE_CHECKING
|
|
@@ -108,6 +111,65 @@ class Mean(Reduction[tuple[float, float]]):
|
|
|
108
111
|
return state[0] / state[1] if state[1] else float("nan")
|
|
109
112
|
|
|
110
113
|
|
|
114
|
+
@dataclass(frozen=True)
|
|
115
|
+
class Var(Reduction[tuple[float, float, float]]):
|
|
116
|
+
"""Weighted population variance; state is `(weight, mean, M2)`.
|
|
117
|
+
|
|
118
|
+
Accumulated online (Welford) and merged exactly (Chan's parallel
|
|
119
|
+
algorithm), so it nests across loop levels like the other reductions.
|
|
120
|
+
Uses the population convention -- M2 over the total weight, as in
|
|
121
|
+
numpy's default `ddof=0` -- which stays well-defined under weighting.
|
|
122
|
+
Empty -> `nan`.
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
def identity(self) -> tuple[float, float, float]:
|
|
126
|
+
return (0.0, 0.0, 0.0)
|
|
127
|
+
|
|
128
|
+
def step(
|
|
129
|
+
self, state: tuple[float, float, float], value: float, weight: float
|
|
130
|
+
) -> tuple[float, float, float]:
|
|
131
|
+
total, mean, m2 = state
|
|
132
|
+
total += weight
|
|
133
|
+
delta = value - mean
|
|
134
|
+
mean += (weight / total) * delta
|
|
135
|
+
m2 += weight * delta * (value - mean)
|
|
136
|
+
return (total, mean, m2)
|
|
137
|
+
|
|
138
|
+
def merge(
|
|
139
|
+
self,
|
|
140
|
+
a: tuple[float, float, float],
|
|
141
|
+
b: tuple[float, float, float],
|
|
142
|
+
) -> tuple[float, float, float]:
|
|
143
|
+
total_a, mean_a, m2_a = a
|
|
144
|
+
total_b, mean_b, m2_b = b
|
|
145
|
+
total = total_a + total_b
|
|
146
|
+
if total == 0:
|
|
147
|
+
return (0.0, 0.0, 0.0)
|
|
148
|
+
delta = mean_b - mean_a
|
|
149
|
+
mean = mean_a + delta * total_b / total
|
|
150
|
+
m2 = m2_a + m2_b + delta * delta * total_a * total_b / total
|
|
151
|
+
return (total, mean, m2)
|
|
152
|
+
|
|
153
|
+
def result(self, state: tuple[float, float, float]) -> float:
|
|
154
|
+
total, _mean, m2 = state
|
|
155
|
+
return m2 / total if total else float("nan")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass(frozen=True)
|
|
159
|
+
class Std(Var):
|
|
160
|
+
"""Weighted population standard deviation: the square root of `Var`.
|
|
161
|
+
|
|
162
|
+
Shares `Var`'s online, mergeable moments; only the final projection
|
|
163
|
+
differs. Empty -> `nan`.
|
|
164
|
+
"""
|
|
165
|
+
|
|
166
|
+
def result(self, state: tuple[float, float, float]) -> float:
|
|
167
|
+
variance = super().result(state)
|
|
168
|
+
if math.isnan(variance): # empty state
|
|
169
|
+
return variance
|
|
170
|
+
return max(variance, 0.0) ** 0.5
|
|
171
|
+
|
|
172
|
+
|
|
111
173
|
@dataclass(frozen=True)
|
|
112
174
|
class Sum(UnweightedReduction[float]):
|
|
113
175
|
"""Running sum of values (weight ignored). Empty -> `0.0`."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kaparoo_python-0.4.0 → kaparoo_python-0.5.0}/kaparoo/filesystem/search/filters/multi_pattern.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|