kaparoo-python 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kaparoo/data/__init__.py CHANGED
@@ -2,6 +2,7 @@ __all__ = (
2
2
  "ConcatSequence",
3
3
  "DataSequence",
4
4
  "FileFolderSequence",
5
+ "FileListSequence",
5
6
  "SingleFileSequence",
6
7
  "SlicedSequence",
7
8
  "WindowedSequence",
@@ -12,6 +13,7 @@ from kaparoo.data.sequences import (
12
13
  ConcatSequence,
13
14
  DataSequence,
14
15
  FileFolderSequence,
16
+ FileListSequence,
15
17
  SingleFileSequence,
16
18
  SlicedSequence,
17
19
  WindowedSequence,
@@ -4,6 +4,7 @@ __all__ = (
4
4
  "ConcatSequence",
5
5
  "DataSequence",
6
6
  "FileFolderSequence",
7
+ "FileListSequence",
7
8
  "SingleFileSequence",
8
9
  "SlicedSequence",
9
10
  "WindowedSequence",
@@ -18,6 +19,7 @@ from kaparoo.data.sequences.composers import (
18
19
  )
19
20
  from kaparoo.data.sequences.templates import (
20
21
  FileFolderSequence,
22
+ FileListSequence,
21
23
  SingleFileSequence,
22
24
  )
23
25
  from kaparoo.data.sequences.utils import generate_batches
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __all__ = ("FileFolderSequence", "SingleFileSequence")
3
+ __all__ = ("FileFolderSequence", "FileListSequence", "SingleFileSequence")
4
4
 
5
5
  from abc import abstractmethod
6
6
  from pathlib import Path
@@ -11,7 +11,7 @@ from kaparoo.filesystem.existence import ensure_dir_exists, ensure_file_exists
11
11
  from kaparoo.filesystem.utils import stringify_paths, wrap_path
12
12
 
13
13
  if TYPE_CHECKING:
14
- from kaparoo.filesystem.types import StrPath
14
+ from kaparoo.filesystem.types import StrPath, StrPaths
15
15
 
16
16
 
17
17
  class FileFolderSequence[T, M = Path](DataSequence[T, M]):
@@ -149,6 +149,82 @@ class FileFolderSequence[T, M = Path](DataSequence[T, M]):
149
149
  raise NotImplementedError
150
150
 
151
151
 
152
+ class FileListSequence[T, M = Path](DataSequence[T, M]):
153
+ """A `DataSequence` over an explicit, ordered list of files.
154
+
155
+ Like `FileFolderSequence`, items live one-per-file and subclasses
156
+ implement `load_file` and `get_meta`. Unlike it, the files are given
157
+ directly rather than discovered under a `root`, so they may live in
158
+ unrelated directories -- or, on Windows, on different drives -- which
159
+ `FileFolderSequence` cannot represent (it stores paths relative to one
160
+ root). There is no `list_files`: the input list *is* the listing.
161
+
162
+ The given order is preserved verbatim and duplicates are kept; sort the
163
+ input yourself (`sorted(files, key=...)`) if a particular order is
164
+ needed. Paths are not checked for existence at construction; `load_file`
165
+ is called lazily on each `get_item`.
166
+
167
+ The base exposes:
168
+
169
+ - `files: tuple[Path, ...]` — full paths as an immutable snapshot.
170
+ - `get_file(index) -> Path` — full path of the i-th file.
171
+
172
+ Type Parameters:
173
+ T: Item type returned by `get_item`.
174
+ M: Per-item metadata type. Defaults to `Path`; override when the
175
+ metadata is something else (label, line number, ...).
176
+
177
+ Args:
178
+ files: The file paths to expose, in order.
179
+
180
+ Example:
181
+ >>> from pathlib import Path
182
+ >>> class BytesList(FileListSequence[bytes]):
183
+ ... def get_meta(self, index: int) -> Path:
184
+ ... return self.get_file(index)
185
+ ...
186
+ ... def load_file(self, path: Path) -> bytes:
187
+ ... return path.read_bytes()
188
+ >>>
189
+ >>> data = BytesList(["images/a.png", "/other/b.png"])
190
+ """
191
+
192
+ def __init__(self, files: StrPaths) -> None:
193
+ self._files = list(stringify_paths(files))
194
+
195
+ def __len__(self) -> int:
196
+ return len(self._files)
197
+
198
+ @property
199
+ def files(self) -> tuple[Path, ...]:
200
+ """Immutable snapshot of the full file paths, in the given order.
201
+
202
+ Returns a fresh `tuple[Path, ...]` on each access.
203
+ """
204
+ return tuple(self.get_file(i) for i in range(len(self)))
205
+
206
+ def get_file(self, index: int) -> Path:
207
+ """Full Path of the file at `index`."""
208
+ return Path(self._files[index])
209
+
210
+ def get_item(self, index: int) -> T:
211
+ return self.load_file(self.get_file(index))
212
+
213
+ @abstractmethod
214
+ def get_meta(self, index: int) -> M:
215
+ raise NotImplementedError
216
+
217
+ @abstractmethod
218
+ def load_file(self, path: Path) -> T:
219
+ """Decode a single file into an item of type `T`.
220
+
221
+ Called lazily on each `get_item` -- not at construction time.
222
+ Subclasses may freely use external libraries (PIL, librosa,
223
+ cv2, ...) to decode.
224
+ """
225
+ raise NotImplementedError
226
+
227
+
152
228
  class SingleFileSequence[T, M = None](DataSequence[T, M]):
153
229
  """A `DataSequence` backed by a single file that holds multiple records.
154
230
 
@@ -1,12 +1,18 @@
1
1
  __all__ = (
2
2
  "DirectoryNotFoundError",
3
3
  "NotAFileError",
4
+ "StagedDirectory",
5
+ "StagedFile",
4
6
  "dir_empty",
5
7
  "dir_empty_unsafe",
6
8
  "dir_exists",
9
+ "dir_not_empty",
10
+ "dir_not_empty_unsafe",
7
11
  "dirs_empty",
8
12
  "dirs_empty_unsafe",
9
13
  "dirs_exist",
14
+ "dirs_not_empty",
15
+ "dirs_not_empty_unsafe",
10
16
  "ensure_dir_exists",
11
17
  "ensure_dirs_exist",
12
18
  "ensure_file_exists",
@@ -22,6 +28,8 @@ __all__ = (
22
28
  "make_dirs",
23
29
  "path_exists",
24
30
  "paths_exist",
31
+ "reserve_path",
32
+ "reserve_paths",
25
33
  "search_dirs",
26
34
  "search_files",
27
35
  "search_paths",
@@ -34,8 +42,12 @@ __all__ = (
34
42
  from kaparoo.filesystem.directory import (
35
43
  dir_empty,
36
44
  dir_empty_unsafe,
45
+ dir_not_empty,
46
+ dir_not_empty_unsafe,
37
47
  dirs_empty,
38
48
  dirs_empty_unsafe,
49
+ dirs_not_empty,
50
+ dirs_not_empty_unsafe,
39
51
  make_dir,
40
52
  make_dirs,
41
53
  )
@@ -65,7 +77,10 @@ from kaparoo.filesystem.search import (
65
77
  search_files,
66
78
  search_paths,
67
79
  )
80
+ from kaparoo.filesystem.staged import StagedDirectory, StagedFile
68
81
  from kaparoo.filesystem.utils import (
82
+ reserve_path,
83
+ reserve_paths,
69
84
  stringify_path,
70
85
  stringify_paths,
71
86
  wrap_path,
@@ -3,13 +3,18 @@ from __future__ import annotations
3
3
  __all__ = (
4
4
  "dir_empty",
5
5
  "dir_empty_unsafe",
6
+ "dir_not_empty",
7
+ "dir_not_empty_unsafe",
6
8
  "dirs_empty",
7
9
  "dirs_empty_unsafe",
10
+ "dirs_not_empty",
11
+ "dirs_not_empty_unsafe",
8
12
  "make_dir",
9
13
  "make_dirs",
10
14
  )
11
15
 
12
16
  import os
17
+ import shutil
13
18
  from pathlib import Path
14
19
  from typing import TYPE_CHECKING, overload
15
20
 
@@ -33,12 +38,27 @@ if TYPE_CHECKING:
33
38
  # ========================== #
34
39
 
35
40
 
41
def _ensure_directory_target(path: Path, *, clean: bool) -> None:
    """Reject a path that cannot serve as a directory target.

    Args:
        path: The candidate directory path.
        clean: Whether the caller intends to wipe and recreate the directory.

    Raises:
        NotADirectoryError: If something other than a directory occupies
            `path` -- a regular file, a symlink to a non-directory, or a
            dangling symlink -- or if `clean` is requested on any symlink.
            Cleaning must operate on a real directory, never through a link
            (which would otherwise reach the link's target). A symlink to a
            directory is accepted only when `clean` is False.
    """
    is_link = path.is_symlink()
    # `Path.exists()` follows symlinks, so a dangling link looks absent even
    # though the name is taken. Count the link itself as occupying the path so
    # the problem is reported here, up front, rather than later by `mkdir`.
    occupied = is_link or path.exists()
    if (occupied and not path.is_dir()) or (clean and is_link):
        msg = f"not a usable directory target: {path}"
        raise NotADirectoryError(msg)
53
+
54
+
36
55
  @overload
37
56
  def make_dir(
38
57
  path: StrPath,
39
58
  *,
40
59
  mode: int = 0o777,
41
60
  exist_ok: bool = False,
61
+ clean: bool = False,
42
62
  stringify: Literal[False] = False,
43
63
  ) -> Path: ...
44
64
 
@@ -49,6 +69,7 @@ def make_dir(
49
69
  *,
50
70
  mode: int = 0o777,
51
71
  exist_ok: bool = False,
72
+ clean: bool = False,
52
73
  stringify: Literal[True],
53
74
  ) -> str: ...
54
75
 
@@ -59,6 +80,7 @@ def make_dir(
59
80
  *,
60
81
  mode: int = 0o777,
61
82
  exist_ok: bool = False,
83
+ clean: bool = False,
62
84
  stringify: bool,
63
85
  ) -> Path | str: ...
64
86
 
@@ -68,6 +90,7 @@ def make_dir(
68
90
  *,
69
91
  mode: int = 0o777,
70
92
  exist_ok: bool = False,
93
+ clean: bool = False,
71
94
  stringify: bool = False,
72
95
  ) -> Path | str:
73
96
  """Recursively create a directory.
@@ -77,6 +100,11 @@ def make_dir(
77
100
  mode: The mode to use when creating the directory. Defaults to 0o777.
78
101
  exist_ok: Whether to suppress OSError if the path already exists.
79
102
  Defaults to False.
103
+ clean: Whether to recreate the directory empty when it already exists,
104
+ removing its contents first. Only an existing *directory* is wiped;
105
+ a non-directory -- or a symlink -- still raises. Because the
106
+ directory is removed and remade, `clean=True` makes `exist_ok`
107
+ moot. **Destructive.** Defaults to False.
80
108
  stringify: Whether to return the path as a string. Defaults to False.
81
109
 
82
110
  Returns:
@@ -86,14 +114,16 @@ def make_dir(
86
114
  Raises:
87
115
  ValueError: If `mode` is outside the range 0o1-0o7777
88
116
  (not checked on Windows, where the mode is ignored).
89
- NotADirectoryError: If the path exists but is not a directory.
90
- OSError: If `exist_ok` is False and the path already exists.
117
+ NotADirectoryError: If the path exists but is not a directory, or
118
+ `clean` is True and the path is a symlink.
119
+ OSError: If `exist_ok` is False, `clean` is False, and the path
120
+ already exists.
91
121
  """
92
122
  _validate_mode(mode)
93
123
  path = Path(path)
94
- if path.exists() and not path.is_dir():
95
- msg = f"not a directory: {path}"
96
- raise NotADirectoryError(msg)
124
+ _ensure_directory_target(path, clean=clean)
125
+ if clean and path.is_dir():
126
+ shutil.rmtree(path)
97
127
  path.mkdir(mode=mode, parents=True, exist_ok=exist_ok)
98
128
  return stringify_path(path) if stringify else path
99
129
 
@@ -105,6 +135,7 @@ def make_dirs(
105
135
  root: StrPath | None = None,
106
136
  mode: int = 0o777,
107
137
  exist_ok: bool = False,
138
+ clean: bool = False,
108
139
  stringify: Literal[False] = False,
109
140
  ) -> Sequence[Path]: ...
110
141
 
@@ -116,6 +147,7 @@ def make_dirs(
116
147
  root: StrPath | None = None,
117
148
  mode: int = 0o777,
118
149
  exist_ok: bool = False,
150
+ clean: bool = False,
119
151
  stringify: Literal[True],
120
152
  ) -> Sequence[str]: ...
121
153
 
@@ -127,6 +159,7 @@ def make_dirs(
127
159
  root: StrPath | None = None,
128
160
  mode: int = 0o777,
129
161
  exist_ok: bool = False,
162
+ clean: bool = False,
130
163
  stringify: bool,
131
164
  ) -> Sequence[Path] | Sequence[str]: ...
132
165
 
@@ -137,6 +170,7 @@ def make_dirs(
137
170
  root: StrPath | None = None,
138
171
  mode: int = 0o777,
139
172
  exist_ok: bool = False,
173
+ clean: bool = False,
140
174
  stringify: bool = False,
141
175
  ) -> Sequence[Path] | Sequence[str]:
142
176
  """Recursively create directories.
@@ -147,6 +181,11 @@ def make_dirs(
147
181
  mode: The mode to use when creating the directories. Defaults to 0o777.
148
182
  exist_ok: Whether to suppress OSError if any of the paths already exist.
149
183
  Defaults to False.
184
+ clean: Whether to recreate each directory empty when it already exists,
185
+ removing its contents first. Only an existing *directory* is wiped;
186
+ a non-directory -- or a symlink -- still raises. Because the
187
+ directory is removed and remade, `clean=True` makes `exist_ok`
188
+ moot. **Destructive.** Defaults to False.
150
189
  stringify: Whether to return the paths as strings. Defaults to False.
151
190
 
152
191
  Returns:
@@ -157,15 +196,29 @@ def make_dirs(
157
196
  ValueError: If `mode` is outside the range 0o1-0o7777
158
197
  (not checked on Windows, where the mode is ignored).
159
198
  DirectoryNotFoundError: If `root` is provided and does not exist.
160
- NotADirectoryError: If `root` is provided and is not a directory.
199
+ NotADirectoryError: If `root` is provided and is not a directory, if
200
+ any path exists but is not a directory, or `clean` is True and
201
+ any path is a symlink.
161
202
  ValueError: If `root` is provided and any of the paths are absolute.
162
- OSError: If `exist_ok` is False and any of the paths already exist.
163
- OSError: If any of the paths are not directories.
203
+ OSError: If `exist_ok` is False, `clean` is False, and any of the
204
+ paths already exist.
205
+
206
+ Note:
207
+ Every path is validated (the non-directory / symlink checks above)
208
+ *before* any directory is wiped or created, so a deterministically
209
+ bad entry -- e.g. a file in the list -- fails without partially
210
+ cleaning earlier entries. Creation/cleanup is otherwise per-path and
211
+ not transactional, so a runtime failure (a race, a permission error)
212
+ partway through can still leave earlier entries created or cleaned.
164
213
  """
165
214
  _validate_mode(mode)
166
215
  paths = _join_root_if_provided(paths, root)
167
216
  directories = [Path(p) for p in paths]
168
217
  for directory in directories:
218
+ _ensure_directory_target(directory, clean=clean)
219
+ for directory in directories:
220
+ if clean and directory.is_dir():
221
+ shutil.rmtree(directory)
169
222
  directory.mkdir(mode=mode, parents=True, exist_ok=exist_ok)
170
223
  return stringify_paths(directories) if stringify else directories
171
224
 
@@ -224,3 +277,53 @@ def dirs_empty(paths: StrPaths, *, root: StrPath | None = None) -> bool:
224
277
  """
225
278
  paths = ensure_dirs_exist(paths, root=root)
226
279
  return all(dir_empty_unsafe(p) for p in paths)
280
+
281
+
282
def dir_not_empty_unsafe(path: StrPath) -> bool:
    """Report whether a directory is non-empty, skipping existence checks.

    The answer is the negation of `dir_empty_unsafe(path)`; no validation
    of `path` is performed here.
    """
    is_empty = dir_empty_unsafe(path)
    return not is_empty
285
+
286
+
287
def dirs_not_empty_unsafe(paths: StrPaths, *, root: StrPath | None = None) -> bool:
    """Report whether every directory is non-empty, skipping existence checks.

    When `root` is given, each entry of `paths` is joined onto it before the
    check. Returns True only if `dir_not_empty_unsafe` holds for every
    resulting path (and therefore True for an empty `paths`).
    """
    candidates = paths if root is None else [Path(root) / p for p in paths]
    for candidate in candidates:
        if not dir_not_empty_unsafe(candidate):
            return False
    return True
292
+
293
+
294
def dir_not_empty(path: StrPath) -> bool:
    """Check that an existing directory is not empty.

    The path is first passed through `ensure_dir_exists`; the value it
    returns is then handed to `dir_not_empty_unsafe`.

    Args:
        path: The directory path to check.

    Returns:
        True if the directory is not empty, False otherwise.

    Raises:
        DirectoryNotFoundError: If the path does not exist.
        NotADirectoryError: If the path is not a directory.
    """
    return dir_not_empty_unsafe(ensure_dir_exists(path))
309
+
310
+
311
def dirs_not_empty(paths: StrPaths, *, root: StrPath | None = None) -> bool:
    """Check that every one of several existing directories is not empty.

    All paths are first passed through `ensure_dirs_exist` (with `root`
    forwarded); each returned path is then tested with
    `dir_not_empty_unsafe`.

    Args:
        paths: A sequence of directory paths to check.
        root: The root directory to prepend to each path. Defaults to None.

    Returns:
        True if all directories are not empty, False otherwise.

    Raises:
        DirectoryNotFoundError: If `root` is provided and does not exist.
        DirectoryNotFoundError: If any of the paths do not exist.
        NotADirectoryError: If `root` is provided and is not a directory.
        NotADirectoryError: If any of the paths are not directories.
        ValueError: If `root` is provided and any of the paths are absolute.
    """
    verified = ensure_dirs_exist(paths, root=root)
    return all(map(dir_not_empty_unsafe, verified))
@@ -169,7 +169,7 @@ class Search(ABC):
169
169
 
170
170
  for dirpath, dirnames, filenames in root.walk():
171
171
  child_depth = len(dirpath.parts) - root_depth + 1
172
- part = stringify_path(dirpath.relative_to(root))
172
+ part = stringify_path(dirpath, after=root)
173
173
 
174
174
  if child_depth >= min_depth and cls._filter_part(part, part_filter):
175
175
  names = cls._select_names(dirnames, filenames)