pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,322 @@
1
+ from __future__ import annotations
2
+
3
+ import glob
4
+ import logging
5
+ import os
6
+ import re
7
+ import shutil
8
+ import urllib.parse
9
+ import urllib.request
10
+ import uuid
11
+ from collections import defaultdict
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING
14
+ from uuid import UUID
15
+
16
+ import PIL.Image
17
+
18
+ from pixeltable import env, exceptions as excs
19
+ from pixeltable.utils.object_stores import ObjectPath, ObjectStoreBase, StorageObjectAddress
20
+
21
+ if TYPE_CHECKING:
22
+ from pixeltable.catalog import Column
23
+
24
+ _logger = logging.getLogger('pixeltable')
25
+
26
+
27
class LocalStore(ObjectStoreBase):
    """
    Utilities to manage files stored in a local filesystem directory.

    Media file names are a composite of: table id, column id, tbl_version, new uuid:
    the table id/column id/tbl_version are redundant but useful for identifying all files for a table
    or all files created for a particular version of a table
    """

    # root directory under which this store keeps its files
    __base_dir: Path

    # the address this store was created from; None if it was created from a plain Path
    soa: StorageObjectAddress | None

    def __init__(self, location: Path | StorageObjectAddress):
        """Create a store rooted at `location` (a directory Path or a StorageObjectAddress)."""
        if isinstance(location, Path):
            self.__base_dir = location
            self.soa = None
        else:
            assert isinstance(location, StorageObjectAddress)
            self.__base_dir = location.to_path
            self.soa = location

    def validate(self, error_col_name: str) -> str:
        """Convert a Column destination parameter to a URI, else raise errors.

        Args:
            error_col_name: text that is prepended verbatim to every error message

        Returns:
            The base directory of this store as a file:// URI

        Raises:
            excs.Error: if the base directory does not exist, is not a directory, or cannot be resolved
        """
        dest_path = self.__base_dir

        # Check if path exists and validate it's a directory
        if not dest_path.exists():
            raise excs.Error(f'{error_col_name}`destination` does not exist')
        if not dest_path.is_dir():
            raise excs.Error(f'{error_col_name}`destination` must be a directory, not a file')

        # Absolute paths can be converted to a file URI directly
        if dest_path.is_absolute():
            return dest_path.as_uri()

        # For relative paths, convert to absolute first (as_uri() rejects relative paths)
        try:
            absolute_path = dest_path.resolve()
            return absolute_path.as_uri()
        except (OSError, ValueError) as e:
            raise excs.Error(f'{error_col_name}`destination` must be a valid path. Error: {e}') from None

    @staticmethod
    def file_url_to_path(url: str) -> Path | None:
        """Convert a file:// URI to a Path object with support for Windows UNC paths.

        Returns:
            The local Path for a file:// URI; None if the URL uses any other scheme
        """
        assert isinstance(url, str), type(url)
        parsed = urllib.parse.urlparse(url)

        # Verify it's a file scheme
        # We should never be passed a local file path here. The "len > 1" ensures that Windows
        # file paths aren't mistaken for URLs with a single-character scheme.
        assert len(parsed.scheme) > 1, url
        if parsed.scheme.lower() != 'file':
            return None

        pth = parsed.path
        if parsed.netloc:
            # This is a UNC path, ie, file://host/share/path/to/file
            pth = f'//{parsed.netloc}{pth}'

        # url2pathname() already percent-decodes the path. Decoding a second time would corrupt
        # file names that contain a literal '%' followed by two hex digits (eg, 'a%20b.txt').
        return Path(urllib.request.url2pathname(pth))

    @staticmethod
    def _path_to_file_url(path: Path) -> str:
        """Return the file: URL for a local path."""
        return urllib.parse.urljoin('file:', urllib.request.pathname2url(str(path)))

    def _is_under_base_dir(self, path: Path) -> bool:
        """Return True if `path` is the base directory itself or lexically located beneath it.

        The comparison is done on path components rather than on string prefixes, so that a sibling
        directory sharing a name prefix (eg, '/data/store2' vs '/data/store') is not a match.
        """
        base_dir = self.__base_dir
        return path == base_dir or base_dir in path.parents

    @classmethod
    def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: str | None) -> Path:
        """Save binary data to a file in a LocalStore. format is ignored for binary data."""
        assert isinstance(file_data, bytes)
        with open(dest_path, 'wb') as f:
            f.write(file_data)
            f.flush()  # Ensures Python buffers are written to OS
            os.fsync(f.fileno())  # Forces OS to write to physical storage
        return dest_path

    @classmethod
    def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: str | None) -> Path:
        """Save a PIL Image to a file in a LocalStore with the specified format.

        The format is appended to the file name as an extension unless the name already ends with it.

        Returns:
            The path that was actually written (possibly with the extension appended)
        """
        # NOTE(review): format=None would produce a '.None' suffix here; callers presumably always
        # supply a format for images -- confirm
        if dest_path.suffix != f'.{format}':
            dest_path = dest_path.with_name(f'{dest_path.name}.{format}')

        with open(dest_path, 'wb') as f:
            image.save(f, format=format)
            f.flush()  # Ensures Python buffers are written to OS
            os.fsync(f.fileno())  # Forces OS to write to physical storage
        return dest_path

    def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> Path:
        """
        Construct a new, unique Path name in the __base_dir for a persisted file.
        Create the parent directory for the new Path if it does not already exist.
        """
        prefix, filename = ObjectPath.create_prefix_raw(tbl_id, col_id, tbl_version, ext)
        parent = self.__base_dir / Path(prefix)
        parent.mkdir(parents=True, exist_ok=True)
        return parent / filename

    def _prepare_path(self, col: Column, ext: str | None = None) -> Path:
        """
        Construct a new, unique Path name in the __base_dir for a persisted file of the given column.
        Create the parent directory for the new Path if it does not already exist.
        """
        tbl = col.get_tbl()
        assert tbl is not None, 'Column must be associated with a table'
        return self._prepare_path_raw(tbl.id, col.id, tbl.version, ext)

    def contains_path(self, file_path: Path) -> bool:
        """Return True if the given path refers to a file managed by this LocalStore, else False."""
        # Path() keeps this working for callers that pass a plain string
        return self._is_under_base_dir(Path(file_path))

    def resolve_url(self, file_url: str | None) -> Path | None:
        """Return path if the given url refers to a file managed by this LocalStore, else None.

        Args:
            file_url: URL to check

        Returns:
            If the url is a managed file, return a Path() to the file, None, otherwise
        """
        if file_url is None:
            return None
        file_path = self.file_url_to_path(file_url)
        if file_path is None:
            return None
        if not self._is_under_base_dir(file_path):
            # a local file, but not one located in this store
            return None
        return file_path

    def move_local_file(self, col: Column, src_path: Path) -> str:
        """Move a local file to this store, and return its new URL"""
        dest_path = self._prepare_path(col, ext=src_path.suffix)
        # shutil.move() renames when possible and falls back to copy + delete when the source and
        # the store are on different filesystems, where a plain rename would fail
        shutil.move(str(src_path), str(dest_path))
        new_file_url = self._path_to_file_url(dest_path)
        _logger.debug(f'Media Storage: moved {src_path} to {new_file_url}')
        return new_file_url

    def copy_local_file(self, col: Column, src_path: Path) -> str:
        """Copy a local file to this store, and return its new URL"""
        dest_path = self._prepare_path(col, ext=src_path.suffix)
        shutil.copy2(src_path, dest_path)
        new_file_url = self._path_to_file_url(dest_path)
        _logger.debug(f'Media Storage: copied {src_path} to {new_file_url}')
        return new_file_url

    def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
        """Save a data object to a file in a LocalStore

        Args:
            data: raw bytes or a PIL image
            col: the media column the object belongs to
            format: image format used when saving a PIL image; ignored for bytes

        Returns:
            dest_path: Path to the saved file
            url: URL of the saved file

        Raises:
            ValueError: if data is neither bytes nor a PIL image
        """
        assert col.col_type.is_media_type(), f'LocalStore: request to store non media_type Column {col.name}'
        dest_path = self._prepare_path(col)
        if isinstance(data, bytes):
            dest_path = self._save_binary_media_file(data, dest_path, format)
        elif isinstance(data, PIL.Image.Image):
            dest_path = self._save_pil_image_file(data, dest_path, format)
        else:
            raise ValueError(f'Unsupported object type: {type(data)}')
        return dest_path, self._path_to_file_url(dest_path)

    def create_presigned_url(self, soa: StorageObjectAddress, expiration_seconds: int) -> str:
        """Create a presigned URL for local storage (not supported).

        Raises:
            excs.Error: always
        """
        raise excs.Error('Cannot generate servable URL for local file storage.')

    def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
        """Delete all files belonging to tbl_id. If tbl_version is not None, delete
        only those files belonging to the specified tbl_version.

        Return:
            Number of files deleted, or None if the entire table directory was targeted
        """
        assert tbl_id is not None
        table_prefix = ObjectPath.table_prefix(tbl_id)
        table_dir = self.__base_dir / table_prefix
        if tbl_version is None:
            # Remove the entire folder for this table id.
            if table_dir.exists():
                shutil.rmtree(table_dir)
            return None

        # Remove only the elements for the specified tbl_version.
        # The directory part is escaped so that glob metacharacters in the store location are taken literally.
        paths = glob.glob(
            glob.escape(str(table_dir)) + f'/**/{table_prefix}_*_{tbl_version}_*', recursive=True
        )
        for p in paths:
            os.remove(p)
        return len(paths)

    def count(self, tbl_id: UUID | None, tbl_version: int | None = None) -> int:
        """
        Return number of files for given tbl_id, optionally restricted to a single tbl_version.
        """
        if tbl_id is None:
            # NOTE(review): without a '**' component recursive=True has no effect, so only files located
            # directly in the base directory are counted -- confirm that this is intended
            paths = glob.glob(os.path.join(glob.escape(str(self.__base_dir)), '*'), recursive=True)
        else:
            table_prefix = ObjectPath.table_prefix(tbl_id)
            # escape the directory part so that glob metacharacters in the store location are taken literally
            table_dir = glob.escape(str(self.__base_dir / table_prefix))
            if tbl_version is None:
                paths = glob.glob(table_dir + f'/**/{table_prefix}_*', recursive=True)
            else:
                paths = glob.glob(table_dir + f'/**/{table_prefix}_*_{tbl_version}_*', recursive=True)
        # Filter out directories, only count files
        return len([p for p in paths if not os.path.isdir(p)])

    def stats(self) -> list[tuple[UUID, int, int, int]]:
        """Return per-column storage statistics for the files in this store.

        Returns:
            A list of (tbl_id, col_id, num_files, total_size) tuples, sorted by total_size in
            descending order. Files whose names do not match ObjectPath.PATTERN are ignored.
        """
        paths = glob.glob(glob.escape(str(self.__base_dir)) + '/**', recursive=True)
        # key: (tbl_id, col_id), value: (num_files, size)
        d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
        for p in paths:
            if os.path.isdir(p):
                continue
            matched = re.match(ObjectPath.PATTERN, Path(p).name)
            if matched is None:
                # not one of our files (eg, something else placed in the directory); skip it rather than fail
                continue
            tbl_id, col_id = UUID(hex=matched[1]), int(matched[2])
            file_info = os.stat(p)
            t = d[tbl_id, col_id]
            t[0] += 1
            t[1] += file_info.st_size
        result = [(tbl_id, col_id, num_files, size) for (tbl_id, col_id), (num_files, size) in d.items()]
        result.sort(key=lambda e: e[3], reverse=True)
        return result

    def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
        """Return a list of objects found in this store's base directory.

        Each returned object includes the full set of prefixes.
        If return_uri is True, the full file:// URI is returned; otherwise, the file system path.

        Args:
            return_uri: whether to return URIs rather than paths
            n_max: maximum number of objects to return
        """
        r: list[str] = []
        for root, _, files in os.walk(self.__base_dir):
            for file in files:
                if len(r) >= n_max:
                    # we have as many entries as requested; no need to keep walking
                    return r
                r.append(Path(root, file).as_uri() if return_uri else os.path.join(root, file))
        return r

    def clear(self) -> None:
        """Clear all files from the store: an existing base directory is removed and recreated empty."""
        if self.__base_dir.exists():
            shutil.rmtree(self.__base_dir)
            self.__base_dir.mkdir()
266
+
267
+
268
class TempStore:
    """
    Staging area for files of data that have not been persisted to their destination(s) yet.
    A destination is typically either a LocalStore (local persisted files) or a cloud object store.

    This class is stateless: every operation works on the env.Env.get().tmp_dir directory and
    delegates to a LocalStore rooted there for unique file names and for saving objects.
    """

    @classmethod
    def _tmp_dir(cls) -> Path:
        """Return the directory in which temporary files are kept."""
        return env.Env.get().tmp_dir

    @classmethod
    def _local_store(cls) -> LocalStore:
        """Return a LocalStore rooted at the temporary directory."""
        return LocalStore(cls._tmp_dir())

    @classmethod
    def count(cls, tbl_id: UUID | None = None, tbl_version: int | None = None) -> int:
        """Return the file count reported by a LocalStore over the temporary directory."""
        store = cls._local_store()
        return store.count(tbl_id, tbl_version)

    @classmethod
    def contains_path(cls, file_path: Path) -> bool:
        """Return whether a LocalStore over the temporary directory contains the given path."""
        store = cls._local_store()
        return store.contains_path(file_path)

    @classmethod
    def resolve_url(cls, file_url: str | None) -> Path | None:
        """Return the path for file_url as resolved by a LocalStore over the temporary directory."""
        store = cls._local_store()
        return store.resolve_url(file_url)

    @classmethod
    def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
        """Save a data object into the temporary directory; returns (path, url) of the saved file."""
        store = cls._local_store()
        return store.save_media_object(data, col, format)

    @classmethod
    def delete_media_file(cls, file_path: Path) -> None:
        """Remove a single file from the temporary store."""
        assert file_path is not None, 'Object path must be provided'
        assert file_path.exists(), f'Object path does not exist: {file_path}'
        assert cls.contains_path(file_path), f'Object path must be in the TempStore: {file_path}'
        file_path.unlink()
        _logger.debug(f'Media Storage: deleted {file_path}')

    @classmethod
    def create_path(cls, tbl_id: UUID | None = None, extension: str | None = None) -> Path:
        """Return a new, unique Path located in the temporary store.

        With a tbl_id, the name is generated the same way a LocalStore generates names for that table
        (using column id 0 and version 0). Without one, the name is a random UUID plus the extension."""
        suffix = '' if extension is None else extension
        if tbl_id is None:
            return cls._tmp_dir() / f'{uuid.uuid4()}{suffix}'
        return cls._local_store()._prepare_path_raw(tbl_id, 0, 0, suffix)

    @classmethod
    def clear(cls) -> None:
        """Remove every file from the temporary store."""
        cls._local_store().clear()
@@ -0,0 +1,5 @@
1
+ from typing import Any
2
+
3
+
4
def non_none_dict_factory(d: list[tuple[str, Any]]) -> dict:
    """Build a dict from (key, value) pairs, leaving out every pair whose value is None.

    Falsy values other than None (0, '', False, empty containers) are kept. If a key occurs more
    than once, the last non-None value wins.
    """
    result: dict = {}
    for key, value in d:
        if value is None:
            continue
        result[key] = value
    return result