pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (202) hide show
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,392 @@
1
+ import logging
2
+ import re
3
+ import threading
4
+ import urllib.parse
5
+ import uuid
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any, Iterator, NamedTuple
8
+
9
+ import boto3
10
+ import botocore
11
+ from botocore.exceptions import ClientError, ConnectionError
12
+
13
+ from pixeltable import env, exceptions as excs
14
+ from pixeltable.config import Config
15
+ from pixeltable.utils.object_stores import ObjectPath, ObjectStoreBase, StorageObjectAddress, StorageTarget
16
+
17
+ if TYPE_CHECKING:
18
+ from botocore.exceptions import ClientError
19
+
20
+ from pixeltable.catalog import Column
21
+
22
+ _logger = logging.getLogger('pixeltable')
23
+
24
+ client_lock = threading.Lock()
25
+
26
+
27
+ class S3CompatClientDict(NamedTuple):
28
+ """Container for S3-compatible storage access objects (R2, B2, etc.).
29
+ Thread-safe via the module-level 'client_lock'.
30
+ """
31
+
32
+ profile: str | None # AWS-style profile used to locate credentials
33
+ clients: dict[str, Any] # Map of endpoint URL → boto3 client instance
34
+
35
+
36
@env.register_client('r2')
def _() -> Any:
    """Create the initially empty client cache registered as 'r2'.

    The credentials profile is read from the 'r2_profile' config value.
    """
    return S3CompatClientDict(clients={}, profile=Config.get().get_string_value('r2_profile'))
40
+
41
+
42
@env.register_client('r2_resource')
def _() -> Any:
    """Create the initially empty cache registered as 'r2_resource'.

    The credentials profile is read from the 'r2_profile' config value.
    """
    return S3CompatClientDict(clients={}, profile=Config.get().get_string_value('r2_profile'))
46
+
47
+
48
@env.register_client('b2')
def _() -> Any:
    """Create the initially empty client cache registered as 'b2'.

    The credentials profile is read from the 'b2_profile' config value.
    """
    return S3CompatClientDict(clients={}, profile=Config.get().get_string_value('b2_profile'))
52
+
53
+
54
@env.register_client('b2_resource')
def _() -> Any:
    """Create the initially empty cache registered as 'b2_resource'.

    The credentials profile is read from the 'b2_profile' config value.
    """
    return S3CompatClientDict(clients={}, profile=Config.get().get_string_value('b2_profile'))
58
+
59
+
60
@env.register_client('s3')
def _() -> Any:
    """Create the boto client registered as 's3', using the 's3_profile' config value as profile."""
    return S3Store.create_boto_client(profile_name=Config.get().get_string_value('s3_profile'))
64
+
65
+
66
@env.register_client('s3_resource')
def _() -> Any:
    """Create the boto resource registered as 's3_resource', using the 's3_profile' config value as profile."""
    return S3Store.create_boto_resource(profile_name=Config.get().get_string_value('s3_profile'))
70
+
71
+
72
class S3Store(ObjectStoreBase):
    """Wrapper for an s3 storage target (S3, R2 or B2) with all needed methods.

    Access objects for S3 come from the registered 's3' / 's3_resource' clients.
    For R2 and B2 one access object is created per endpoint URL and cached in the
    registered `S3CompatClientDict`, guarded by the module-level `client_lock`.
    """

    # URI of the S3 bucket in the format s3://bucket_name/prefix/
    # Always ends with a slash
    __base_uri: str

    # bucket name extracted from the URI
    __bucket_name: str

    # prefix path within the bucket, either empty or ending with a slash
    __prefix_name: str

    # address of the storage destination this store operates on
    soa: StorageObjectAddress

    def __init__(self, soa: StorageObjectAddress):
        """Bind the store to `soa`, which must address an S3, R2 or B2 destination."""
        self.soa = soa
        self.__bucket_name = self.soa.container
        self.__prefix_name = self.soa.prefix
        assert self.soa.storage_target in {StorageTarget.R2_STORE, StorageTarget.S3_STORE, StorageTarget.B2_STORE}, (
            f'Expected storage_target "s3", "r2", or "b2", got {self.soa.storage_target}'
        )
        self.__base_uri = self.soa.prefix_free_uri + self.soa.prefix

    def _get_or_create_compat_handle(self, registry_name: str, factory: Any) -> Any:
        """Return the cached access object for this store's endpoint, creating it on first use.

        Args:
            registry_name: name of the registered `S3CompatClientDict` (e.g. 'r2', 'b2_resource')
            factory: callable taking `profile_name` and `extra_args` that builds the access object
        """
        cd = env.Env.get().get_client(registry_name)
        endpoint = self.soa.container_free_uri
        # lookup and insertion happen under the lock so that concurrent callers
        # never create two access objects for the same endpoint
        with client_lock:
            if endpoint not in cd.clients:
                cd.clients[endpoint] = factory(
                    profile_name=cd.profile, extra_args={'endpoint_url': endpoint, 'region_name': 'auto'}
                )
            return cd.clients[endpoint]

    def client(self) -> Any:
        """Return a client to access the store."""
        target = self.soa.storage_target
        if target == StorageTarget.R2_STORE:
            return self._get_or_create_compat_handle('r2', S3Store.create_boto_client)
        if target == StorageTarget.B2_STORE:
            return self._get_or_create_compat_handle('b2', S3Store.create_boto_client)
        if target == StorageTarget.S3_STORE:
            return env.Env.get().get_client('s3')
        raise AssertionError(f'Unexpected storage_target: {self.soa.storage_target}')

    def get_resource(self) -> Any:
        """Return a boto resource to access the store."""
        target = self.soa.storage_target
        if target == StorageTarget.R2_STORE:
            return self._get_or_create_compat_handle('r2_resource', S3Store.create_boto_resource)
        if target == StorageTarget.B2_STORE:
            return self._get_or_create_compat_handle('b2_resource', S3Store.create_boto_resource)
        if target == StorageTarget.S3_STORE:
            return env.Env.get().get_client('s3_resource')
        raise AssertionError(f'Unexpected storage_target: {self.soa.storage_target}')

    @property
    def bucket_name(self) -> str:
        """Return the bucket name from the base URI."""
        return self.__bucket_name

    @property
    def prefix(self) -> str:
        """Return the prefix from the base URI."""
        return self.__prefix_name

    def validate(self, error_col_name: str) -> str | None:
        """
        Check that the destination bucket can be reached with a `head_bucket` request.

        Args:
            error_col_name: column name to mention in error messages

        Returns:
            The base URI of this store if the request succeeds.

        Raises:
            excs.Error: if the request fails with a client error or a connection error.
        """
        try:
            self.client().head_bucket(Bucket=self.bucket_name)
            return self.__base_uri
        except ClientError as e:
            # handle_s3_error() raises excs.Error here (ignore_404 is not set)
            self.handle_s3_error(e, f'validating destination for {error_col_name}')
        except ConnectionError as e:
            raise excs.Error(
                f'Connection error while validating destination {self.__base_uri!r} for {error_col_name}: {e}'
            ) from e
        return None

    def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: str | None = None) -> str:
        """
        Construct a new, unique URI for a persisted media file.

        The URI is the base URI followed by the prefix and filename produced by
        `ObjectPath.create_prefix_raw()`.
        """
        prefix, filename = ObjectPath.create_prefix_raw(tbl_id, col_id, tbl_version, ext)
        parent = f'{self.__base_uri}{prefix}'
        return f'{parent}/{filename}'

    def _prepare_uri(self, col: 'Column', ext: str | None = None) -> str:
        """
        Construct a new, unique URI for a persisted media file belonging to `col`.
        """
        tbl = col.get_tbl()
        assert tbl is not None, 'Column must be associated with a table'
        return self._prepare_uri_raw(tbl.id, col.id, tbl.version, ext=ext)

    def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
        """Copies an object to a local file. Thread safe.

        `src_path` is relative to the store's prefix.

        Raises:
            excs.Error: if the download fails with a client error.
        """
        try:
            self.client().download_file(Bucket=self.bucket_name, Key=self.prefix + src_path, Filename=str(dest_path))
        except ClientError as e:
            self.handle_s3_error(e, f'downloading file {src_path!r}')
            raise

    def copy_local_file(self, col: 'Column', src_path: Path) -> str:
        """Copy a local file, and return its new URL

        Raises:
            excs.Error: if the upload fails with a client error.
        """
        new_file_uri = self._prepare_uri(col, ext=src_path.suffix)
        parsed = urllib.parse.urlparse(new_file_uri)
        key = parsed.path.lstrip('/')
        if self.soa.storage_target in {StorageTarget.R2_STORE, StorageTarget.B2_STORE}:
            key = key.split('/', 1)[-1]  # Remove the bucket name from the key for R2/B2
        try:
            _logger.debug(f'Media Storage: copying {src_path} to {new_file_uri} : Key: {key}')
            self.client().upload_file(Filename=str(src_path), Bucket=self.bucket_name, Key=key)
            _logger.debug(f'Media Storage: copied {src_path} to {new_file_uri}')
            return new_file_uri
        except ClientError as e:
            self.handle_s3_error(e, 'uploading file')
            raise

    def _get_filtered_objects(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> tuple[Iterator, Any]:
        """Private method to get filtered objects for a table, optionally filtered by version.

        Args:
            tbl_id: Table UUID to filter by
            tbl_version: Optional table version to filter by

        Returns:
            Tuple of (iterator over S3 objects matching the criteria, bucket object)
        """
        # Use ObjectPath to construct the prefix for this table
        table_prefix = ObjectPath.table_prefix(tbl_id)
        prefix = f'{self.prefix}{table_prefix}/'

        try:
            # Use S3 resource interface for filtering
            s3_resource = self.get_resource()
            bucket = s3_resource.Bucket(self.bucket_name)

            if tbl_version is None:
                # Return all objects with the table prefix
                object_iterator = bucket.objects.filter(Prefix=prefix)
            else:
                # Filter by both table_id and table_version using the ObjectPath pattern
                # Pattern: tbl_id_col_id_version_uuid
                version_pattern = re.compile(
                    rf'{re.escape(table_prefix)}_\d+_{re.escape(str(tbl_version))}_[0-9a-fA-F]+.*'
                )
                # Return filtered collection - this still uses lazy loading;
                # only the last path component of each key is matched
                object_iterator = (
                    obj for obj in bucket.objects.filter(Prefix=prefix) if version_pattern.match(obj.key.split('/')[-1])
                )

            return object_iterator, bucket

        except ClientError as e:
            self.handle_s3_error(e, f'setting up iterator {self.prefix}')
            raise

    def count(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
        """Count the number of files belonging to tbl_id. If tbl_version is not None,
        count only those files belonging to the specified tbl_version.

        Args:
            tbl_id: Table UUID to count objects for
            tbl_version: Optional table version to filter by

        Returns:
            Number of objects matching the criteria
        """
        assert tbl_id is not None

        object_iterator, _ = self._get_filtered_objects(tbl_id, tbl_version)

        return sum(1 for _ in object_iterator)

    def delete(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
        """Delete all files belonging to tbl_id. If tbl_version is not None, delete
        only those files belonging to the specified tbl_version.

        Args:
            tbl_id: Table UUID to delete objects for
            tbl_version: Optional table version to filter by

        Returns:
            Number of objects submitted for deletion

        Raises:
            excs.Error: if listing or deleting fails with a client error.
        """
        assert tbl_id is not None

        # Use shared method to get filtered objects and bucket
        object_iterator, bucket = self._get_filtered_objects(tbl_id, tbl_version)

        total_deleted = 0

        try:
            objects_to_delete: list[dict[str, Any]] = []

            # Process objects in batches as we iterate (memory efficient)
            for obj in object_iterator:
                objects_to_delete.append({'Key': obj.key})

                # Delete in batches of 1000 (S3 limit)
                if len(objects_to_delete) >= 1000:
                    bucket.delete_objects(Delete={'Objects': objects_to_delete, 'Quiet': True})
                    total_deleted += len(objects_to_delete)
                    objects_to_delete = []

            # Delete any remaining objects in the final batch
            if objects_to_delete:
                bucket.delete_objects(Delete={'Objects': objects_to_delete, 'Quiet': True})
                total_deleted += len(objects_to_delete)

            return total_deleted

        except ClientError as e:
            self.handle_s3_error(e, f'deleting with {self.prefix}')
            raise

    def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
        """Return a list of objects found in the specified destination bucket.
        Each returned object includes the full set of prefixes.
        if return_uri is True, full URI's are returned; otherwise, just the object keys.
        At most n_max entries are returned.
        """
        p = self.soa.prefix_free_uri if return_uri else ''

        s3_client = self.client()
        r: list[str] = []
        try:
            # Use paginator to handle more than 1000 objects
            paginator = s3_client.get_paginator('list_objects_v2')
            for page in paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix):
                if 'Contents' not in page:
                    continue
                for obj in page['Contents']:
                    if len(r) >= n_max:
                        return r
                    r.append(f'{p}{obj["Key"]}')
        except ClientError as e:
            self.handle_s3_error(e, f'listing objects from {self.prefix!r}')
        return r

    def handle_s3_error(self, e: 'ClientError', operation: str = '', *, ignore_404: bool = False) -> None:
        """Translate a boto `ClientError` into an `excs.Error` with a descriptive message.

        Args:
            e: the client error to translate
            operation: description of what was being attempted, used in the message
            ignore_404: if True, return silently when the error code is '404'

        Raises:
            excs.Error: always, unless `ignore_404` is set and the error code is '404'.
        """
        error_code = e.response.get('Error', {}).get('Code')
        error_message = e.response.get('Error', {}).get('Message', str(e))
        if ignore_404 and error_code == '404':
            return
        if error_code == '404':
            raise excs.Error(f'Client error while {operation}: Bucket {self.bucket_name!r} not found') from e
        elif error_code == '403':
            raise excs.Error(
                f'Client error while {operation}: Access denied to bucket {self.bucket_name!r}: {error_message}'
            ) from e
        elif error_code == 'PreconditionFailed' or 'PreconditionFailed' in error_message:
            raise excs.Error(
                f'Client error while {operation}: Precondition failed for bucket {self.bucket_name!r}: {error_message}'
            ) from e
        else:
            raise excs.Error(
                f'Client error while {operation} in bucket {self.bucket_name!r}: {error_code} - {error_message}'
            ) from e

    @classmethod
    def create_boto_session(cls, profile_name: str | None = None) -> Any:
        """Create a boto session using the defined profile

        Falls back to a default `boto3.Session()` when no profile is given or
        when creating the profile-based session fails (best effort; the failure is logged).
        """
        if profile_name:
            try:
                _logger.info(f'Creating boto session with profile {profile_name}')
                session = boto3.Session(profile_name=profile_name)
                return session
            except Exception as e:
                _logger.info(f'Error occurred while creating boto session with profile {profile_name}: {e}')
        return boto3.Session()

    @classmethod
    def create_boto_client(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
        """Create an S3 client, falling back to unsigned requests if signed creation fails.

        Args:
            profile_name: optional profile used to create the session
            extra_args: optional keyword arguments for client creation (e.g. 'endpoint_url',
                'region_name'); they are applied in both the signed and the unsigned case

        Returns:
            A boto3 S3 client.
        """
        client_args: dict[str, Any] = extra_args or {}
        config_args: dict[str, Any] = {
            'max_pool_connections': 30,
            'connect_timeout': 15,
            'read_timeout': 30,
            'retries': {'max_attempts': 3, 'mode': 'adaptive'},
            's3': {'addressing_style': 'path'},  # Use path-style addressing for S3-compatible services
            'user_agent_extra': 'pixeltable',  # Marks requests as coming from Pixeltable for tracking and debugging
        }

        session = cls.create_boto_session(profile_name)

        try:
            # Check if credentials are available; a failure here lands in the except branch
            session.get_credentials().get_frozen_credentials()
            config = botocore.config.Config(**config_args)
            return session.client('s3', config=config, **client_args)  # credentials are available
        except Exception as e:
            _logger.info(f'Error occurred while creating S3 client: {e}, fallback to unsigned mode')
            # No credentials available, use unsigned mode
            config_args = config_args.copy()
            config_args['signature_version'] = botocore.UNSIGNED
            config = botocore.config.Config(**config_args)
            # keep the caller's endpoint/region: without them an R2/B2 store would
            # silently get a client that talks to the default endpoint instead
            return boto3.client('s3', config=config, **client_args)

    @classmethod
    def create_boto_resource(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
        """Create an S3 resource from a session for `profile_name`, passing `extra_args` through."""
        # Create a session using the defined profile
        return cls.create_boto_session(profile_name).resource('s3', **(extra_args or {}))