pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/utils/local_store.py
CHANGED
|
@@ -10,7 +10,7 @@ import urllib.request
|
|
|
10
10
|
import uuid
|
|
11
11
|
from collections import defaultdict
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import TYPE_CHECKING
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
14
|
from uuid import UUID
|
|
15
15
|
|
|
16
16
|
import PIL.Image
|
|
@@ -35,7 +35,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
35
35
|
|
|
36
36
|
__base_dir: Path
|
|
37
37
|
|
|
38
|
-
soa:
|
|
38
|
+
soa: StorageObjectAddress | None
|
|
39
39
|
|
|
40
40
|
def __init__(self, location: Path | StorageObjectAddress):
|
|
41
41
|
if isinstance(location, Path):
|
|
@@ -69,7 +69,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
69
69
|
raise excs.Error(f'{error_col_name}`destination` must be a valid path. Error: {e}') from None
|
|
70
70
|
|
|
71
71
|
@staticmethod
|
|
72
|
-
def file_url_to_path(url: str) ->
|
|
72
|
+
def file_url_to_path(url: str) -> Path | None:
|
|
73
73
|
"""Convert a file:// URI to a Path object with support for Windows UNC paths."""
|
|
74
74
|
assert isinstance(url, str), type(url)
|
|
75
75
|
parsed = urllib.parse.urlparse(url)
|
|
@@ -90,7 +90,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
90
90
|
return Path(path_str)
|
|
91
91
|
|
|
92
92
|
@classmethod
|
|
93
|
-
def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format:
|
|
93
|
+
def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: str | None) -> Path:
|
|
94
94
|
"""Save binary data to a file in a LocalStore. format is ignored for binary data."""
|
|
95
95
|
assert isinstance(file_data, bytes)
|
|
96
96
|
with open(dest_path, 'wb') as f:
|
|
@@ -100,7 +100,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
100
100
|
return dest_path
|
|
101
101
|
|
|
102
102
|
@classmethod
|
|
103
|
-
def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format:
|
|
103
|
+
def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: str | None) -> Path:
|
|
104
104
|
"""Save a PIL Image to a file in a LocalStore with the specified format."""
|
|
105
105
|
if dest_path.suffix != f'.{format}':
|
|
106
106
|
dest_path = dest_path.with_name(f'{dest_path.name}.{format}')
|
|
@@ -111,7 +111,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
111
111
|
os.fsync(f.fileno()) # Forces OS to write to physical storage
|
|
112
112
|
return dest_path
|
|
113
113
|
|
|
114
|
-
def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext:
|
|
114
|
+
def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> Path:
|
|
115
115
|
"""
|
|
116
116
|
Construct a new, unique Path name in the __base_dir for a persisted file.
|
|
117
117
|
Create the parent directory for the new Path if it does not already exist.
|
|
@@ -121,19 +121,19 @@ class LocalStore(ObjectStoreBase):
|
|
|
121
121
|
parent.mkdir(parents=True, exist_ok=True)
|
|
122
122
|
return parent / filename
|
|
123
123
|
|
|
124
|
-
def _prepare_path(self, col: Column, ext:
|
|
124
|
+
def _prepare_path(self, col: Column, ext: str | None = None) -> Path:
|
|
125
125
|
"""
|
|
126
126
|
Construct a new, unique Path name in the __base_dir for a persisted file.
|
|
127
127
|
Create the parent directory for the new Path if it does not already exist.
|
|
128
128
|
"""
|
|
129
|
-
assert col.
|
|
130
|
-
return self._prepare_path_raw(col.
|
|
129
|
+
assert col.get_tbl() is not None, 'Column must be associated with a table'
|
|
130
|
+
return self._prepare_path_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext)
|
|
131
131
|
|
|
132
132
|
def contains_path(self, file_path: Path) -> bool:
|
|
133
133
|
"""Return True if the given path refers to a file managed by this LocalStore, else False."""
|
|
134
134
|
return str(file_path).startswith(str(self.__base_dir))
|
|
135
135
|
|
|
136
|
-
def resolve_url(self, file_url:
|
|
136
|
+
def resolve_url(self, file_url: str | None) -> Path | None:
|
|
137
137
|
"""Return path if the given url refers to a file managed by this LocalStore, else None.
|
|
138
138
|
|
|
139
139
|
Args:
|
|
@@ -168,7 +168,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
168
168
|
_logger.debug(f'Media Storage: copied {src_path} to {new_file_url}')
|
|
169
169
|
return new_file_url
|
|
170
170
|
|
|
171
|
-
def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format:
|
|
171
|
+
def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
|
|
172
172
|
"""Save a data object to a file in a LocalStore
|
|
173
173
|
Returns:
|
|
174
174
|
dest_path: Path to the saved file
|
|
@@ -185,7 +185,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
185
185
|
new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
|
|
186
186
|
return dest_path, new_file_url
|
|
187
187
|
|
|
188
|
-
def delete(self, tbl_id: UUID, tbl_version:
|
|
188
|
+
def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
|
|
189
189
|
"""Delete all files belonging to tbl_id. If tbl_version is not None, delete
|
|
190
190
|
only those files belonging to the specified tbl_version.
|
|
191
191
|
|
|
@@ -209,7 +209,7 @@ class LocalStore(ObjectStoreBase):
|
|
|
209
209
|
os.remove(p)
|
|
210
210
|
return len(paths)
|
|
211
211
|
|
|
212
|
-
def count(self, tbl_id:
|
|
212
|
+
def count(self, tbl_id: UUID | None, tbl_version: int | None = None) -> int:
|
|
213
213
|
"""
|
|
214
214
|
Return number of files for given tbl_id.
|
|
215
215
|
"""
|
|
@@ -277,7 +277,7 @@ class TempStore:
|
|
|
277
277
|
return env.Env.get().tmp_dir
|
|
278
278
|
|
|
279
279
|
@classmethod
|
|
280
|
-
def count(cls, tbl_id:
|
|
280
|
+
def count(cls, tbl_id: UUID | None = None, tbl_version: int | None = None) -> int:
|
|
281
281
|
return LocalStore(cls._tmp_dir()).count(tbl_id, tbl_version)
|
|
282
282
|
|
|
283
283
|
@classmethod
|
|
@@ -285,11 +285,11 @@ class TempStore:
|
|
|
285
285
|
return LocalStore(cls._tmp_dir()).contains_path(file_path)
|
|
286
286
|
|
|
287
287
|
@classmethod
|
|
288
|
-
def resolve_url(cls, file_url:
|
|
288
|
+
def resolve_url(cls, file_url: str | None) -> Path | None:
|
|
289
289
|
return LocalStore(cls._tmp_dir()).resolve_url(file_url)
|
|
290
290
|
|
|
291
291
|
@classmethod
|
|
292
|
-
def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format:
|
|
292
|
+
def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
|
|
293
293
|
return LocalStore(cls._tmp_dir()).save_media_object(data, col, format)
|
|
294
294
|
|
|
295
295
|
@classmethod
|
|
@@ -302,7 +302,7 @@ class TempStore:
|
|
|
302
302
|
_logger.debug(f'Media Storage: deleted {file_path}')
|
|
303
303
|
|
|
304
304
|
@classmethod
|
|
305
|
-
def create_path(cls, tbl_id:
|
|
305
|
+
def create_path(cls, tbl_id: UUID | None = None, extension: str = '') -> Path:
|
|
306
306
|
"""Return a new, unique Path located in the temporary store.
|
|
307
307
|
If tbl_id is provided, the path name will be similar to a LocalStore path based on the tbl_id.
|
|
308
308
|
If tbl_id is None, a random UUID will be used to create the path."""
|
|
pixeltable/utils/object_stores.py
CHANGED
|
@@ -7,7 +7,7 @@ import urllib.parse
|
|
|
7
7
|
import urllib.request
|
|
8
8
|
import uuid
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import TYPE_CHECKING,
|
|
10
|
+
from typing import TYPE_CHECKING, NamedTuple
|
|
11
11
|
from uuid import UUID
|
|
12
12
|
|
|
13
13
|
from pixeltable import env, exceptions as excs
|
|
@@ -44,7 +44,7 @@ class StorageObjectAddress(NamedTuple):
|
|
|
44
44
|
key: str = '' # Key parsed from the source (prefix + object_name)
|
|
45
45
|
prefix: str = '' # Prefix (within the bucket) parsed from the source
|
|
46
46
|
object_name: str = '' # Object name parsed from the source (if requested and applicable)
|
|
47
|
-
path:
|
|
47
|
+
path: Path | None = None
|
|
48
48
|
|
|
49
49
|
@property
|
|
50
50
|
def has_object(self) -> bool:
|
|
@@ -56,11 +56,11 @@ class StorageObjectAddress(NamedTuple):
|
|
|
56
56
|
|
|
57
57
|
@property
|
|
58
58
|
def is_azure_scheme(self) -> bool:
|
|
59
|
-
return self.scheme in
|
|
59
|
+
return self.scheme in ('wasb', 'wasbs', 'abfs', 'abfss')
|
|
60
60
|
|
|
61
61
|
@property
|
|
62
62
|
def has_valid_storage_target(self) -> bool:
|
|
63
|
-
return self.storage_target in
|
|
63
|
+
return self.storage_target in (
|
|
64
64
|
StorageTarget.LOCAL_STORE,
|
|
65
65
|
StorageTarget.S3_STORE,
|
|
66
66
|
StorageTarget.R2_STORE,
|
|
@@ -68,7 +68,7 @@ class StorageObjectAddress(NamedTuple):
|
|
|
68
68
|
StorageTarget.GCS_STORE,
|
|
69
69
|
StorageTarget.AZURE_STORE,
|
|
70
70
|
StorageTarget.HTTP_STORE,
|
|
71
|
-
|
|
71
|
+
)
|
|
72
72
|
|
|
73
73
|
@property
|
|
74
74
|
def prefix_free_uri(self) -> str:
|
|
@@ -120,9 +120,7 @@ class ObjectPath:
|
|
|
120
120
|
return tbl_id.hex
|
|
121
121
|
|
|
122
122
|
@classmethod
|
|
123
|
-
def create_prefix_raw(
|
|
124
|
-
cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None
|
|
125
|
-
) -> tuple[str, str]:
|
|
123
|
+
def create_prefix_raw(cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> tuple[str, str]:
|
|
126
124
|
"""Construct a unique unix-style prefix and filename for a persisted file.
|
|
127
125
|
The results are derived from table, col, and version specs.
|
|
128
126
|
Returns:
|
|
@@ -202,7 +200,7 @@ class ObjectPath:
|
|
|
202
200
|
container = parsed.netloc
|
|
203
201
|
key = parsed.path.lstrip('/')
|
|
204
202
|
|
|
205
|
-
elif scheme in
|
|
203
|
+
elif scheme in ('wasb', 'wasbs', 'abfs', 'abfss'):
|
|
206
204
|
# Azure-specific URI schemes
|
|
207
205
|
# wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
|
|
208
206
|
# abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
|
|
@@ -216,7 +214,7 @@ class ObjectPath:
|
|
|
216
214
|
raise ValueError(f'Invalid Azure URI format: {src_addr}')
|
|
217
215
|
key = parsed.path.lstrip('/')
|
|
218
216
|
|
|
219
|
-
elif scheme in
|
|
217
|
+
elif scheme in ('http', 'https'):
|
|
220
218
|
# Standard HTTP(S) URL format
|
|
221
219
|
# https://account.blob.core.windows.net/container/<optional path>/<optional object>
|
|
222
220
|
# https://account.r2.cloudflarestorage.com/container/<optional path>/<optional object>
|
|
@@ -253,7 +251,7 @@ class ObjectPath:
|
|
|
253
251
|
return r
|
|
254
252
|
|
|
255
253
|
@classmethod
|
|
256
|
-
def parse_object_storage_addr(cls, src_addr: str,
|
|
254
|
+
def parse_object_storage_addr(cls, src_addr: str, allow_obj_name: bool) -> StorageObjectAddress:
|
|
257
255
|
"""
|
|
258
256
|
Parses a cloud storage URI into its scheme, bucket, prefix, and object name.
|
|
259
257
|
|
|
@@ -273,14 +271,14 @@ class ObjectPath:
|
|
|
273
271
|
https://raw.github.com/pixeltable/pixeltable/main/docs/resources/images/000000000030.jpg
|
|
274
272
|
"""
|
|
275
273
|
soa = cls.parse_object_storage_addr1(src_addr)
|
|
276
|
-
prefix, object_name = cls.separate_prefix_object(soa.key,
|
|
274
|
+
prefix, object_name = cls.separate_prefix_object(soa.key, allow_obj_name)
|
|
277
275
|
assert not object_name.endswith('/')
|
|
278
276
|
r = soa._replace(prefix=prefix, object_name=object_name)
|
|
279
277
|
return r
|
|
280
278
|
|
|
281
279
|
|
|
282
280
|
class ObjectStoreBase:
|
|
283
|
-
def validate(self,
|
|
281
|
+
def validate(self, error_prefix: str) -> str | None:
|
|
284
282
|
"""Check the store configuration. Returns base URI if store is accessible.
|
|
285
283
|
|
|
286
284
|
Args:
|
|
@@ -303,7 +301,7 @@ class ObjectStoreBase:
|
|
|
303
301
|
"""
|
|
304
302
|
raise AssertionError
|
|
305
303
|
|
|
306
|
-
def move_local_file(self, col: Column, src_path: Path) ->
|
|
304
|
+
def move_local_file(self, col: Column, src_path: Path) -> str | None:
|
|
307
305
|
"""Move a file associated with a Column to the store, returning the file's URL within the destination.
|
|
308
306
|
|
|
309
307
|
Args:
|
|
@@ -324,7 +322,7 @@ class ObjectStoreBase:
|
|
|
324
322
|
"""
|
|
325
323
|
raise AssertionError
|
|
326
324
|
|
|
327
|
-
def count(self, tbl_id: UUID, tbl_version:
|
|
325
|
+
def count(self, tbl_id: UUID, tbl_version: int | None = None) -> int:
|
|
328
326
|
"""Return the number of objects in the store associated with the given tbl_id
|
|
329
327
|
|
|
330
328
|
Args:
|
|
@@ -336,7 +334,7 @@ class ObjectStoreBase:
|
|
|
336
334
|
"""
|
|
337
335
|
raise AssertionError
|
|
338
336
|
|
|
339
|
-
def delete(self, tbl_id: UUID, tbl_version:
|
|
337
|
+
def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
|
|
340
338
|
"""Delete objects in the destination for a given table ID, table version.
|
|
341
339
|
|
|
342
340
|
Args:
|
|
@@ -360,28 +358,15 @@ class ObjectStoreBase:
|
|
|
360
358
|
|
|
361
359
|
class ObjectOps:
|
|
362
360
|
@classmethod
|
|
363
|
-
def get_store(cls, dest:
|
|
361
|
+
def get_store(cls, dest: str | None, allow_obj_name: bool, col_name: str | None = None) -> ObjectStoreBase:
|
|
364
362
|
from pixeltable.env import Env
|
|
365
363
|
from pixeltable.utils.local_store import LocalStore
|
|
366
364
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
if dest is None
|
|
370
|
-
else ObjectPath.parse_object_storage_addr(dest, may_contain_object_name=may_contain_object_name)
|
|
371
|
-
)
|
|
365
|
+
dest = dest or str(Env.get().media_dir) # Use local media dir as fallback
|
|
366
|
+
soa = ObjectPath.parse_object_storage_addr(dest, allow_obj_name=allow_obj_name)
|
|
372
367
|
if soa.storage_target == StorageTarget.LOCAL_STORE:
|
|
373
368
|
return LocalStore(soa)
|
|
374
|
-
if soa.storage_target
|
|
375
|
-
env.Env.get().require_package('boto3')
|
|
376
|
-
from pixeltable.utils.s3_store import S3Store
|
|
377
|
-
|
|
378
|
-
return S3Store(soa)
|
|
379
|
-
if soa.storage_target == StorageTarget.R2_STORE:
|
|
380
|
-
env.Env.get().require_package('boto3')
|
|
381
|
-
from pixeltable.utils.s3_store import S3Store
|
|
382
|
-
|
|
383
|
-
return S3Store(soa)
|
|
384
|
-
if soa.storage_target == StorageTarget.B2_STORE:
|
|
369
|
+
if soa.storage_target in (StorageTarget.S3_STORE, StorageTarget.R2_STORE, StorageTarget.B2_STORE):
|
|
385
370
|
env.Env.get().require_package('boto3')
|
|
386
371
|
from pixeltable.utils.s3_store import S3Store
|
|
387
372
|
|
|
@@ -391,6 +376,11 @@ class ObjectOps:
|
|
|
391
376
|
from pixeltable.utils.gcs_store import GCSStore
|
|
392
377
|
|
|
393
378
|
return GCSStore(soa)
|
|
379
|
+
if soa.storage_target == StorageTarget.AZURE_STORE:
|
|
380
|
+
env.Env.get().require_package('azure.storage.blob')
|
|
381
|
+
from pixeltable.utils.azure_store import AzureBlobStore
|
|
382
|
+
|
|
383
|
+
return AzureBlobStore(soa)
|
|
394
384
|
if soa.storage_target == StorageTarget.HTTP_STORE and soa.is_http_readable:
|
|
395
385
|
return HTTPStore(soa)
|
|
396
386
|
error_col_name = f'Column {col_name!r}: ' if col_name is not None else ''
|
|
@@ -399,7 +389,7 @@ class ObjectOps:
|
|
|
399
389
|
)
|
|
400
390
|
|
|
401
391
|
@classmethod
|
|
402
|
-
def validate_destination(cls, dest: str | Path | None, col_name:
|
|
392
|
+
def validate_destination(cls, dest: str | Path | None, col_name: str | None = None) -> str:
|
|
403
393
|
"""Convert a Column destination parameter to a URI, else raise errors.
|
|
404
394
|
Args:
|
|
405
395
|
dest: The requested destination
|
|
@@ -407,19 +397,19 @@ class ObjectOps:
|
|
|
407
397
|
Returns:
|
|
408
398
|
URI of destination, or raises an error
|
|
409
399
|
"""
|
|
410
|
-
|
|
400
|
+
error_col_str = f'column {col_name!r}' if col_name is not None else ''
|
|
411
401
|
|
|
412
402
|
# General checks on any destination
|
|
413
403
|
if isinstance(dest, Path):
|
|
414
404
|
dest = str(dest)
|
|
415
405
|
if dest is not None and not isinstance(dest, str):
|
|
416
|
-
raise excs.Error(f'{
|
|
406
|
+
raise excs.Error(f'{error_col_str}: `destination` must be a string or path; got {dest!r}')
|
|
417
407
|
|
|
418
408
|
# Specific checks for storage backends
|
|
419
409
|
store = cls.get_store(dest, False, col_name)
|
|
420
|
-
dest2 = store.validate(
|
|
410
|
+
dest2 = store.validate(error_col_str)
|
|
421
411
|
if dest2 is None:
|
|
422
|
-
raise excs.Error(f'{
|
|
412
|
+
raise excs.Error(f'{error_col_str}: `destination` must be a supported destination; got {dest!r}')
|
|
423
413
|
return dest2
|
|
424
414
|
|
|
425
415
|
@classmethod
|
|
@@ -427,7 +417,7 @@ class ObjectOps:
|
|
|
427
417
|
"""Copy an object from a URL to a local Path. Thread safe.
|
|
428
418
|
Raises an exception if the download fails or the scheme is not supported
|
|
429
419
|
"""
|
|
430
|
-
soa = ObjectPath.parse_object_storage_addr(src_uri,
|
|
420
|
+
soa = ObjectPath.parse_object_storage_addr(src_uri, allow_obj_name=True)
|
|
431
421
|
store = cls.get_store(src_uri, True)
|
|
432
422
|
store.copy_object_to_local_file(soa.object_name, dest_path)
|
|
433
423
|
|
|
@@ -466,7 +456,7 @@ class ObjectOps:
|
|
|
466
456
|
return store.copy_local_file(col, src_path)
|
|
467
457
|
|
|
468
458
|
@classmethod
|
|
469
|
-
def delete(cls, dest:
|
|
459
|
+
def delete(cls, dest: str | None, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
|
|
470
460
|
"""Delete objects in the destination for a given table ID, table version.
|
|
471
461
|
Returns:
|
|
472
462
|
Number of objects deleted or None
|
|
@@ -475,13 +465,39 @@ class ObjectOps:
|
|
|
475
465
|
return store.delete(tbl_id, tbl_version)
|
|
476
466
|
|
|
477
467
|
@classmethod
|
|
478
|
-
def count(
|
|
479
|
-
|
|
468
|
+
def count(
|
|
469
|
+
cls,
|
|
470
|
+
tbl_id: UUID,
|
|
471
|
+
tbl_version: int | None = None,
|
|
472
|
+
dest: str | None = None,
|
|
473
|
+
default_input_dest: bool = False,
|
|
474
|
+
default_output_dest: bool = False,
|
|
475
|
+
) -> int:
|
|
476
|
+
"""
|
|
477
|
+
Return the count of objects in the destination for a given table ID.
|
|
478
|
+
|
|
479
|
+
At most one of dest, default_input, default_output may be specified. If none are specified, the fallback is the
|
|
480
|
+
local media directory.
|
|
481
|
+
|
|
482
|
+
Args:
|
|
483
|
+
tbl_id: Table ID for which to count objects
|
|
484
|
+
tbl_version: If specified, only counts objects for a specific table version
|
|
485
|
+
dest: The destination to count objects in
|
|
486
|
+
default_input_dest: If `True`, use the default input media destination
|
|
487
|
+
default_output_dest: If `True`, use the default output media destination
|
|
488
|
+
"""
|
|
489
|
+
assert sum((dest is not None, default_input_dest, default_output_dest)) <= 1, (
|
|
490
|
+
'At most one of dest, default_input, default_output may be specified'
|
|
491
|
+
)
|
|
492
|
+
if default_input_dest:
|
|
493
|
+
dest = env.Env.get().default_input_media_dest
|
|
494
|
+
if default_output_dest:
|
|
495
|
+
dest = env.Env.get().default_output_media_dest
|
|
480
496
|
store = cls.get_store(dest, False)
|
|
481
497
|
return store.count(tbl_id, tbl_version)
|
|
482
498
|
|
|
483
499
|
@classmethod
|
|
484
|
-
def list_objects(cls, dest:
|
|
500
|
+
def list_objects(cls, dest: str | None, return_uri: bool, n_max: int = 10) -> list[str]:
|
|
485
501
|
"""Return a list of objects found in the specified destination bucket.
|
|
486
502
|
The dest specification string must not contain an object name.
|
|
487
503
|
Each returned object includes the full set of prefixes.
|
pixeltable/utils/s3_store.py
CHANGED
|
@@ -4,11 +4,11 @@ import threading
|
|
|
4
4
|
import urllib.parse
|
|
5
5
|
import uuid
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Iterator, NamedTuple, Optional
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Iterator, NamedTuple
|
|
8
8
|
|
|
9
9
|
import boto3
|
|
10
10
|
import botocore
|
|
11
|
-
from botocore.exceptions import ClientError
|
|
11
|
+
from botocore.exceptions import ClientError, ConnectionError
|
|
12
12
|
|
|
13
13
|
from pixeltable import env, exceptions as excs
|
|
14
14
|
from pixeltable.config import Config
|
|
@@ -29,7 +29,7 @@ class S3CompatClientDict(NamedTuple):
|
|
|
29
29
|
Thread-safe via the module-level 'client_lock'.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
|
-
profile:
|
|
32
|
+
profile: str | None # AWS-style profile used to locate credentials
|
|
33
33
|
clients: dict[str, Any] # Map of endpoint URL → boto3 client instance
|
|
34
34
|
|
|
35
35
|
|
|
@@ -150,7 +150,7 @@ class S3Store(ObjectStoreBase):
|
|
|
150
150
|
"""Return the prefix from the base URI."""
|
|
151
151
|
return self.__prefix_name
|
|
152
152
|
|
|
153
|
-
def validate(self, error_col_name: str) ->
|
|
153
|
+
def validate(self, error_col_name: str) -> str | None:
|
|
154
154
|
"""
|
|
155
155
|
Checks if the URI exists.
|
|
156
156
|
|
|
@@ -161,10 +161,14 @@ class S3Store(ObjectStoreBase):
|
|
|
161
161
|
self.client().head_bucket(Bucket=self.bucket_name)
|
|
162
162
|
return self.__base_uri
|
|
163
163
|
except ClientError as e:
|
|
164
|
-
self.handle_s3_error(e,
|
|
164
|
+
self.handle_s3_error(e, f'validating destination for {error_col_name}')
|
|
165
|
+
except ConnectionError as e:
|
|
166
|
+
raise excs.Error(
|
|
167
|
+
f'Connection error while validating destination {self.__base_uri!r} for {error_col_name}: {e}'
|
|
168
|
+
) from e
|
|
165
169
|
return None
|
|
166
170
|
|
|
167
|
-
def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext:
|
|
171
|
+
def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: str | None = None) -> str:
|
|
168
172
|
"""
|
|
169
173
|
Construct a new, unique URI for a persisted media file.
|
|
170
174
|
"""
|
|
@@ -172,19 +176,19 @@ class S3Store(ObjectStoreBase):
|
|
|
172
176
|
parent = f'{self.__base_uri}{prefix}'
|
|
173
177
|
return f'{parent}/{filename}'
|
|
174
178
|
|
|
175
|
-
def _prepare_uri(self, col: 'Column', ext:
|
|
179
|
+
def _prepare_uri(self, col: 'Column', ext: str | None = None) -> str:
|
|
176
180
|
"""
|
|
177
181
|
Construct a new, unique URI for a persisted media file.
|
|
178
182
|
"""
|
|
179
|
-
assert col.
|
|
180
|
-
return self._prepare_uri_raw(col.
|
|
183
|
+
assert col.get_tbl() is not None, 'Column must be associated with a table'
|
|
184
|
+
return self._prepare_uri_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext=ext)
|
|
181
185
|
|
|
182
186
|
def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
|
|
183
187
|
"""Copies an object to a local file. Thread safe."""
|
|
184
188
|
try:
|
|
185
189
|
self.client().download_file(Bucket=self.bucket_name, Key=self.prefix + src_path, Filename=str(dest_path))
|
|
186
190
|
except ClientError as e:
|
|
187
|
-
self.handle_s3_error(e,
|
|
191
|
+
self.handle_s3_error(e, f'downloading file {src_path!r}')
|
|
188
192
|
raise
|
|
189
193
|
|
|
190
194
|
def copy_local_file(self, col: 'Column', src_path: Path) -> str:
|
|
@@ -200,10 +204,10 @@ class S3Store(ObjectStoreBase):
|
|
|
200
204
|
_logger.debug(f'Media Storage: copied {src_path} to {new_file_uri}')
|
|
201
205
|
return new_file_uri
|
|
202
206
|
except ClientError as e:
|
|
203
|
-
self.handle_s3_error(e,
|
|
207
|
+
self.handle_s3_error(e, 'uploading file')
|
|
204
208
|
raise
|
|
205
209
|
|
|
206
|
-
def _get_filtered_objects(self, tbl_id: uuid.UUID, tbl_version:
|
|
210
|
+
def _get_filtered_objects(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> tuple[Iterator, Any]:
|
|
207
211
|
"""Private method to get filtered objects for a table, optionally filtered by version.
|
|
208
212
|
|
|
209
213
|
Args:
|
|
@@ -239,10 +243,10 @@ class S3Store(ObjectStoreBase):
|
|
|
239
243
|
return object_iterator, bucket
|
|
240
244
|
|
|
241
245
|
except ClientError as e:
|
|
242
|
-
self.handle_s3_error(e,
|
|
246
|
+
self.handle_s3_error(e, f'setting up iterator {self.prefix}')
|
|
243
247
|
raise
|
|
244
248
|
|
|
245
|
-
def count(self, tbl_id: uuid.UUID, tbl_version:
|
|
249
|
+
def count(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
|
|
246
250
|
"""Count the number of files belonging to tbl_id. If tbl_version is not None,
|
|
247
251
|
count only those files belonging to the specified tbl_version.
|
|
248
252
|
|
|
@@ -259,7 +263,7 @@ class S3Store(ObjectStoreBase):
|
|
|
259
263
|
|
|
260
264
|
return sum(1 for _ in object_iterator)
|
|
261
265
|
|
|
262
|
-
def delete(self, tbl_id: uuid.UUID, tbl_version:
|
|
266
|
+
def delete(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
|
|
263
267
|
"""Delete all files belonging to tbl_id. If tbl_version is not None, delete
|
|
264
268
|
only those files belonging to the specified tbl_version.
|
|
265
269
|
|
|
@@ -298,7 +302,7 @@ class S3Store(ObjectStoreBase):
|
|
|
298
302
|
return total_deleted
|
|
299
303
|
|
|
300
304
|
except ClientError as e:
|
|
301
|
-
self.handle_s3_error(e,
|
|
305
|
+
self.handle_s3_error(e, f'deleting with {self.prefix}')
|
|
302
306
|
raise
|
|
303
307
|
|
|
304
308
|
def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
|
|
@@ -321,28 +325,31 @@ class S3Store(ObjectStoreBase):
|
|
|
321
325
|
return r
|
|
322
326
|
r.append(f'{p}{obj["Key"]}')
|
|
323
327
|
except ClientError as e:
|
|
324
|
-
self.handle_s3_error(e,
|
|
328
|
+
self.handle_s3_error(e, f'listing objects from {self.prefix!r}')
|
|
325
329
|
return r
|
|
326
330
|
|
|
327
|
-
|
|
328
|
-
def handle_s3_error(
|
|
329
|
-
cls, e: 'ClientError', bucket_name: str, operation: str = '', *, ignore_404: bool = False
|
|
330
|
-
) -> None:
|
|
331
|
+
def handle_s3_error(self, e: 'ClientError', operation: str = '', *, ignore_404: bool = False) -> None:
|
|
331
332
|
error_code = e.response.get('Error', {}).get('Code')
|
|
332
333
|
error_message = e.response.get('Error', {}).get('Message', str(e))
|
|
333
334
|
if ignore_404 and error_code == '404':
|
|
334
335
|
return
|
|
335
336
|
if error_code == '404':
|
|
336
|
-
raise excs.Error(f'
|
|
337
|
+
raise excs.Error(f'Client error while {operation}: Bucket {self.bucket_name!r} not found') from e
|
|
337
338
|
elif error_code == '403':
|
|
338
|
-
raise excs.Error(
|
|
339
|
+
raise excs.Error(
|
|
340
|
+
f'Client error while {operation}: Access denied to bucket {self.bucket_name!r}: {error_message}'
|
|
341
|
+
) from e
|
|
339
342
|
elif error_code == 'PreconditionFailed' or 'PreconditionFailed' in error_message:
|
|
340
|
-
raise excs.Error(
|
|
343
|
+
raise excs.Error(
|
|
344
|
+
f'Client error while {operation}: Precondition failed for bucket {self.bucket_name!r}: {error_message}'
|
|
345
|
+
) from e
|
|
341
346
|
else:
|
|
342
|
-
raise excs.Error(
|
|
347
|
+
raise excs.Error(
|
|
348
|
+
f'Client error while {operation} in bucket {self.bucket_name!r}: {error_code} - {error_message}'
|
|
349
|
+
) from e
|
|
343
350
|
|
|
344
351
|
@classmethod
|
|
345
|
-
def create_boto_session(cls, profile_name:
|
|
352
|
+
def create_boto_session(cls, profile_name: str | None = None) -> Any:
|
|
346
353
|
"""Create a boto session using the defined profile"""
|
|
347
354
|
if profile_name:
|
|
348
355
|
try:
|
|
@@ -354,7 +361,7 @@ class S3Store(ObjectStoreBase):
|
|
|
354
361
|
return boto3.Session()
|
|
355
362
|
|
|
356
363
|
@classmethod
|
|
357
|
-
def create_boto_client(cls, profile_name:
|
|
364
|
+
def create_boto_client(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
|
|
358
365
|
config_args: dict[str, Any] = {
|
|
359
366
|
'max_pool_connections': 30,
|
|
360
367
|
'connect_timeout': 15,
|
|
@@ -380,8 +387,6 @@ class S3Store(ObjectStoreBase):
|
|
|
380
387
|
return boto3.client('s3', config=config)
|
|
381
388
|
|
|
382
389
|
@classmethod
|
|
383
|
-
def create_boto_resource(
|
|
384
|
-
cls, profile_name: Optional[str] = None, extra_args: Optional[dict[str, Any]] = None
|
|
385
|
-
) -> Any:
|
|
390
|
+
def create_boto_resource(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
|
|
386
391
|
# Create a session using the defined profile
|
|
387
392
|
return cls.create_boto_session(profile_name).resource('s3', **(extra_args or {}))
|
|
{pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.4.18
|
|
3
|
+
Version: 0.4.19
|
|
4
4
|
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
5
|
Project-URL: homepage, https://pixeltable.com/
|
|
6
6
|
Project-URL: repository, https://github.com/pixeltable/pixeltable
|