pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.

Files changed (152)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +119 -100
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +118 -122
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +322 -257
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +68 -77
  18. pixeltable/env.py +74 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +4 -5
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +25 -25
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +18 -20
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +2 -24
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +52 -36
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/video.py +8 -13
  109. pixeltable/metadata/converters/convert_18.py +2 -2
  110. pixeltable/metadata/converters/convert_19.py +2 -2
  111. pixeltable/metadata/converters/convert_20.py +2 -2
  112. pixeltable/metadata/converters/convert_21.py +2 -2
  113. pixeltable/metadata/converters/convert_22.py +2 -2
  114. pixeltable/metadata/converters/convert_24.py +2 -2
  115. pixeltable/metadata/converters/convert_25.py +2 -2
  116. pixeltable/metadata/converters/convert_26.py +2 -2
  117. pixeltable/metadata/converters/convert_29.py +4 -4
  118. pixeltable/metadata/converters/convert_34.py +2 -2
  119. pixeltable/metadata/converters/convert_36.py +2 -2
  120. pixeltable/metadata/converters/convert_38.py +2 -2
  121. pixeltable/metadata/converters/convert_39.py +1 -2
  122. pixeltable/metadata/converters/util.py +11 -13
  123. pixeltable/metadata/schema.py +22 -21
  124. pixeltable/metadata/utils.py +2 -6
  125. pixeltable/mypy/mypy_plugin.py +5 -5
  126. pixeltable/plan.py +30 -28
  127. pixeltable/share/packager.py +7 -7
  128. pixeltable/share/publish.py +3 -3
  129. pixeltable/store.py +125 -61
  130. pixeltable/type_system.py +43 -46
  131. pixeltable/utils/__init__.py +1 -2
  132. pixeltable/utils/arrow.py +4 -4
  133. pixeltable/utils/av.py +8 -0
  134. pixeltable/utils/azure_store.py +305 -0
  135. pixeltable/utils/code.py +1 -2
  136. pixeltable/utils/dbms.py +15 -19
  137. pixeltable/utils/description_helper.py +2 -3
  138. pixeltable/utils/documents.py +5 -6
  139. pixeltable/utils/exception_handler.py +2 -2
  140. pixeltable/utils/filecache.py +5 -5
  141. pixeltable/utils/formatter.py +4 -6
  142. pixeltable/utils/gcs_store.py +9 -9
  143. pixeltable/utils/local_store.py +17 -17
  144. pixeltable/utils/object_stores.py +59 -43
  145. pixeltable/utils/s3_store.py +35 -30
  146. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
  147. pixeltable-0.4.19.dist-info/RECORD +213 -0
  148. pixeltable/__version__.py +0 -3
  149. pixeltable-0.4.18.dist-info/RECORD +0 -211
  150. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  151. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  152. {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/utils/local_store.py
@@ -10,7 +10,7 @@ import urllib.request
 import uuid
 from collections import defaultdict
 from pathlib import Path
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 from uuid import UUID

 import PIL.Image
@@ -35,7 +35,7 @@ class LocalStore(ObjectStoreBase):

     __base_dir: Path

-    soa: Optional[StorageObjectAddress]
+    soa: StorageObjectAddress | None

     def __init__(self, location: Path | StorageObjectAddress):
         if isinstance(location, Path):
@@ -69,7 +69,7 @@ class LocalStore(ObjectStoreBase):
             raise excs.Error(f'{error_col_name}`destination` must be a valid path. Error: {e}') from None

     @staticmethod
-    def file_url_to_path(url: str) -> Optional[Path]:
+    def file_url_to_path(url: str) -> Path | None:
         """Convert a file:// URI to a Path object with support for Windows UNC paths."""
         assert isinstance(url, str), type(url)
         parsed = urllib.parse.urlparse(url)
@@ -90,7 +90,7 @@ class LocalStore(ObjectStoreBase):
         return Path(path_str)

     @classmethod
-    def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: Optional[str]) -> Path:
+    def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: str | None) -> Path:
         """Save binary data to a file in a LocalStore. format is ignored for binary data."""
         assert isinstance(file_data, bytes)
         with open(dest_path, 'wb') as f:
@@ -100,7 +100,7 @@ class LocalStore(ObjectStoreBase):
         return dest_path

     @classmethod
-    def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: Optional[str]) -> Path:
+    def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: str | None) -> Path:
         """Save a PIL Image to a file in a LocalStore with the specified format."""
         if dest_path.suffix != f'.{format}':
             dest_path = dest_path.with_name(f'{dest_path.name}.{format}')
@@ -111,7 +111,7 @@ class LocalStore(ObjectStoreBase):
             os.fsync(f.fileno())  # Forces OS to write to physical storage
         return dest_path

-    def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None) -> Path:
+    def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> Path:
         """
         Construct a new, unique Path name in the __base_dir for a persisted file.
         Create the parent directory for the new Path if it does not already exist.
@@ -121,19 +121,19 @@ class LocalStore(ObjectStoreBase):
         parent.mkdir(parents=True, exist_ok=True)
         return parent / filename

-    def _prepare_path(self, col: Column, ext: Optional[str] = None) -> Path:
+    def _prepare_path(self, col: Column, ext: str | None = None) -> Path:
         """
         Construct a new, unique Path name in the __base_dir for a persisted file.
         Create the parent directory for the new Path if it does not already exist.
         """
-        assert col.tbl is not None, 'Column must be associated with a table'
-        return self._prepare_path_raw(col.tbl.id, col.id, col.tbl.version, ext)
+        assert col.get_tbl() is not None, 'Column must be associated with a table'
+        return self._prepare_path_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext)

     def contains_path(self, file_path: Path) -> bool:
         """Return True if the given path refers to a file managed by this LocalStore, else False."""
         return str(file_path).startswith(str(self.__base_dir))

-    def resolve_url(self, file_url: Optional[str]) -> Optional[Path]:
+    def resolve_url(self, file_url: str | None) -> Path | None:
         """Return path if the given url refers to a file managed by this LocalStore, else None.

         Args:
@@ -168,7 +168,7 @@ class LocalStore(ObjectStoreBase):
         _logger.debug(f'Media Storage: copied {src_path} to {new_file_url}')
         return new_file_url

-    def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: Optional[str]) -> tuple[Path, str]:
+    def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
         """Save a data object to a file in a LocalStore
         Returns:
             dest_path: Path to the saved file
@@ -185,7 +185,7 @@ class LocalStore(ObjectStoreBase):
         new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
         return dest_path, new_file_url

-    def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
+    def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
         """Delete all files belonging to tbl_id. If tbl_version is not None, delete
         only those files belonging to the specified tbl_version.

@@ -209,7 +209,7 @@ class LocalStore(ObjectStoreBase):
             os.remove(p)
         return len(paths)

-    def count(self, tbl_id: Optional[UUID], tbl_version: Optional[int] = None) -> int:
+    def count(self, tbl_id: UUID | None, tbl_version: int | None = None) -> int:
         """
         Return number of files for given tbl_id.
         """
@@ -277,7 +277,7 @@ class TempStore:
         return env.Env.get().tmp_dir

     @classmethod
-    def count(cls, tbl_id: Optional[UUID] = None, tbl_version: Optional[int] = None) -> int:
+    def count(cls, tbl_id: UUID | None = None, tbl_version: int | None = None) -> int:
         return LocalStore(cls._tmp_dir()).count(tbl_id, tbl_version)

     @classmethod
@@ -285,11 +285,11 @@ class TempStore:
         return LocalStore(cls._tmp_dir()).contains_path(file_path)

     @classmethod
-    def resolve_url(cls, file_url: Optional[str]) -> Optional[Path]:
+    def resolve_url(cls, file_url: str | None) -> Path | None:
         return LocalStore(cls._tmp_dir()).resolve_url(file_url)

     @classmethod
-    def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: Optional[str]) -> tuple[Path, str]:
+    def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
         return LocalStore(cls._tmp_dir()).save_media_object(data, col, format)

     @classmethod
@@ -302,7 +302,7 @@ class TempStore:
         _logger.debug(f'Media Storage: deleted {file_path}')

     @classmethod
-    def create_path(cls, tbl_id: Optional[UUID] = None, extension: str = '') -> Path:
+    def create_path(cls, tbl_id: UUID | None = None, extension: str = '') -> Path:
         """Return a new, unique Path located in the temporary store.
         If tbl_id is provided, the path name will be similar to a LocalStore path based on the tbl_id.
         If tbl_id is None, a random UUID will be used to create the path."""
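
Two mechanical patterns account for most of the churn in the hunks above and recur across the rest of this release: Optional[X] annotations are rewritten as PEP 604 X | None unions, and direct col.tbl attribute access is replaced by a col.get_tbl() accessor. A minimal sketch of the annotation change, using a hypothetical function that is not part of pixeltable:

    from pathlib import Path

    # Pre-0.4.19 style needed an extra import:
    #   from typing import Optional
    #   def resolve_url(url: Optional[str]) -> Optional[Path]: ...

    # 0.4.19 style spells the same types as PEP 604 unions:
    def resolve_url(url: str | None) -> Path | None:
        """Return a Path for a file:// URL, or None if no URL was given."""
        if url is None:
            return None
        return Path(url.removeprefix('file://'))

Both spellings are equivalent at runtime; the union form simply drops the typing.Optional import.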
pixeltable/utils/object_stores.py
@@ -7,7 +7,7 @@ import urllib.parse
 import urllib.request
 import uuid
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, NamedTuple, Optional
+from typing import TYPE_CHECKING, NamedTuple
 from uuid import UUID

 from pixeltable import env, exceptions as excs
@@ -44,7 +44,7 @@ class StorageObjectAddress(NamedTuple):
     key: str = ''  # Key parsed from the source (prefix + object_name)
     prefix: str = ''  # Prefix (within the bucket) parsed from the source
     object_name: str = ''  # Object name parsed from the source (if requested and applicable)
-    path: Optional[Path] = None
+    path: Path | None = None

     @property
     def has_object(self) -> bool:
@@ -56,11 +56,11 @@ class StorageObjectAddress(NamedTuple):

     @property
     def is_azure_scheme(self) -> bool:
-        return self.scheme in ['wasb', 'wasbs', 'abfs', 'abfss']
+        return self.scheme in ('wasb', 'wasbs', 'abfs', 'abfss')

     @property
     def has_valid_storage_target(self) -> bool:
-        return self.storage_target in [
+        return self.storage_target in (
             StorageTarget.LOCAL_STORE,
             StorageTarget.S3_STORE,
             StorageTarget.R2_STORE,
@@ -68,7 +68,7 @@ class StorageObjectAddress(NamedTuple):
             StorageTarget.GCS_STORE,
             StorageTarget.AZURE_STORE,
             StorageTarget.HTTP_STORE,
-        ]
+        )

     @property
     def prefix_free_uri(self) -> str:
@@ -120,9 +120,7 @@ class ObjectPath:
         return tbl_id.hex

     @classmethod
-    def create_prefix_raw(
-        cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None
-    ) -> tuple[str, str]:
+    def create_prefix_raw(cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> tuple[str, str]:
         """Construct a unique unix-style prefix and filename for a persisted file.
         The results are derived from table, col, and version specs.
         Returns:
@@ -202,7 +200,7 @@ class ObjectPath:
             container = parsed.netloc
             key = parsed.path.lstrip('/')

-        elif scheme in ['wasb', 'wasbs', 'abfs', 'abfss']:
+        elif scheme in ('wasb', 'wasbs', 'abfs', 'abfss'):
             # Azure-specific URI schemes
             # wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
             # abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
@@ -216,7 +214,7 @@ class ObjectPath:
                 raise ValueError(f'Invalid Azure URI format: {src_addr}')
             key = parsed.path.lstrip('/')

-        elif scheme in ['http', 'https']:
+        elif scheme in ('http', 'https'):
             # Standard HTTP(S) URL format
             # https://account.blob.core.windows.net/container/<optional path>/<optional object>
             # https://account.r2.cloudflarestorage.com/container/<optional path>/<optional object>
@@ -253,7 +251,7 @@ class ObjectPath:
         return r

     @classmethod
-    def parse_object_storage_addr(cls, src_addr: str, may_contain_object_name: bool) -> StorageObjectAddress:
+    def parse_object_storage_addr(cls, src_addr: str, allow_obj_name: bool) -> StorageObjectAddress:
         """
         Parses a cloud storage URI into its scheme, bucket, prefix, and object name.

@@ -273,14 +271,14 @@ class ObjectPath:
         https://raw.github.com/pixeltable/pixeltable/main/docs/resources/images/000000000030.jpg
         """
         soa = cls.parse_object_storage_addr1(src_addr)
-        prefix, object_name = cls.separate_prefix_object(soa.key, may_contain_object_name)
+        prefix, object_name = cls.separate_prefix_object(soa.key, allow_obj_name)
         assert not object_name.endswith('/')
         r = soa._replace(prefix=prefix, object_name=object_name)
         return r


 class ObjectStoreBase:
-    def validate(self, error_col_name: str) -> Optional[str]:
+    def validate(self, error_prefix: str) -> str | None:
         """Check the store configuration. Returns base URI if store is accessible.

         Args:
@@ -303,7 +301,7 @@ class ObjectStoreBase:
         """
         raise AssertionError

-    def move_local_file(self, col: Column, src_path: Path) -> Optional[str]:
+    def move_local_file(self, col: Column, src_path: Path) -> str | None:
         """Move a file associated with a Column to the store, returning the file's URL within the destination.

         Args:
@@ -324,7 +322,7 @@ class ObjectStoreBase:
         """
         raise AssertionError

-    def count(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
+    def count(self, tbl_id: UUID, tbl_version: int | None = None) -> int:
         """Return the number of objects in the store associated with the given tbl_id

         Args:
@@ -336,7 +334,7 @@ class ObjectStoreBase:
         """
         raise AssertionError

-    def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
+    def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
         """Delete objects in the destination for a given table ID, table version.

         Args:
@@ -360,28 +358,15 @@ class ObjectStoreBase:

 class ObjectOps:
     @classmethod
-    def get_store(cls, dest: Optional[str], may_contain_object_name: bool, col_name: Optional[str] = None) -> Any:
+    def get_store(cls, dest: str | None, allow_obj_name: bool, col_name: str | None = None) -> ObjectStoreBase:
         from pixeltable.env import Env
         from pixeltable.utils.local_store import LocalStore

-        soa = (
-            Env.get().object_soa
-            if dest is None
-            else ObjectPath.parse_object_storage_addr(dest, may_contain_object_name=may_contain_object_name)
-        )
+        dest = dest or str(Env.get().media_dir)  # Use local media dir as fallback
+        soa = ObjectPath.parse_object_storage_addr(dest, allow_obj_name=allow_obj_name)
         if soa.storage_target == StorageTarget.LOCAL_STORE:
             return LocalStore(soa)
-        if soa.storage_target == StorageTarget.S3_STORE and soa.scheme == 's3':
-            env.Env.get().require_package('boto3')
-            from pixeltable.utils.s3_store import S3Store
-
-            return S3Store(soa)
-        if soa.storage_target == StorageTarget.R2_STORE:
-            env.Env.get().require_package('boto3')
-            from pixeltable.utils.s3_store import S3Store
-
-            return S3Store(soa)
-        if soa.storage_target == StorageTarget.B2_STORE:
+        if soa.storage_target in (StorageTarget.S3_STORE, StorageTarget.R2_STORE, StorageTarget.B2_STORE):
             env.Env.get().require_package('boto3')
             from pixeltable.utils.s3_store import S3Store

@@ -391,6 +376,11 @@ class ObjectOps:
             from pixeltable.utils.gcs_store import GCSStore

             return GCSStore(soa)
+        if soa.storage_target == StorageTarget.AZURE_STORE:
+            env.Env.get().require_package('azure.storage.blob')
+            from pixeltable.utils.azure_store import AzureBlobStore
+
+            return AzureBlobStore(soa)
         if soa.storage_target == StorageTarget.HTTP_STORE and soa.is_http_readable:
             return HTTPStore(soa)
         error_col_name = f'Column {col_name!r}: ' if col_name is not None else ''
@@ -399,7 +389,7 @@ class ObjectOps:
         )

     @classmethod
-    def validate_destination(cls, dest: str | Path | None, col_name: Optional[str]) -> str:
+    def validate_destination(cls, dest: str | Path | None, col_name: str | None = None) -> str:
         """Convert a Column destination parameter to a URI, else raise errors.
         Args:
             dest: The requested destination
@@ -407,19 +397,19 @@ class ObjectOps:
         Returns:
             URI of destination, or raises an error
         """
-        error_col_name = f'Column {col_name!r}: ' if col_name is not None else ''
+        error_col_str = f'column {col_name!r}' if col_name is not None else ''

         # General checks on any destination
         if isinstance(dest, Path):
             dest = str(dest)
         if dest is not None and not isinstance(dest, str):
-            raise excs.Error(f'{error_col_name}`destination` must be a string or path, got {dest!r}')
+            raise excs.Error(f'{error_col_str}: `destination` must be a string or path; got {dest!r}')

         # Specific checks for storage backends
         store = cls.get_store(dest, False, col_name)
-        dest2 = store.validate(error_col_name)
+        dest2 = store.validate(error_col_str)
         if dest2 is None:
-            raise excs.Error(f'{error_col_name}`destination` must be a supported destination, got {dest!r}')
+            raise excs.Error(f'{error_col_str}: `destination` must be a supported destination; got {dest!r}')
         return dest2

     @classmethod
@@ -427,7 +417,7 @@ class ObjectOps:
         """Copy an object from a URL to a local Path. Thread safe.
         Raises an exception if the download fails or the scheme is not supported
         """
-        soa = ObjectPath.parse_object_storage_addr(src_uri, may_contain_object_name=True)
+        soa = ObjectPath.parse_object_storage_addr(src_uri, allow_obj_name=True)
         store = cls.get_store(src_uri, True)
         store.copy_object_to_local_file(soa.object_name, dest_path)

@@ -466,7 +456,7 @@ class ObjectOps:
         return store.copy_local_file(col, src_path)

     @classmethod
-    def delete(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
+    def delete(cls, dest: str | None, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
         """Delete objects in the destination for a given table ID, table version.
         Returns:
             Number of objects deleted or None
@@ -475,13 +465,39 @@ class ObjectOps:
         return store.delete(tbl_id, tbl_version)

     @classmethod
-    def count(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
-        """Return the count of objects in the destination for a given table ID"""
+    def count(
+        cls,
+        tbl_id: UUID,
+        tbl_version: int | None = None,
+        dest: str | None = None,
+        default_input_dest: bool = False,
+        default_output_dest: bool = False,
+    ) -> int:
+        """
+        Return the count of objects in the destination for a given table ID.
+
+        At most one of dest, default_input, default_output may be specified. If none are specified, the fallback is the
+        local media directory.
+
+        Args:
+            tbl_id: Table ID for which to count objects
+            tbl_version: If specified, only counts objects for a specific table version
+            dest: The destination to count objects in
+            default_input_dest: If `True`, use the default input media destination
+            default_output_dest: If `True`, use the default output media destination
+        """
+        assert sum((dest is not None, default_input_dest, default_output_dest)) <= 1, (
+            'At most one of dest, default_input, default_output may be specified'
+        )
+        if default_input_dest:
+            dest = env.Env.get().default_input_media_dest
+        if default_output_dest:
+            dest = env.Env.get().default_output_media_dest
         store = cls.get_store(dest, False)
         return store.count(tbl_id, tbl_version)

     @classmethod
-    def list_objects(cls, dest: Optional[str], return_uri: bool, n_max: int = 10) -> list[str]:
+    def list_objects(cls, dest: str | None, return_uri: bool, n_max: int = 10) -> list[str]:
         """Return a list of objects found in the specified destination bucket.
         The dest specification string must not contain an object name.
         Each returned object includes the full set of prefixes.
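
The consolidated dispatch in ObjectOps.get_store above now routes S3, R2, and B2 URIs through a single S3Store branch and adds an AzureBlobStore branch backed by the new pixeltable/utils/azure_store.py module. A rough sketch of how the Azure-style addresses are parsed, assuming the 0.4.19 internals shown in this diff (illustrative only, not a documented public API; the URI is made up and the exact prefix/object split depends on separate_prefix_object):

    from pixeltable.utils.object_stores import ObjectPath

    # Hypothetical Azure blob URI: container 'media' on storage account 'myaccount'.
    soa = ObjectPath.parse_object_storage_addr(
        'wasbs://media@myaccount.blob.core.windows.net/images/cat.png', allow_obj_name=True
    )
    print(soa.is_azure_scheme)           # True for wasb/wasbs/abfs/abfss schemes
    print(soa.prefix, soa.object_name)   # prefix and object name split out of the parsed key
    print(soa.has_valid_storage_target)  # AZURE_STORE is now among the accepted targets

Passing a comparable wasbs:// destination to ObjectOps.get_store would additionally require the azure.storage.blob package and an initialized pixeltable environment, and would return an AzureBlobStore instance.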
pixeltable/utils/s3_store.py
@@ -4,11 +4,11 @@ import threading
 import urllib.parse
 import uuid
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Iterator, NamedTuple, Optional
+from typing import TYPE_CHECKING, Any, Iterator, NamedTuple

 import boto3
 import botocore
-from botocore.exceptions import ClientError
+from botocore.exceptions import ClientError, ConnectionError

 from pixeltable import env, exceptions as excs
 from pixeltable.config import Config
@@ -29,7 +29,7 @@ class S3CompatClientDict(NamedTuple):
     Thread-safe via the module-level 'client_lock'.
     """

-    profile: Optional[str]  # AWS-style profile used to locate credentials
+    profile: str | None  # AWS-style profile used to locate credentials
     clients: dict[str, Any]  # Map of endpoint URL → boto3 client instance


@@ -150,7 +150,7 @@ class S3Store(ObjectStoreBase):
         """Return the prefix from the base URI."""
         return self.__prefix_name

-    def validate(self, error_col_name: str) -> Optional[str]:
+    def validate(self, error_col_name: str) -> str | None:
         """
         Checks if the URI exists.

@@ -161,10 +161,14 @@ class S3Store(ObjectStoreBase):
             self.client().head_bucket(Bucket=self.bucket_name)
             return self.__base_uri
         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'validate bucket {error_col_name}')
+            self.handle_s3_error(e, f'validating destination for {error_col_name}')
+        except ConnectionError as e:
+            raise excs.Error(
+                f'Connection error while validating destination {self.__base_uri!r} for {error_col_name}: {e}'
+            ) from e
         return None

-    def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: Optional[str] = None) -> str:
+    def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: str | None = None) -> str:
         """
         Construct a new, unique URI for a persisted media file.
         """
@@ -172,19 +176,19 @@ class S3Store(ObjectStoreBase):
         parent = f'{self.__base_uri}{prefix}'
         return f'{parent}/{filename}'

-    def _prepare_uri(self, col: 'Column', ext: Optional[str] = None) -> str:
+    def _prepare_uri(self, col: 'Column', ext: str | None = None) -> str:
         """
         Construct a new, unique URI for a persisted media file.
         """
-        assert col.tbl is not None, 'Column must be associated with a table'
-        return self._prepare_uri_raw(col.tbl.id, col.id, col.tbl.version, ext=ext)
+        assert col.get_tbl() is not None, 'Column must be associated with a table'
+        return self._prepare_uri_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext=ext)

     def copy_object_to_local_file(self, src_path: str, dest_path: Path) -> None:
         """Copies an object to a local file. Thread safe."""
         try:
             self.client().download_file(Bucket=self.bucket_name, Key=self.prefix + src_path, Filename=str(dest_path))
         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'download file {src_path}')
+            self.handle_s3_error(e, f'downloading file {src_path!r}')
             raise

     def copy_local_file(self, col: 'Column', src_path: Path) -> str:
@@ -200,10 +204,10 @@ class S3Store(ObjectStoreBase):
             _logger.debug(f'Media Storage: copied {src_path} to {new_file_uri}')
             return new_file_uri
         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'setup iterator {self.prefix}')
+            self.handle_s3_error(e, 'uploading file')
             raise

-    def _get_filtered_objects(self, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> tuple[Iterator, Any]:
+    def _get_filtered_objects(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> tuple[Iterator, Any]:
         """Private method to get filtered objects for a table, optionally filtered by version.

         Args:
@@ -239,10 +243,10 @@ class S3Store(ObjectStoreBase):
             return object_iterator, bucket

         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'setup iterator {self.prefix}')
+            self.handle_s3_error(e, f'setting up iterator {self.prefix}')
             raise

-    def count(self, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> int:
+    def count(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
         """Count the number of files belonging to tbl_id. If tbl_version is not None,
         count only those files belonging to the specified tbl_version.

@@ -259,7 +263,7 @@ class S3Store(ObjectStoreBase):

         return sum(1 for _ in object_iterator)

-    def delete(self, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> int:
+    def delete(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
         """Delete all files belonging to tbl_id. If tbl_version is not None, delete
         only those files belonging to the specified tbl_version.

@@ -298,7 +302,7 @@ class S3Store(ObjectStoreBase):
             return total_deleted

         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'deleting with {self.prefix}')
+            self.handle_s3_error(e, f'deleting with {self.prefix}')
             raise

     def list_objects(self, return_uri: bool, n_max: int = 10) -> list[str]:
@@ -321,28 +325,31 @@ class S3Store(ObjectStoreBase):
                         return r
                     r.append(f'{p}{obj["Key"]}')
         except ClientError as e:
-            self.handle_s3_error(e, self.bucket_name, f'list objects from {self.prefix}')
+            self.handle_s3_error(e, f'listing objects from {self.prefix!r}')
         return r

-    @classmethod
-    def handle_s3_error(
-        cls, e: 'ClientError', bucket_name: str, operation: str = '', *, ignore_404: bool = False
-    ) -> None:
+    def handle_s3_error(self, e: 'ClientError', operation: str = '', *, ignore_404: bool = False) -> None:
         error_code = e.response.get('Error', {}).get('Code')
         error_message = e.response.get('Error', {}).get('Message', str(e))
         if ignore_404 and error_code == '404':
             return
         if error_code == '404':
-            raise excs.Error(f'Bucket {bucket_name} not found during {operation}: {error_message}')
+            raise excs.Error(f'Client error while {operation}: Bucket {self.bucket_name!r} not found') from e
         elif error_code == '403':
-            raise excs.Error(f'Access denied to bucket {bucket_name} during {operation}: {error_message}')
+            raise excs.Error(
+                f'Client error while {operation}: Access denied to bucket {self.bucket_name!r}: {error_message}'
+            ) from e
         elif error_code == 'PreconditionFailed' or 'PreconditionFailed' in error_message:
-            raise excs.Error(f'Precondition failed for bucket {bucket_name} during {operation}: {error_message}')
+            raise excs.Error(
+                f'Client error while {operation}: Precondition failed for bucket {self.bucket_name!r}: {error_message}'
+            ) from e
         else:
-            raise excs.Error(f'Error during {operation} in bucket {bucket_name}: {error_code} - {error_message}')
+            raise excs.Error(
+                f'Client error while {operation} in bucket {self.bucket_name!r}: {error_code} - {error_message}'
+            ) from e

     @classmethod
-    def create_boto_session(cls, profile_name: Optional[str] = None) -> Any:
+    def create_boto_session(cls, profile_name: str | None = None) -> Any:
         """Create a boto session using the defined profile"""
         if profile_name:
             try:
@@ -354,7 +361,7 @@ class S3Store(ObjectStoreBase):
         return boto3.Session()

     @classmethod
-    def create_boto_client(cls, profile_name: Optional[str] = None, extra_args: Optional[dict[str, Any]] = None) -> Any:
+    def create_boto_client(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
         config_args: dict[str, Any] = {
             'max_pool_connections': 30,
             'connect_timeout': 15,
@@ -380,8 +387,6 @@ class S3Store(ObjectStoreBase):
         return boto3.client('s3', config=config)

     @classmethod
-    def create_boto_resource(
-        cls, profile_name: Optional[str] = None, extra_args: Optional[dict[str, Any]] = None
-    ) -> Any:
+    def create_boto_resource(cls, profile_name: str | None = None, extra_args: dict[str, Any] | None = None) -> Any:
         # Create a session using the defined profile
         return cls.create_boto_session(profile_name).resource('s3', **(extra_args or {}))
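
Two related changes show up in the S3Store hunks above: handle_s3_error is now an instance method that reads self.bucket_name instead of taking a bucket_name argument, and validate() additionally catches botocore's ConnectionError so that network failures surface as a pixeltable error rather than a raw botocore exception. A standalone sketch of that error-handling shape, written against plain boto3 rather than pixeltable's classes (the bucket name and error wrapper below are placeholders):

    import boto3
    from botocore.exceptions import ClientError, ConnectionError


    def validate_bucket(bucket_name: str) -> str:
        """Return the bucket URI if it is reachable, mirroring the shape of S3Store.validate()."""
        client = boto3.client('s3')
        try:
            client.head_bucket(Bucket=bucket_name)  # raises ClientError on 404/403
            return f's3://{bucket_name}'
        except ClientError as e:
            code = e.response.get('Error', {}).get('Code')
            raise RuntimeError(f'Client error while validating bucket {bucket_name!r}: {code}') from e
        except ConnectionError as e:
            raise RuntimeError(f'Connection error while validating bucket {bucket_name!r}: {e}') from e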
{pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pixeltable
-Version: 0.4.18
+Version: 0.4.19
 Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
 Project-URL: homepage, https://pixeltable.com/
 Project-URL: repository, https://github.com/pixeltable/pixeltable