pixeltable-0.4.17-py3-none-any.whl → pixeltable-0.4.19-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
@@ -4,7 +4,7 @@ import io
4
4
  import json
5
5
  import logging
6
6
  import mimetypes
7
- from typing import Any, Callable, Optional
7
+ from typing import Any, Callable
8
8
 
9
9
  import av
10
10
  import numpy as np
@@ -39,7 +39,7 @@ class Formatter:
39
39
  self.__num_cols = num_cols
40
40
  self.__http_address = http_address
41
41
 
42
- def get_pandas_formatter(self, col_type: ts.ColumnType) -> Optional[Callable]:
42
+ def get_pandas_formatter(self, col_type: ts.ColumnType) -> Callable | None:
43
43
  if col_type.is_string_type():
44
44
  return self.format_string
45
45
  if col_type.is_float_type():
@@ -184,7 +184,7 @@ class Formatter:
184
184
  """
185
185
 
186
186
  @classmethod
187
- def extract_first_video_frame(cls, file_path: str) -> Optional[Image.Image]:
187
+ def extract_first_video_frame(cls, file_path: str) -> Image.Image | None:
188
188
  with av.open(file_path) as container:
189
189
  try:
190
190
  img = next(container.decode(video=0)).to_image()
@@ -224,9 +224,7 @@ class Formatter:
224
224
  """
225
225
 
226
226
  @classmethod
227
- def make_document_thumbnail(
228
- cls, file_path: str, max_width: int = 320, max_height: int = 320
229
- ) -> Optional[Image.Image]:
227
+ def make_document_thumbnail(cls, file_path: str, max_width: int = 320, max_height: int = 320) -> Image.Image | None:
230
228
  """
231
229
  Returns a thumbnail image of a document.
232
230
  """
@@ -5,7 +5,7 @@ import re
5
5
  import urllib.parse
6
6
  import uuid
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, Any, Iterator, Optional
8
+ from typing import TYPE_CHECKING, Any, Iterator
9
9
 
10
10
  from google.api_core.exceptions import GoogleAPIError
11
11
  from google.cloud import storage # type: ignore[attr-defined]
@@ -81,7 +81,7 @@ class GCSStore(ObjectStoreBase):
81
81
  """Return the prefix from the base URI."""
82
82
  return self.__prefix_name
83
83
 
84
- def validate(self, error_col_name: str) -> Optional[str]:
84
+ def validate(self, error_col_name: str) -> str | None:
85
85
  """
86
86
  Checks if the URI exists.
87
87
 
@@ -99,7 +99,7 @@ class GCSStore(ObjectStoreBase):
99
99
  self.handle_gcs_error(e, self.bucket_name, f'validate bucket {error_col_name}')
100
100
  return None
101
101
 
102
- def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: Optional[str] = None) -> str:
102
+ def _prepare_uri_raw(self, tbl_id: uuid.UUID, col_id: int, tbl_version: int, ext: str | None = None) -> str:
103
103
  """
104
104
  Construct a new, unique URI for a persisted media file.
105
105
  """
@@ -107,12 +107,12 @@ class GCSStore(ObjectStoreBase):
107
107
  parent = f'{self.__base_uri}{prefix}'
108
108
  return f'{parent}/{filename}'
109
109
 
110
- def _prepare_uri(self, col: Column, ext: Optional[str] = None) -> str:
110
+ def _prepare_uri(self, col: Column, ext: str | None = None) -> str:
111
111
  """
112
112
  Construct a new, unique URI for a persisted media file.
113
113
  """
114
- assert col.tbl is not None, 'Column must be associated with a table'
115
- return self._prepare_uri_raw(col.tbl.id, col.id, col.tbl.version, ext=ext)
114
+ assert col.get_tbl() is not None, 'Column must be associated with a table'
115
+ return self._prepare_uri_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext=ext)
116
116
 
117
117
  def copy_local_file(self, col: Column, src_path: Path) -> str:
118
118
  """Copy a local file, and return its new URL"""
@@ -142,7 +142,7 @@ class GCSStore(ObjectStoreBase):
142
142
  self.handle_gcs_error(e, self.bucket_name, f'download file {src_path}')
143
143
  raise
144
144
 
145
- def _get_filtered_objects(self, bucket: Any, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> Iterator:
145
+ def _get_filtered_objects(self, bucket: Any, tbl_id: uuid.UUID, tbl_version: int | None = None) -> Iterator:
146
146
  """Private method to get filtered objects for a table, optionally filtered by version.
147
147
 
148
148
  Args:
@@ -168,7 +168,7 @@ class GCSStore(ObjectStoreBase):
168
168
 
169
169
  return blob_iterator
170
170
 
171
- def count(self, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> int:
171
+ def count(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
172
172
  """Count the number of files belonging to tbl_id. If tbl_version is not None,
173
173
  count only those files belonging to the specified tbl_version.
174
174
 
@@ -193,7 +193,7 @@ class GCSStore(ObjectStoreBase):
193
193
  self.handle_gcs_error(e, self.bucket_name, f'setup iterator {self.prefix}')
194
194
  raise
195
195
 
196
- def delete(self, tbl_id: uuid.UUID, tbl_version: Optional[int] = None) -> int:
196
+ def delete(self, tbl_id: uuid.UUID, tbl_version: int | None = None) -> int:
197
197
  """Delete all files belonging to tbl_id. If tbl_version is not None, delete
198
198
  only those files belonging to the specified tbl_version.
199
199
 
@@ -10,7 +10,7 @@ import urllib.request
10
10
  import uuid
11
11
  from collections import defaultdict
12
12
  from pathlib import Path
13
- from typing import TYPE_CHECKING, Optional
13
+ from typing import TYPE_CHECKING
14
14
  from uuid import UUID
15
15
 
16
16
  import PIL.Image
@@ -35,7 +35,7 @@ class LocalStore(ObjectStoreBase):
35
35
 
36
36
  __base_dir: Path
37
37
 
38
- soa: Optional[StorageObjectAddress]
38
+ soa: StorageObjectAddress | None
39
39
 
40
40
  def __init__(self, location: Path | StorageObjectAddress):
41
41
  if isinstance(location, Path):
@@ -69,7 +69,7 @@ class LocalStore(ObjectStoreBase):
69
69
  raise excs.Error(f'{error_col_name}`destination` must be a valid path. Error: {e}') from None
70
70
 
71
71
  @staticmethod
72
- def file_url_to_path(url: str) -> Optional[Path]:
72
+ def file_url_to_path(url: str) -> Path | None:
73
73
  """Convert a file:// URI to a Path object with support for Windows UNC paths."""
74
74
  assert isinstance(url, str), type(url)
75
75
  parsed = urllib.parse.urlparse(url)
@@ -90,7 +90,7 @@ class LocalStore(ObjectStoreBase):
90
90
  return Path(path_str)
91
91
 
92
92
  @classmethod
93
- def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: Optional[str]) -> Path:
93
+ def _save_binary_media_file(cls, file_data: bytes, dest_path: Path, format: str | None) -> Path:
94
94
  """Save binary data to a file in a LocalStore. format is ignored for binary data."""
95
95
  assert isinstance(file_data, bytes)
96
96
  with open(dest_path, 'wb') as f:
@@ -100,7 +100,7 @@ class LocalStore(ObjectStoreBase):
100
100
  return dest_path
101
101
 
102
102
  @classmethod
103
- def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: Optional[str]) -> Path:
103
+ def _save_pil_image_file(cls, image: PIL.Image.Image, dest_path: Path, format: str | None) -> Path:
104
104
  """Save a PIL Image to a file in a LocalStore with the specified format."""
105
105
  if dest_path.suffix != f'.{format}':
106
106
  dest_path = dest_path.with_name(f'{dest_path.name}.{format}')
@@ -111,7 +111,7 @@ class LocalStore(ObjectStoreBase):
111
111
  os.fsync(f.fileno()) # Forces OS to write to physical storage
112
112
  return dest_path
113
113
 
114
- def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None) -> Path:
114
+ def _prepare_path_raw(self, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> Path:
115
115
  """
116
116
  Construct a new, unique Path name in the __base_dir for a persisted file.
117
117
  Create the parent directory for the new Path if it does not already exist.
@@ -121,19 +121,19 @@ class LocalStore(ObjectStoreBase):
121
121
  parent.mkdir(parents=True, exist_ok=True)
122
122
  return parent / filename
123
123
 
124
- def _prepare_path(self, col: Column, ext: Optional[str] = None) -> Path:
124
+ def _prepare_path(self, col: Column, ext: str | None = None) -> Path:
125
125
  """
126
126
  Construct a new, unique Path name in the __base_dir for a persisted file.
127
127
  Create the parent directory for the new Path if it does not already exist.
128
128
  """
129
- assert col.tbl is not None, 'Column must be associated with a table'
130
- return self._prepare_path_raw(col.tbl.id, col.id, col.tbl.version, ext)
129
+ assert col.get_tbl() is not None, 'Column must be associated with a table'
130
+ return self._prepare_path_raw(col.get_tbl().id, col.id, col.get_tbl().version, ext)
131
131
 
132
132
  def contains_path(self, file_path: Path) -> bool:
133
133
  """Return True if the given path refers to a file managed by this LocalStore, else False."""
134
134
  return str(file_path).startswith(str(self.__base_dir))
135
135
 
136
- def resolve_url(self, file_url: Optional[str]) -> Optional[Path]:
136
+ def resolve_url(self, file_url: str | None) -> Path | None:
137
137
  """Return path if the given url refers to a file managed by this LocalStore, else None.
138
138
 
139
139
  Args:
@@ -168,7 +168,7 @@ class LocalStore(ObjectStoreBase):
168
168
  _logger.debug(f'Media Storage: copied {src_path} to {new_file_url}')
169
169
  return new_file_url
170
170
 
171
- def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: Optional[str]) -> tuple[Path, str]:
171
+ def save_media_object(self, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
172
172
  """Save a data object to a file in a LocalStore
173
173
  Returns:
174
174
  dest_path: Path to the saved file
@@ -185,7 +185,7 @@ class LocalStore(ObjectStoreBase):
185
185
  new_file_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(str(dest_path)))
186
186
  return dest_path, new_file_url
187
187
 
188
- def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
188
+ def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
189
189
  """Delete all files belonging to tbl_id. If tbl_version is not None, delete
190
190
  only those files belonging to the specified tbl_version.
191
191
 
@@ -209,7 +209,7 @@ class LocalStore(ObjectStoreBase):
209
209
  os.remove(p)
210
210
  return len(paths)
211
211
 
212
- def count(self, tbl_id: Optional[UUID], tbl_version: Optional[int] = None) -> int:
212
+ def count(self, tbl_id: UUID | None, tbl_version: int | None = None) -> int:
213
213
  """
214
214
  Return number of files for given tbl_id.
215
215
  """
@@ -277,7 +277,7 @@ class TempStore:
277
277
  return env.Env.get().tmp_dir
278
278
 
279
279
  @classmethod
280
- def count(cls, tbl_id: Optional[UUID] = None, tbl_version: Optional[int] = None) -> int:
280
+ def count(cls, tbl_id: UUID | None = None, tbl_version: int | None = None) -> int:
281
281
  return LocalStore(cls._tmp_dir()).count(tbl_id, tbl_version)
282
282
 
283
283
  @classmethod
@@ -285,11 +285,11 @@ class TempStore:
285
285
  return LocalStore(cls._tmp_dir()).contains_path(file_path)
286
286
 
287
287
  @classmethod
288
- def resolve_url(cls, file_url: Optional[str]) -> Optional[Path]:
288
+ def resolve_url(cls, file_url: str | None) -> Path | None:
289
289
  return LocalStore(cls._tmp_dir()).resolve_url(file_url)
290
290
 
291
291
  @classmethod
292
- def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: Optional[str]) -> tuple[Path, str]:
292
+ def save_media_object(cls, data: bytes | PIL.Image.Image, col: Column, format: str | None) -> tuple[Path, str]:
293
293
  return LocalStore(cls._tmp_dir()).save_media_object(data, col, format)
294
294
 
295
295
  @classmethod
@@ -302,7 +302,7 @@ class TempStore:
302
302
  _logger.debug(f'Media Storage: deleted {file_path}')
303
303
 
304
304
  @classmethod
305
- def create_path(cls, tbl_id: Optional[UUID] = None, extension: str = '') -> Path:
305
+ def create_path(cls, tbl_id: UUID | None = None, extension: str = '') -> Path:
306
306
  """Return a new, unique Path located in the temporary store.
307
307
  If tbl_id is provided, the path name will be similar to a LocalStore path based on the tbl_id.
308
308
  If tbl_id is None, a random UUID will be used to create the path."""
@@ -7,7 +7,7 @@ import urllib.parse
7
7
  import urllib.request
8
8
  import uuid
9
9
  from pathlib import Path
10
- from typing import TYPE_CHECKING, Any, NamedTuple, Optional
10
+ from typing import TYPE_CHECKING, NamedTuple
11
11
  from uuid import UUID
12
12
 
13
13
  from pixeltable import env, exceptions as excs
@@ -44,7 +44,7 @@ class StorageObjectAddress(NamedTuple):
44
44
  key: str = '' # Key parsed from the source (prefix + object_name)
45
45
  prefix: str = '' # Prefix (within the bucket) parsed from the source
46
46
  object_name: str = '' # Object name parsed from the source (if requested and applicable)
47
- path: Optional[Path] = None
47
+ path: Path | None = None
48
48
 
49
49
  @property
50
50
  def has_object(self) -> bool:
@@ -56,11 +56,11 @@ class StorageObjectAddress(NamedTuple):
56
56
 
57
57
  @property
58
58
  def is_azure_scheme(self) -> bool:
59
- return self.scheme in ['wasb', 'wasbs', 'abfs', 'abfss']
59
+ return self.scheme in ('wasb', 'wasbs', 'abfs', 'abfss')
60
60
 
61
61
  @property
62
62
  def has_valid_storage_target(self) -> bool:
63
- return self.storage_target in [
63
+ return self.storage_target in (
64
64
  StorageTarget.LOCAL_STORE,
65
65
  StorageTarget.S3_STORE,
66
66
  StorageTarget.R2_STORE,
@@ -68,7 +68,7 @@ class StorageObjectAddress(NamedTuple):
68
68
  StorageTarget.GCS_STORE,
69
69
  StorageTarget.AZURE_STORE,
70
70
  StorageTarget.HTTP_STORE,
71
- ]
71
+ )
72
72
 
73
73
  @property
74
74
  def prefix_free_uri(self) -> str:
@@ -120,9 +120,7 @@ class ObjectPath:
120
120
  return tbl_id.hex
121
121
 
122
122
  @classmethod
123
- def create_prefix_raw(
124
- cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: Optional[str] = None
125
- ) -> tuple[str, str]:
123
+ def create_prefix_raw(cls, tbl_id: UUID, col_id: int, tbl_version: int, ext: str | None = None) -> tuple[str, str]:
126
124
  """Construct a unique unix-style prefix and filename for a persisted file.
127
125
  The results are derived from table, col, and version specs.
128
126
  Returns:
@@ -202,7 +200,7 @@ class ObjectPath:
202
200
  container = parsed.netloc
203
201
  key = parsed.path.lstrip('/')
204
202
 
205
- elif scheme in ['wasb', 'wasbs', 'abfs', 'abfss']:
203
+ elif scheme in ('wasb', 'wasbs', 'abfs', 'abfss'):
206
204
  # Azure-specific URI schemes
207
205
  # wasb[s]://container@account.blob.core.windows.net/<optional prefix>/<optional object>
208
206
  # abfs[s]://container@account.dfs.core.windows.net/<optional prefix>/<optional object>
@@ -216,7 +214,7 @@ class ObjectPath:
216
214
  raise ValueError(f'Invalid Azure URI format: {src_addr}')
217
215
  key = parsed.path.lstrip('/')
218
216
 
219
- elif scheme in ['http', 'https']:
217
+ elif scheme in ('http', 'https'):
220
218
  # Standard HTTP(S) URL format
221
219
  # https://account.blob.core.windows.net/container/<optional path>/<optional object>
222
220
  # https://account.r2.cloudflarestorage.com/container/<optional path>/<optional object>
@@ -253,7 +251,7 @@ class ObjectPath:
253
251
  return r
254
252
 
255
253
  @classmethod
256
- def parse_object_storage_addr(cls, src_addr: str, may_contain_object_name: bool) -> StorageObjectAddress:
254
+ def parse_object_storage_addr(cls, src_addr: str, allow_obj_name: bool) -> StorageObjectAddress:
257
255
  """
258
256
  Parses a cloud storage URI into its scheme, bucket, prefix, and object name.
259
257
 
@@ -273,14 +271,14 @@ class ObjectPath:
273
271
  https://raw.github.com/pixeltable/pixeltable/main/docs/resources/images/000000000030.jpg
274
272
  """
275
273
  soa = cls.parse_object_storage_addr1(src_addr)
276
- prefix, object_name = cls.separate_prefix_object(soa.key, may_contain_object_name)
274
+ prefix, object_name = cls.separate_prefix_object(soa.key, allow_obj_name)
277
275
  assert not object_name.endswith('/')
278
276
  r = soa._replace(prefix=prefix, object_name=object_name)
279
277
  return r
280
278
 
281
279
 
282
280
  class ObjectStoreBase:
283
- def validate(self, error_col_name: str) -> Optional[str]:
281
+ def validate(self, error_prefix: str) -> str | None:
284
282
  """Check the store configuration. Returns base URI if store is accessible.
285
283
 
286
284
  Args:
@@ -303,7 +301,7 @@ class ObjectStoreBase:
303
301
  """
304
302
  raise AssertionError
305
303
 
306
- def move_local_file(self, col: Column, src_path: Path) -> Optional[str]:
304
+ def move_local_file(self, col: Column, src_path: Path) -> str | None:
307
305
  """Move a file associated with a Column to the store, returning the file's URL within the destination.
308
306
 
309
307
  Args:
@@ -324,7 +322,7 @@ class ObjectStoreBase:
324
322
  """
325
323
  raise AssertionError
326
324
 
327
- def count(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
325
+ def count(self, tbl_id: UUID, tbl_version: int | None = None) -> int:
328
326
  """Return the number of objects in the store associated with the given tbl_id
329
327
 
330
328
  Args:
@@ -336,7 +334,7 @@ class ObjectStoreBase:
336
334
  """
337
335
  raise AssertionError
338
336
 
339
- def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
337
+ def delete(self, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
340
338
  """Delete objects in the destination for a given table ID, table version.
341
339
 
342
340
  Args:
@@ -360,28 +358,15 @@ class ObjectStoreBase:
360
358
 
361
359
  class ObjectOps:
362
360
  @classmethod
363
- def get_store(cls, dest: Optional[str], may_contain_object_name: bool, col_name: Optional[str] = None) -> Any:
361
+ def get_store(cls, dest: str | None, allow_obj_name: bool, col_name: str | None = None) -> ObjectStoreBase:
364
362
  from pixeltable.env import Env
365
363
  from pixeltable.utils.local_store import LocalStore
366
364
 
367
- soa = (
368
- Env.get().object_soa
369
- if dest is None
370
- else ObjectPath.parse_object_storage_addr(dest, may_contain_object_name=may_contain_object_name)
371
- )
365
+ dest = dest or str(Env.get().media_dir) # Use local media dir as fallback
366
+ soa = ObjectPath.parse_object_storage_addr(dest, allow_obj_name=allow_obj_name)
372
367
  if soa.storage_target == StorageTarget.LOCAL_STORE:
373
368
  return LocalStore(soa)
374
- if soa.storage_target == StorageTarget.S3_STORE and soa.scheme == 's3':
375
- env.Env.get().require_package('boto3')
376
- from pixeltable.utils.s3_store import S3Store
377
-
378
- return S3Store(soa)
379
- if soa.storage_target == StorageTarget.R2_STORE:
380
- env.Env.get().require_package('boto3')
381
- from pixeltable.utils.s3_store import S3Store
382
-
383
- return S3Store(soa)
384
- if soa.storage_target == StorageTarget.B2_STORE:
369
+ if soa.storage_target in (StorageTarget.S3_STORE, StorageTarget.R2_STORE, StorageTarget.B2_STORE):
385
370
  env.Env.get().require_package('boto3')
386
371
  from pixeltable.utils.s3_store import S3Store
387
372
 
@@ -391,6 +376,11 @@ class ObjectOps:
391
376
  from pixeltable.utils.gcs_store import GCSStore
392
377
 
393
378
  return GCSStore(soa)
379
+ if soa.storage_target == StorageTarget.AZURE_STORE:
380
+ env.Env.get().require_package('azure.storage.blob')
381
+ from pixeltable.utils.azure_store import AzureBlobStore
382
+
383
+ return AzureBlobStore(soa)
394
384
  if soa.storage_target == StorageTarget.HTTP_STORE and soa.is_http_readable:
395
385
  return HTTPStore(soa)
396
386
  error_col_name = f'Column {col_name!r}: ' if col_name is not None else ''
@@ -399,7 +389,7 @@ class ObjectOps:
399
389
  )
400
390
 
401
391
  @classmethod
402
- def validate_destination(cls, dest: str | Path | None, col_name: Optional[str]) -> str:
392
+ def validate_destination(cls, dest: str | Path | None, col_name: str | None = None) -> str:
403
393
  """Convert a Column destination parameter to a URI, else raise errors.
404
394
  Args:
405
395
  dest: The requested destination
@@ -407,19 +397,19 @@ class ObjectOps:
407
397
  Returns:
408
398
  URI of destination, or raises an error
409
399
  """
410
- error_col_name = f'Column {col_name!r}: ' if col_name is not None else ''
400
+ error_col_str = f'column {col_name!r}' if col_name is not None else ''
411
401
 
412
402
  # General checks on any destination
413
403
  if isinstance(dest, Path):
414
404
  dest = str(dest)
415
405
  if dest is not None and not isinstance(dest, str):
416
- raise excs.Error(f'{error_col_name}`destination` must be a string or path, got {dest!r}')
406
+ raise excs.Error(f'{error_col_str}: `destination` must be a string or path; got {dest!r}')
417
407
 
418
408
  # Specific checks for storage backends
419
409
  store = cls.get_store(dest, False, col_name)
420
- dest2 = store.validate(error_col_name)
410
+ dest2 = store.validate(error_col_str)
421
411
  if dest2 is None:
422
- raise excs.Error(f'{error_col_name}`destination` must be a supported destination, got {dest!r}')
412
+ raise excs.Error(f'{error_col_str}: `destination` must be a supported destination; got {dest!r}')
423
413
  return dest2
424
414
 
425
415
  @classmethod
@@ -427,7 +417,7 @@ class ObjectOps:
427
417
  """Copy an object from a URL to a local Path. Thread safe.
428
418
  Raises an exception if the download fails or the scheme is not supported
429
419
  """
430
- soa = ObjectPath.parse_object_storage_addr(src_uri, may_contain_object_name=True)
420
+ soa = ObjectPath.parse_object_storage_addr(src_uri, allow_obj_name=True)
431
421
  store = cls.get_store(src_uri, True)
432
422
  store.copy_object_to_local_file(soa.object_name, dest_path)
433
423
 
@@ -466,7 +456,7 @@ class ObjectOps:
466
456
  return store.copy_local_file(col, src_path)
467
457
 
468
458
  @classmethod
469
- def delete(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> Optional[int]:
459
+ def delete(cls, dest: str | None, tbl_id: UUID, tbl_version: int | None = None) -> int | None:
470
460
  """Delete objects in the destination for a given table ID, table version.
471
461
  Returns:
472
462
  Number of objects deleted or None
@@ -475,13 +465,39 @@ class ObjectOps:
475
465
  return store.delete(tbl_id, tbl_version)
476
466
 
477
467
  @classmethod
478
- def count(cls, dest: Optional[str], tbl_id: UUID, tbl_version: Optional[int] = None) -> int:
479
- """Return the count of objects in the destination for a given table ID"""
468
+ def count(
469
+ cls,
470
+ tbl_id: UUID,
471
+ tbl_version: int | None = None,
472
+ dest: str | None = None,
473
+ default_input_dest: bool = False,
474
+ default_output_dest: bool = False,
475
+ ) -> int:
476
+ """
477
+ Return the count of objects in the destination for a given table ID.
478
+
479
+ At most one of dest, default_input, default_output may be specified. If none are specified, the fallback is the
480
+ local media directory.
481
+
482
+ Args:
483
+ tbl_id: Table ID for which to count objects
484
+ tbl_version: If specified, only counts objects for a specific table version
485
+ dest: The destination to count objects in
486
+ default_input_dest: If `True`, use the default input media destination
487
+ default_output_dest: If `True`, use the default output media destination
488
+ """
489
+ assert sum((dest is not None, default_input_dest, default_output_dest)) <= 1, (
490
+ 'At most one of dest, default_input, default_output may be specified'
491
+ )
492
+ if default_input_dest:
493
+ dest = env.Env.get().default_input_media_dest
494
+ if default_output_dest:
495
+ dest = env.Env.get().default_output_media_dest
480
496
  store = cls.get_store(dest, False)
481
497
  return store.count(tbl_id, tbl_version)
482
498
 
483
499
  @classmethod
484
- def list_objects(cls, dest: Optional[str], return_uri: bool, n_max: int = 10) -> list[str]:
500
+ def list_objects(cls, dest: str | None, return_uri: bool, n_max: int = 10) -> list[str]:
485
501
  """Return a list of objects found in the specified destination bucket.
486
502
  The dest specification string must not contain an object name.
487
503
  Each returned object includes the full set of prefixes.