pixeltable 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic.

Files changed (153)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/catalog.py +144 -118
  4. pixeltable/catalog/column.py +104 -115
  5. pixeltable/catalog/globals.py +1 -2
  6. pixeltable/catalog/insertable_table.py +44 -49
  7. pixeltable/catalog/path.py +3 -4
  8. pixeltable/catalog/schema_object.py +4 -4
  9. pixeltable/catalog/table.py +139 -124
  10. pixeltable/catalog/table_metadata.py +6 -6
  11. pixeltable/catalog/table_version.py +315 -246
  12. pixeltable/catalog/table_version_handle.py +4 -4
  13. pixeltable/catalog/table_version_path.py +9 -10
  14. pixeltable/catalog/tbl_ops.py +9 -3
  15. pixeltable/catalog/view.py +34 -28
  16. pixeltable/config.py +14 -10
  17. pixeltable/dataframe.py +69 -78
  18. pixeltable/env.py +78 -64
  19. pixeltable/exec/aggregation_node.py +6 -6
  20. pixeltable/exec/cache_prefetch_node.py +10 -10
  21. pixeltable/exec/data_row_batch.py +3 -3
  22. pixeltable/exec/exec_context.py +16 -4
  23. pixeltable/exec/exec_node.py +5 -5
  24. pixeltable/exec/expr_eval/evaluators.py +6 -6
  25. pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
  26. pixeltable/exec/expr_eval/globals.py +6 -6
  27. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  28. pixeltable/exec/expr_eval/schedulers.py +11 -11
  29. pixeltable/exec/in_memory_data_node.py +2 -2
  30. pixeltable/exec/object_store_save_node.py +14 -17
  31. pixeltable/exec/sql_node.py +28 -27
  32. pixeltable/exprs/arithmetic_expr.py +4 -4
  33. pixeltable/exprs/array_slice.py +2 -2
  34. pixeltable/exprs/column_property_ref.py +3 -3
  35. pixeltable/exprs/column_ref.py +61 -74
  36. pixeltable/exprs/comparison.py +5 -5
  37. pixeltable/exprs/compound_predicate.py +3 -3
  38. pixeltable/exprs/data_row.py +12 -12
  39. pixeltable/exprs/expr.py +41 -31
  40. pixeltable/exprs/expr_dict.py +3 -3
  41. pixeltable/exprs/expr_set.py +3 -3
  42. pixeltable/exprs/function_call.py +14 -14
  43. pixeltable/exprs/in_predicate.py +4 -4
  44. pixeltable/exprs/inline_expr.py +8 -8
  45. pixeltable/exprs/is_null.py +1 -3
  46. pixeltable/exprs/json_mapper.py +8 -8
  47. pixeltable/exprs/json_path.py +6 -6
  48. pixeltable/exprs/literal.py +5 -5
  49. pixeltable/exprs/method_ref.py +2 -2
  50. pixeltable/exprs/object_ref.py +2 -2
  51. pixeltable/exprs/row_builder.py +14 -14
  52. pixeltable/exprs/rowid_ref.py +8 -8
  53. pixeltable/exprs/similarity_expr.py +50 -25
  54. pixeltable/exprs/sql_element_cache.py +4 -4
  55. pixeltable/exprs/string_op.py +2 -2
  56. pixeltable/exprs/type_cast.py +3 -5
  57. pixeltable/func/aggregate_function.py +8 -8
  58. pixeltable/func/callable_function.py +9 -9
  59. pixeltable/func/expr_template_function.py +3 -3
  60. pixeltable/func/function.py +15 -17
  61. pixeltable/func/function_registry.py +6 -7
  62. pixeltable/func/globals.py +2 -3
  63. pixeltable/func/mcp.py +2 -2
  64. pixeltable/func/query_template_function.py +16 -16
  65. pixeltable/func/signature.py +14 -14
  66. pixeltable/func/tools.py +11 -11
  67. pixeltable/func/udf.py +16 -18
  68. pixeltable/functions/__init__.py +1 -0
  69. pixeltable/functions/anthropic.py +7 -7
  70. pixeltable/functions/audio.py +76 -0
  71. pixeltable/functions/bedrock.py +6 -6
  72. pixeltable/functions/deepseek.py +4 -4
  73. pixeltable/functions/fireworks.py +2 -2
  74. pixeltable/functions/gemini.py +6 -6
  75. pixeltable/functions/globals.py +12 -12
  76. pixeltable/functions/groq.py +4 -4
  77. pixeltable/functions/huggingface.py +1033 -6
  78. pixeltable/functions/image.py +7 -10
  79. pixeltable/functions/llama_cpp.py +7 -7
  80. pixeltable/functions/math.py +2 -3
  81. pixeltable/functions/mistralai.py +3 -3
  82. pixeltable/functions/ollama.py +9 -9
  83. pixeltable/functions/openai.py +21 -21
  84. pixeltable/functions/openrouter.py +7 -7
  85. pixeltable/functions/string.py +21 -28
  86. pixeltable/functions/timestamp.py +7 -8
  87. pixeltable/functions/together.py +4 -6
  88. pixeltable/functions/twelvelabs.py +92 -0
  89. pixeltable/functions/video.py +36 -31
  90. pixeltable/functions/vision.py +6 -6
  91. pixeltable/functions/whisper.py +7 -7
  92. pixeltable/functions/whisperx.py +16 -16
  93. pixeltable/globals.py +75 -40
  94. pixeltable/index/base.py +12 -8
  95. pixeltable/index/btree.py +19 -22
  96. pixeltable/index/embedding_index.py +30 -39
  97. pixeltable/io/datarows.py +3 -3
  98. pixeltable/io/external_store.py +13 -16
  99. pixeltable/io/fiftyone.py +5 -5
  100. pixeltable/io/globals.py +5 -5
  101. pixeltable/io/hf_datasets.py +4 -4
  102. pixeltable/io/label_studio.py +12 -12
  103. pixeltable/io/pandas.py +6 -6
  104. pixeltable/io/parquet.py +2 -2
  105. pixeltable/io/table_data_conduit.py +12 -12
  106. pixeltable/io/utils.py +2 -2
  107. pixeltable/iterators/audio.py +2 -2
  108. pixeltable/iterators/document.py +88 -57
  109. pixeltable/iterators/video.py +66 -37
  110. pixeltable/metadata/converters/convert_18.py +2 -2
  111. pixeltable/metadata/converters/convert_19.py +2 -2
  112. pixeltable/metadata/converters/convert_20.py +2 -2
  113. pixeltable/metadata/converters/convert_21.py +2 -2
  114. pixeltable/metadata/converters/convert_22.py +2 -2
  115. pixeltable/metadata/converters/convert_24.py +2 -2
  116. pixeltable/metadata/converters/convert_25.py +2 -2
  117. pixeltable/metadata/converters/convert_26.py +2 -2
  118. pixeltable/metadata/converters/convert_29.py +4 -4
  119. pixeltable/metadata/converters/convert_34.py +2 -2
  120. pixeltable/metadata/converters/convert_36.py +2 -2
  121. pixeltable/metadata/converters/convert_38.py +2 -2
  122. pixeltable/metadata/converters/convert_39.py +1 -2
  123. pixeltable/metadata/converters/util.py +11 -13
  124. pixeltable/metadata/schema.py +22 -21
  125. pixeltable/metadata/utils.py +2 -6
  126. pixeltable/mypy/mypy_plugin.py +5 -5
  127. pixeltable/plan.py +32 -34
  128. pixeltable/share/packager.py +7 -7
  129. pixeltable/share/publish.py +3 -3
  130. pixeltable/store.py +126 -41
  131. pixeltable/type_system.py +43 -46
  132. pixeltable/utils/__init__.py +1 -2
  133. pixeltable/utils/arrow.py +4 -4
  134. pixeltable/utils/av.py +74 -38
  135. pixeltable/utils/azure_store.py +305 -0
  136. pixeltable/utils/code.py +1 -2
  137. pixeltable/utils/dbms.py +15 -19
  138. pixeltable/utils/description_helper.py +2 -3
  139. pixeltable/utils/documents.py +5 -6
  140. pixeltable/utils/exception_handler.py +2 -2
  141. pixeltable/utils/filecache.py +5 -5
  142. pixeltable/utils/formatter.py +4 -6
  143. pixeltable/utils/gcs_store.py +9 -9
  144. pixeltable/utils/local_store.py +17 -17
  145. pixeltable/utils/object_stores.py +59 -43
  146. pixeltable/utils/s3_store.py +35 -30
  147. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
  148. pixeltable-0.4.19.dist-info/RECORD +213 -0
  149. pixeltable/__version__.py +0 -3
  150. pixeltable-0.4.17.dist-info/RECORD +0 -211
  151. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
  152. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
  153. {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/share/packager.py CHANGED

@@ -7,7 +7,7 @@ import urllib.parse
 import urllib.request
 import uuid
 from pathlib import Path
-from typing import Any, Iterator, Optional
+from typing import Any, Iterator
 from uuid import UUID
 
 import more_itertools
@@ -57,7 +57,7 @@ class TablePackager:
     preview_header: dict[str, str]
     preview: list[list[Any]]
 
-    def __init__(self, table: catalog.Table, additional_md: Optional[dict[str, Any]] = None) -> None:
+    def __init__(self, table: catalog.Table, additional_md: dict[str, Any] | None = None) -> None:
        self.table = table
        self.tmp_dir = TempStore.create_path()
        self.media_files = {}
@@ -342,11 +342,11 @@ class TablePackager:
        scaled_img.save(buffer, 'webp')
        return base64.b64encode(buffer.getvalue()).decode()
 
-    def __encode_video(self, video_path: str) -> Optional[str]:
+    def __encode_video(self, video_path: str) -> str | None:
        thumb = Formatter.extract_first_video_frame(video_path)
        return self.__encode_image(thumb) if thumb is not None else None
 
-    def __encode_document(self, doc_path: str) -> Optional[str]:
+    def __encode_document(self, doc_path: str) -> str | None:
        thumb = Formatter.make_document_thumbnail(doc_path)
        return self.__encode_image(thumb) if thumb is not None else None
 
@@ -364,11 +364,11 @@ class TableRestorer:
     """
 
     tbl_path: str
-    md: Optional[dict[str, Any]]
+    md: dict[str, Any] | None
     tmp_dir: Path
     media_files: dict[str, str]  # Mapping from pxtmedia:// URLs to local file:// URLs
 
-    def __init__(self, tbl_path: str, md: Optional[dict[str, Any]] = None) -> None:
+    def __init__(self, tbl_path: str, md: dict[str, Any] | None = None) -> None:
        self.tbl_path = tbl_path
        self.md = md
        self.tmp_dir = TempStore.create_path()
@@ -710,7 +710,7 @@ class TableRestorer:
        self,
        val: Any,
        sql_type: sql.types.TypeEngine[Any],
-       col: Optional[catalog.Column],
+       col: catalog.Column | None,
        is_media_col: bool,
        is_cellmd_col: bool,
    ) -> Any:
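
Most of what changed in this file, and across the release, is a mechanical migration of type annotations from typing.Optional[T] to the PEP 604 union spelling T | None. A minimal sketch of the equivalence, independent of the pixeltable codebase and runnable on Python 3.10+:

from typing import Optional

# Old spelling: Optional[T] is an alias for Union[T, None].
def greet_old(name: Optional[str] = None) -> str:
    return f'hello {name or "world"}'

# New spelling (PEP 604): the same type, no typing import required.
def greet_new(name: str | None = None) -> str:
    return f'hello {name or "world"}'

# On 3.10+ the two annotations compare equal at runtime.
assert Optional[str] == (str | None)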
pixeltable/share/publish.py CHANGED

@@ -3,7 +3,7 @@ import sys
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Literal, Optional
+from typing import Literal
 
 import requests
 from requests.adapters import HTTPAdapter
@@ -159,7 +159,7 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
 
 
 def _create_retry_session(
-    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: Optional[list] = None
+    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
 ) -> requests.Session:
     """Create a requests session with retry configuration"""
     if status_forcelist is None:
@@ -222,7 +222,7 @@ def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) ->
 
 
 def _download_from_presigned_url(
-    url: str, output_path: Path, headers: Optional[dict[str, str]] = None, max_retries: int = 3
+    url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
 ) -> None:
     """Download file with progress bar and retries"""
     session = _create_retry_session(max_retries=max_retries)
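
For context, _create_retry_session follows the standard requests/urllib3 pattern of mounting an HTTPAdapter backed by a Retry policy (the adapter import is visible in the first hunk above). A minimal sketch of that pattern; the default status list here is an assumption for illustration, not necessarily what pixeltable ships:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_retry_session(
    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
) -> requests.Session:
    if status_forcelist is None:
        status_forcelist = [429, 500, 502, 503, 504]  # assumed transient-failure defaults
    retry = Retry(
        total=max_retries,
        backoff_factor=backoff_factor,  # sleep grows as backoff_factor * 2**(attempt - 1)
        status_forcelist=status_forcelist,
    )
    adapter = HTTPAdapter(max_retries=retry)
    session = requests.Session()
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session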
pixeltable/store.py CHANGED
@@ -3,8 +3,9 @@ from __future__ import annotations
 import abc
 import logging
 import sys
+import time
 import warnings
-from typing import Any, Iterable, Iterator, Optional
+from typing import Any, Iterable, Iterator
 
 import more_itertools
 import psycopg
@@ -33,11 +34,11 @@ class StoreBase:
 
     tbl_version: catalog.TableVersionHandle
     sa_md: sql.MetaData
-    sa_tbl: Optional[sql.Table]
+    sa_tbl: sql.Table | None
     _pk_cols: list[sql.Column]
     v_min_col: sql.Column
     v_max_col: sql.Column
-    base: Optional[StoreBase]
+    base: StoreBase | None
 
     # In my cursory experiments this was the optimal batch size: it was an improvement over 5_000 and there was no real
     # benefit to going higher.
@@ -79,12 +80,13 @@ class StoreBase:
         self._pk_cols = [*rowid_cols, self.v_min_col]
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         if tbl_version is None:
             tbl_version = self.tbl_version.get()
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
+        # we captured all columns, including dropped ones: they're still part of the physical table
         for col in [c for c in tbl_version.cols if c.is_stored]:
             # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
             # to the last sql.Table version we created and cannot be reused
@@ -111,6 +113,11 @@ class StoreBase:
         idx_name = f'vmax_idx_{tbl_version.id.hex}'
         idxs.append(sql.Index(idx_name, self.v_max_col, postgresql_using=Env.get().dbms.version_index_type))
 
+        # we only capture indices visible in this version
+        for idx_info in tbl_version.idxs.values():
+            idx = idx_info.idx.sa_index(tbl_version._store_idx_name(idx_info.id), idx_info.val_col)
+            idxs.append(idx)
+
         self.sa_tbl = sql.Table(self._storage_name(), self.sa_md, *all_cols, *idxs)
         # _logger.debug(f'created sa tbl for {tbl_version.id!s} (sa_tbl={id(self.sa_tbl):x}, tv={id(tbl_version):x})')
 
@@ -135,35 +142,122 @@ class StoreBase:
         assert isinstance(result, int)
         return result
 
+    def _exec_if_not_exists(self, stmt: str, wait_for_table: bool) -> None:
+        """
+        Execute a statement containing 'IF NOT EXISTS' and ignore any duplicate object-related errors.
+
+        The statement needs to run in a separate transaction, because the expected error conditions will abort the
+        enclosing transaction (and the ability to run additional statements in that same transaction).
+        """
+        while True:
+            with Env.get().begin_xact(for_write=True) as conn:
+                try:
+                    if wait_for_table:
+                        # Try to lock the table to make sure that it exists. This needs to run in the same transaction
+                        # as 'stmt' to avoid a race condition.
+                        # TODO: adapt this for CockroachDB
+                        lock_stmt = f'LOCK TABLE {self._storage_name()} IN ACCESS EXCLUSIVE MODE'
+                        conn.execute(sql.text(lock_stmt))
+                    conn.execute(sql.text(stmt))
+                    return
+                except (sql.exc.IntegrityError, sql.exc.ProgrammingError) as e:
+                    Env.get().console_logger.info(f'{stmt} failed with: {e}')
+                    if (
+                        isinstance(e.orig, psycopg.errors.UniqueViolation)
+                        and 'duplicate key value violates unique constraint' in str(e.orig)
+                    ) or (
+                        isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable))
+                        and 'already exists' in str(e.orig)
+                    ):
+                        # table already exists
+                        return
+                    elif isinstance(e.orig, psycopg.errors.UndefinedTable):
+                        # the Lock Table failed because the table doesn't exist yet; try again
+                        time.sleep(1)
+                        continue
+                    else:
+                        raise
+
+    def _store_tbl_exists(self) -> bool:
+        """Returns True if the store table exists, False otherwise."""
+        with Env.get().begin_xact(for_write=False) as conn:
+            q = (
+                'SELECT COUNT(*) FROM pg_catalog.pg_tables '
+                f"WHERE schemaname = 'public' AND tablename = {self._storage_name()!r}"
+            )
+            res = conn.execute(sql.text(q)).scalar_one()
+            return res == 1
+
     def create(self) -> None:
-        """Create If Not Exists for this table"""
+        """
+        Create or update store table to bring it in sync with self.sa_tbl. Idempotent.
+
+        This runs a sequence of DDL statements (Create Table, Alter Table Add Column, Create Index), each of which
+        is run in its own transaction.
+
+        The exception to that are local replicas, for which TableRestorer creates an enclosing transaction. In theory,
+        this should avoid the potential for race conditions that motivate the error handling present in
+        _exec_if_not_exists() (meaning: we shouldn't see those errors when creating local replicas).
+        TODO: remove the special case for local replicas in order to make the logic easier to reason about.
+        """
+        postgres_dialect = sql.dialects.postgresql.dialect()
+
+        if not self._store_tbl_exists():
+            # run Create Table If Not Exists; we always need If Not Exists to avoid race conditions between concurrent
+            # Pixeltable processes
+            create_stmt = sql.schema.CreateTable(self.sa_tbl, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=False)
+        else:
+            # ensure that all columns exist by running Alter Table Add Column If Not Exists for all columns
+            for col in self.sa_tbl.columns:
+                stmt = self._add_column_stmt(col)
+                self._exec_if_not_exists(stmt, wait_for_table=True)
+            # TODO: do we also need to ensure that these columns are now visible (ie, is there another potential race
+            # condition here?)
+
+        # ensure that all visible indices exist by running Create Index If Not Exists
+        for index in self.sa_tbl.indexes:
+            create_stmt = sql.schema.CreateIndex(index, if_not_exists=True).compile(dialect=postgres_dialect)
+            self._exec_if_not_exists(str(create_stmt), wait_for_table=True)
+
+    def create_index(self, idx_id: int) -> None:
+        """Create If Not Exists for this index"""
+        idx_info = self.tbl_version.get().idxs[idx_id]
+        sa_idx = idx_info.idx.sa_index(self.tbl_version.get()._store_idx_name(idx_id), idx_info.val_col)
         conn = Env.get().conn
-        stmt = sql.schema.CreateTable(self.sa_tbl).compile(conn)
+        stmt = sql.schema.CreateIndex(sa_idx, if_not_exists=True).compile(conn)
         create_stmt = str(stmt)
-        if_not_exists_stmt = create_stmt.replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS')
-
-        # Postgres seems not to handle concurrent Create Table If Not Exists correctly, we need to ignore the various
-        # errors that can occur when two connections run the same Create Table statement.
-        try:
-            conn.execute(sql.text(if_not_exists_stmt))
-        except (sql.exc.IntegrityError, sql.exc.ProgrammingError) as e:
-            Env.get().console_logger.info(f'StoreBase.create() failed with: {e}')
-            if (
-                isinstance(e.orig, psycopg.errors.UniqueViolation)
-                and 'duplicate key value violates unique constraint "pg_type_typname_nsp_index"' in str(e.orig)
-            ) or (
-                isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable))
-                and 'already exists' in str(e.orig)
-            ):
-                pass
-            else:
-                raise
+        self._exec_if_not_exists(create_stmt, wait_for_table=True)
+
+    def validate(self) -> None:
+        """Validate store table against self.table_version"""
+        with Env.get().begin_xact() as conn:
+            # check that all columns are present
+            q = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
+            store_col_info = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_col_info = {col.store_name() for col in self.tbl_version.get().cols if col.is_stored}
+            assert tbl_col_info.issubset(store_col_info)
+
+            # check that all visible indices are present
+            q = f'SELECT indexname FROM pg_indexes WHERE tablename = {self._storage_name()!r}'
+            store_idx_names = {row[0] for row in conn.execute(sql.text(q)).fetchall()}
+            tbl_index_names = {
+                self.tbl_version.get()._store_idx_name(info.id) for info in self.tbl_version.get().idxs.values()
+            }
+            assert tbl_index_names.issubset(store_idx_names)
 
     def drop(self) -> None:
         """Drop store table"""
         conn = Env.get().conn
         self.sa_md.drop_all(bind=conn)
 
+    def _add_column_stmt(self, sa_col: sql.Column) -> str:
+        col_type_str = sa_col.type.compile(dialect=sql.dialects.postgresql.dialect())
+        return (
+            f'ALTER TABLE {self._storage_name()} ADD COLUMN IF NOT EXISTS '
+            f'{sa_col.name} {col_type_str} {"NOT " if not sa_col.nullable else ""} NULL'
+        )
+
     def add_column(self, col: catalog.Column) -> None:
         """Add column(s) to the store-resident table based on a catalog column
 
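The error handling in the new _exec_if_not_exists exists because even IF NOT EXISTS DDL can fail under concurrency: two connections racing through the same CREATE can still collide on system-catalog rows (e.g. pg_type_typname_nsp_index, which the old code checked for explicitly). A standalone sketch of the classification logic, mirroring the diff above rather than a verbatim extract:

import psycopg
import sqlalchemy as sql

def is_duplicate_object_error(e: sql.exc.StatementError) -> bool:
    # UniqueViolation: concurrent CREATE can collide on catalog rows despite IF NOT EXISTS.
    if isinstance(e.orig, psycopg.errors.UniqueViolation):
        return 'duplicate key value violates unique constraint' in str(e.orig)
    # DuplicateObject/DuplicateTable: the other connection won the race; the object already exists.
    if isinstance(e.orig, (psycopg.errors.DuplicateObject, psycopg.errors.DuplicateTable)):
        return 'already exists' in str(e.orig)
    return False
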
@@ -172,7 +266,7 @@ class StoreBase:
         """
         assert col.is_stored
         conn = Env.get().conn
-        col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
+        col_type_str = col.sa_col_type.compile(dialect=conn.dialect)
         s_txt = f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL'
         added_storage_cols = [col.store_name()]
         if col.stores_cellmd:
@@ -195,15 +289,6 @@ class StoreBase:
         log_stmt(_logger, stmt)
         Env.get().conn.execute(stmt)
 
-    def ensure_columns_exist(self, cols: Iterable[catalog.Column]) -> None:
-        conn = Env.get().conn
-        sql_text = f'SELECT column_name FROM information_schema.columns WHERE table_name = {self._storage_name()!r}'
-        result = conn.execute(sql.text(sql_text))
-        existing_cols = {row[0] for row in result}
-        for col in cols:
-            if col.store_name() not in existing_cols:
-                self.add_column(col)
-
     def load_column(self, col: catalog.Column, exec_plan: ExecNode, abort_on_exc: bool) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
@@ -213,7 +298,7 @@ class StoreBase:
             sql.exc.DBAPIError if there was a SQL error during execution
             excs.Error if on_error='abort' and there was an exception during row evaluation
         """
-        assert col.tbl.id == self.tbl_version.id
+        assert col.get_tbl().id == self.tbl_version.id
         num_excs = 0
         num_rows = 0
         # create temp table to store output of exec_plan, with the same primary key as the store table
@@ -283,7 +368,7 @@ class StoreBase:
         exec_plan: ExecNode,
         v_min: int,
         show_progress: bool = True,
-        rowids: Optional[Iterator[int]] = None,
+        rowids: Iterator[int] | None = None,
         abort_on_exc: bool = False,
     ) -> tuple[set[int], RowCountStats]:
         """Insert rows into the store table and update the catalog table's md
@@ -295,7 +380,7 @@ class StoreBase:
         num_excs = 0
         num_rows = 0
         cols_with_excs: set[int] = set()
-        progress_bar: Optional[tqdm] = None  # create this only after we started executing
+        progress_bar: tqdm | None = None  # create this only after we started executing
         row_builder = exec_plan.row_builder
 
         store_col_names = row_builder.store_column_names()
@@ -368,7 +453,7 @@ class StoreBase:
         # stmt_text = f'INSERT INTO {self.sa_tbl.name} ({col_names_str}) VALUES ({placeholders_str})'
         # conn.exec_driver_sql(stmt_text, table_rows)
 
-    def _versions_clause(self, versions: list[Optional[int]], match_on_vmin: bool) -> sql.ColumnElement[bool]:
+    def _versions_clause(self, versions: list[int | None], match_on_vmin: bool) -> sql.ColumnElement[bool]:
         """Return filter for base versions"""
         v = versions[0]
         if v is None:
@@ -386,9 +471,9 @@ class StoreBase:
     def delete_rows(
         self,
         current_version: int,
-        base_versions: list[Optional[int]],
+        base_versions: list[int | None],
         match_on_vmin: bool,
-        where_clause: Optional[sql.ColumnElement[bool]],
+        where_clause: sql.ColumnElement[bool] | None,
     ) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
         Also: populate the undo columns
@@ -514,7 +599,7 @@ class StoreComponentView(StoreView):
         self.rowid_cols.append(self.pos_col)
         return self.rowid_cols
 
-    def create_sa_tbl(self, tbl_version: Optional[catalog.TableVersion] = None) -> None:
+    def create_sa_tbl(self, tbl_version: catalog.TableVersion | None = None) -> None:
         if tbl_version is None:
             tbl_version = self.tbl_version.get()
         super().create_sa_tbl(tbl_version)
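
The rewritten create() path also stops string-patching 'CREATE TABLE' into 'CREATE TABLE IF NOT EXISTS': SQLAlchemy (1.4+) can compile that clause directly. A self-contained sketch of the compilation step; the table and index names are illustrative only:

import sqlalchemy as sql
from sqlalchemy.dialects import postgresql

md = sql.MetaData()
tbl = sql.Table(
    'tbl_deadbeef',  # illustrative storage name
    md,
    sql.Column('rowid', sql.BigInteger, nullable=False),
    sql.Column('v_min', sql.BigInteger, nullable=False),
)
idx = sql.Index('vmin_idx_deadbeef', tbl.c.v_min)

dialect = postgresql.dialect()
# Both constructs accept if_not_exists=True, so the emitted DDL is idempotent
# and safe to race between concurrent processes.
print(sql.schema.CreateTable(tbl, if_not_exists=True).compile(dialect=dialect))
print(sql.schema.CreateIndex(idx, if_not_exists=True).compile(dialect=dialect))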
pixeltable/type_system.py CHANGED
@@ -10,7 +10,7 @@ import typing
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Any, ClassVar, Iterable, Literal, Mapping, Optional, Sequence, Union
+from typing import Any, ClassVar, Iterable, Literal, Mapping, Sequence, Union
 
 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
 
@@ -51,11 +51,11 @@ class ColumnType:
     @classmethod
     def supertype(
         cls,
-        type1: Optional['ColumnType.Type'],
-        type2: Optional['ColumnType.Type'],
+        type1: 'ColumnType.Type' | None,
+        type2: 'ColumnType.Type' | None,
         # we need to pass this in because we can't easily append it as a class member
         common_supertypes: dict[tuple['ColumnType.Type', 'ColumnType.Type'], 'ColumnType.Type'],
-    ) -> Optional['ColumnType.Type']:
+    ) -> 'ColumnType.Type' | None:
         if type1 == type2:
             return type1
         t = common_supertypes.get((type1, type2))
@@ -188,7 +188,7 @@ class ColumnType:
         if as_schema:
             return base_str if self.nullable else f'Required[{base_str}]'
         else:
-            return f'Optional[{base_str}]' if self.nullable else base_str
+            return f'{base_str} | None' if self.nullable else base_str
 
     def _to_base_str(self) -> str:
         """
@@ -217,7 +217,7 @@ class ColumnType:
         # Default: just compare base types (this works for all types whose only parameter is nullable)
         return self._type == other._type
 
-    def supertype(self, other: ColumnType) -> Optional[ColumnType]:
+    def supertype(self, other: ColumnType) -> ColumnType | None:
         if self == other:
             return self
         if self.matches(other):
@@ -237,7 +237,7 @@ class ColumnType:
         return None
 
     @classmethod
-    def infer_literal_type(cls, val: Any, nullable: bool = False) -> Optional[ColumnType]:
+    def infer_literal_type(cls, val: Any, nullable: bool = False) -> ColumnType | None:
         if val is None:
             return InvalidType(nullable=True)
         if isinstance(val, str):
@@ -271,7 +271,7 @@ class ColumnType:
         return None
 
     @classmethod
-    def infer_common_literal_type(cls, vals: Iterable[Any]) -> Optional[ColumnType]:
+    def infer_common_literal_type(cls, vals: Iterable[Any]) -> ColumnType | None:
         """
         Returns the most specific type that is a supertype of all literals in `vals`. If no such type
         exists, returns None.
@@ -279,7 +279,7 @@ class ColumnType:
         Args:
             vals: A collection of literals.
         """
-        inferred_type: Optional[ColumnType] = None
+        inferred_type: ColumnType | None = None
         for val in vals:
             val_type = cls.infer_literal_type(val)
             if inferred_type is None:
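
Per its docstring, infer_common_literal_type folds supertype() over the inferred types of the literals, giving up as soon as no common supertype exists. A toy, self-contained illustration of that folding pattern (ToyType and its two-element lattice are invented for this example; the real lattice lives in ColumnType):

from __future__ import annotations
from dataclasses import dataclass

@dataclass(frozen=True)
class ToyType:
    name: str

    def supertype(self, other: ToyType) -> ToyType | None:
        if self == other:
            return self
        if {self.name, other.name} == {'int', 'float'}:
            return ToyType('float')  # int widens to float
        return None  # no common supertype

def infer_literal_type(val: object) -> ToyType | None:
    if isinstance(val, bool):
        return ToyType('bool')  # check before int: bool is a subclass of int
    if isinstance(val, int):
        return ToyType('int')
    if isinstance(val, float):
        return ToyType('float')
    if isinstance(val, str):
        return ToyType('string')
    return None

def infer_common_literal_type(vals) -> ToyType | None:
    inferred: ToyType | None = None
    for val in vals:
        val_type = infer_literal_type(val)
        if val_type is None:
            return None
        inferred = val_type if inferred is None else inferred.supertype(val_type)
        if inferred is None:
            return None
    return inferred

assert infer_common_literal_type([1, 2.5]) == ToyType('float')
assert infer_common_literal_type([1, 'a']) is None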
@@ -299,7 +299,7 @@ class ColumnType:
         nullable_default: bool = False,
         allow_builtin_types: bool = True,
         infer_pydantic_json: bool = False,
-    ) -> Optional[ColumnType]:
+    ) -> ColumnType | None:
         """
         Convert a Python type into a Pixeltable `ColumnType` instance.
 
@@ -317,9 +317,9 @@ class ColumnType:
         origin = typing.get_origin(t)
         type_args = typing.get_args(t)
         if origin in (typing.Union, types.UnionType):
-            # Check if `t` has the form Optional[T].
+            # Check if `t` has the form T | None.
             if len(type_args) == 2 and type(None) in type_args:
-                # `t` is a type of the form Optional[T] (equivalently, T | None or None | T).
+                # `t` is a type of the form T | None (equivalently, T | None or None | T).
                 # We treat it as the underlying type but with nullable=True.
                 underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
                 underlying = cls.from_python_type(
@@ -338,7 +338,7 @@ class ColumnType:
         if isinstance(parameters, ColumnType):
             return parameters.copy(nullable=nullable_default)
         else:
-            # It's something other than Optional[T], Required[T], or an explicitly annotated type.
+            # It's something other than T | None, Required[T], or an explicitly annotated type.
             if origin is not None:
                 # Discard type parameters to ensure that parameterized types such as `list[T]`
                 # are correctly mapped to Pixeltable types.
@@ -411,7 +411,7 @@ class ColumnType:
         raise excs.Error(f'Unknown type: {t}')
 
     @classmethod
-    def from_json_schema(cls, schema: dict[str, Any]) -> Optional[ColumnType]:
+    def from_json_schema(cls, schema: dict[str, Any]) -> ColumnType | None:
         # We first express the JSON schema as a Python type, and then convert it to a Pixeltable type.
         # TODO: Is there a meaningful fallback if one of these operations fails? (Maybe another use case for a pxt Any
         # type?)
@@ -704,10 +704,10 @@ class DateType(ColumnType):
 
 
 class JsonType(ColumnType):
-    json_schema: Optional[dict[str, Any]]
-    __validator: Optional[jsonschema.protocols.Validator]
+    json_schema: dict[str, Any] | None
+    __validator: jsonschema.protocols.Validator | None
 
-    def __init__(self, json_schema: Optional[dict[str, Any]] = None, nullable: bool = False):
+    def __init__(self, json_schema: dict[str, Any] | None = None, nullable: bool = False):
         super().__init__(self.Type.JSON, nullable=nullable)
         self.json_schema = json_schema
         if json_schema is None:
@@ -777,7 +777,7 @@ class JsonType(ColumnType):
             return val.model_dump()
         return val
 
-    def supertype(self, other: ColumnType) -> Optional[JsonType]:
+    def supertype(self, other: ColumnType) -> JsonType | None:
         # Try using the (much faster) supertype logic in ColumnType first. That will work if, for example, the types
         # are identical except for nullability. If that doesn't work and both types are JsonType, then we will need to
         # merge their schemas.
@@ -799,7 +799,7 @@ class JsonType(ColumnType):
         )
 
     @classmethod
-    def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+    def __superschema(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
         # Defining a general type hierarchy over all JSON schemas would be a challenging problem. In order to keep
         # things manageable, we only define a hierarchy among "conforming" schemas, which provides enough generality
         # for the most important use cases (unions for type inference, validation of inline exprs). A schema is
@@ -859,7 +859,7 @@ class JsonType(ColumnType):
         return {}  # Unresolvable type conflict; the supertype is an unrestricted JsonType.
 
     @classmethod
-    def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> Optional[dict[str, Any]]:
+    def __superschema_with_nulls(cls, a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any] | None:
         a, a_nullable = cls.__unpack_null_from_schema(a)
         b, b_nullable = cls.__unpack_null_from_schema(b)
 
@@ -888,15 +888,12 @@ class JsonType(ColumnType):
 
 
 class ArrayType(ColumnType):
-    shape: Optional[tuple[Optional[int], ...]]
-    pxt_dtype: Optional[ColumnType]
-    dtype: Optional[ColumnType.Type]
+    shape: tuple[int | None, ...] | None
+    pxt_dtype: ColumnType | None
+    dtype: ColumnType.Type | None
 
     def __init__(
-        self,
-        shape: Optional[tuple[Optional[int], ...]] = None,
-        dtype: Optional[ColumnType] = None,
-        nullable: bool = False,
+        self, shape: tuple[int | None, ...] | None = None, dtype: ColumnType | None = None, nullable: bool = False
     ):
         super().__init__(self.Type.ARRAY, nullable=nullable)
         assert shape is None or dtype is not None, (shape, dtype)  # cannot specify a shape without a dtype
@@ -921,7 +918,7 @@ class ArrayType(ColumnType):
     def __hash__(self) -> int:
         return hash((self._type, self.nullable, self.shape, self.dtype))
 
-    def supertype(self, other: ColumnType) -> Optional[ArrayType]:
+    def supertype(self, other: ColumnType) -> ArrayType | None:
         basic_supertype = super().supertype(other)
         if basic_supertype is not None:
             assert isinstance(basic_supertype, ArrayType)
@@ -934,7 +931,7 @@ class ArrayType(ColumnType):
         if super_dtype is None:
             # if the dtypes are incompatible, then the supertype is a fully general array
             return ArrayType(nullable=(self.nullable or other.nullable))
-        super_shape: Optional[tuple[Optional[int], ...]]
+        super_shape: tuple[int | None, ...] | None
         if self.shape is None or other.shape is None or len(self.shape) != len(other.shape):
             super_shape = None
         else:
@@ -965,7 +962,7 @@ class ArrayType(ColumnType):
         return cls(shape, dtype, nullable=d['nullable'])
 
     @classmethod
-    def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> Optional[ColumnType]:
+    def from_np_dtype(cls, dtype: np.dtype, nullable: bool) -> ColumnType | None:
         """
         Return pixeltable type corresponding to a given simple numpy dtype
         """
@@ -994,10 +991,10 @@ class ArrayType(ColumnType):
         return None
 
     @classmethod
-    def from_literal(cls, val: np.ndarray, nullable: bool = False) -> Optional[ArrayType]:
+    def from_literal(cls, val: np.ndarray, nullable: bool = False) -> ArrayType | None:
         # determine our dtype
         assert isinstance(val, np.ndarray)
-        pxttype: Optional[ColumnType] = cls.from_np_dtype(val.dtype, nullable)
+        pxttype: ColumnType | None = cls.from_np_dtype(val.dtype, nullable)
         if pxttype is None:
             return None
         return cls(val.shape, dtype=pxttype, nullable=nullable)
@@ -1060,7 +1057,7 @@ class ArrayType(ColumnType):
     def to_sa_type(cls) -> sql.types.TypeEngine:
         return sql.LargeBinary()
 
-    def numpy_dtype(self) -> Optional[np.dtype]:
+    def numpy_dtype(self) -> np.dtype | None:
        if self.dtype is None:
            return None
        if self.dtype == self.Type.INT:
@@ -1077,10 +1074,10 @@
 class ImageType(ColumnType):
     def __init__(
         self,
-        width: Optional[int] = None,
-        height: Optional[int] = None,
-        size: Optional[tuple[int, int]] = None,
-        mode: Optional[str] = None,
+        width: int | None = None,
+        height: int | None = None,
+        size: tuple[int, int] | None = None,
+        mode: str | None = None,
         nullable: bool = False,
     ):
         # TODO: does it make sense to specify only width or height?
@@ -1121,7 +1118,7 @@ class ImageType(ColumnType):
     def __hash__(self) -> int:
         return hash((self._type, self.nullable, self.size, self.mode))
 
-    def supertype(self, other: ColumnType) -> Optional[ImageType]:
+    def supertype(self, other: ColumnType) -> ImageType | None:
         basic_supertype = super().supertype(other)
         if basic_supertype is not None:
             assert isinstance(basic_supertype, ImageType)
@@ -1136,7 +1133,7 @@ class ImageType(ColumnType):
         return ImageType(width=width, height=height, mode=mode, nullable=(self.nullable or other.nullable))
 
     @property
-    def size(self) -> Optional[tuple[int, int]]:
+    def size(self) -> tuple[int, int] | None:
         if self.width is None or self.height is None:
             return None
         return (self.width, self.height)
@@ -1255,7 +1252,7 @@ class DocumentType(ColumnType):
         TXT = 4
 
         @classmethod
-        def from_extension(cls, ext: str) -> Optional['DocumentType.DocumentFormat']:
+        def from_extension(cls, ext: str) -> 'DocumentType.DocumentFormat' | None:
             if ext in ('.htm', '.html'):
                 return cls.HTML
             if ext == '.md':
@@ -1268,7 +1265,7 @@ class DocumentType(ColumnType):
                 return cls.TXT
             return None
 
-    def __init__(self, nullable: bool = False, doc_formats: Optional[str] = None):
+    def __init__(self, nullable: bool = False, doc_formats: str | None = None):
         super().__init__(self.Type.DOCUMENT, nullable=nullable)
         self.doc_formats = doc_formats
         if doc_formats is not None:
@@ -1365,13 +1362,13 @@ class Array(np.ndarray, _PxtType):
     def __class_getitem__(cls, item: Any) -> _AnnotatedAlias:
         """
         `item` (the type subscript) must be a tuple with exactly two elements (in any order):
-        - A tuple of `Optional[int]`s, specifying the shape of the array
+        - A tuple of `int | None`s, specifying the shape of the array
         - A type, specifying the dtype of the array
         Example: Array[(3, None, 2), pxt.Float]
         """
         params = item if isinstance(item, tuple) else (item,)
-        shape: Optional[tuple] = None
-        dtype: Optional[ColumnType] = None
+        shape: tuple | None = None
+        dtype: ColumnType | None = None
         if not any(isinstance(param, (type, _AnnotatedAlias)) for param in params):
             raise TypeError('Array type parameter must include a dtype.')
         for param in params:
@@ -1411,8 +1408,8 @@ class Image(PIL.Image.Image, _PxtType):
         else:
             # Not a tuple (single arg)
             params = (item,)
-        size: Optional[tuple] = None
-        mode: Optional[str] = None
+        size: tuple | None = None
+        mode: str | None = None
         for param in params:
             if isinstance(param, tuple):
                 if (
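
The comment changes in from_python_type track how Optional[T] and T | None are detected: both spellings produce a union whose arguments include NoneType, though their runtime origins differ (typing.Union vs. types.UnionType). A small stdlib-only sketch of that detection, matching the origin check visible in the hunk above:

import types
import typing

def split_optional(t: object) -> tuple[object, bool]:
    # Returns (underlying type, nullable) for annotations of the form T | None.
    origin = typing.get_origin(t)
    args = typing.get_args(t)
    # Optional[T] has origin typing.Union; `T | None` (3.10+) has origin types.UnionType.
    if origin in (typing.Union, types.UnionType) and len(args) == 2 and type(None) in args:
        underlying = args[0] if args[1] is type(None) else args[1]
        return underlying, True
    return t, False

assert split_optional(typing.Optional[int]) == (int, True)
assert split_optional(int | None) == (int, True)
assert split_optional(int) == (int, False)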
pixeltable/utils/__init__.py CHANGED

@@ -2,7 +2,6 @@ import hashlib
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Optional
 
 
 def print_perf_counter_delta(delta: float) -> str:
@@ -39,7 +38,7 @@ def sha256sum(path: Path | str) -> str:
     return h.hexdigest()
 
 
-def parse_local_file_path(file_or_url: str) -> Optional[Path]:
+def parse_local_file_path(file_or_url: str) -> Path | None:
     """
     Parses a string that may be either a URL or a local file path.
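
The final hunk is truncated by the diff renderer mid-docstring; it only changes the return annotation of parse_local_file_path to Path | None. For orientation, a hedged sketch of what a helper with this name and docstring typically does (the body is an assumption based on the signature, not the actual pixeltable implementation):

import urllib.parse
import urllib.request
from pathlib import Path

def parse_local_file_path(file_or_url: str) -> Path | None:
    # Assumed behavior: return a Path for plain paths and file:// URLs, None for remote URLs.
    parsed = urllib.parse.urlparse(file_or_url)
    if parsed.scheme == '':
        return Path(file_or_url)
    if parsed.scheme == 'file':
        return Path(urllib.request.url2pathname(parsed.path))
    return None  # http://, s3://, etc. are not local files

assert parse_local_file_path('/tmp/x.png') == Path('/tmp/x.png')
assert parse_local_file_path('https://example.com/x.png') is None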